4 changes: 3 additions & 1 deletion .bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.5.0
current_version = 1.0.0
commit = True
tag = True
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>(rc|dev))(?P<build>\d+))?
@@ -18,3 +18,5 @@ values =
first_value = 1

[bumpversion:file:pyproject.toml]
search = version = "{current_version}"
replace = version = "{new_version}"
8 changes: 5 additions & 3 deletions .github/workflows/coverage.yaml
@@ -1,5 +1,4 @@
name: Coverage

on:
push:
branches:
@@ -9,20 +8,23 @@ on:
pull_request:
branches:
- main

- dev
jobs:
coverage:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
- name: Set up Python 3.9
uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install poetry
uses: abatilo/actions-poetry@v2
- name: Setup a local virtual environment
run: |
poetry config virtualenvs.create true --local
poetry config virtualenvs.in-project true --local
poetry lock
- uses: actions/cache@v3
name: Define a cache for the virtual environment based on the dependencies lock file
with:
11 changes: 8 additions & 3 deletions .github/workflows/lint.yaml
@@ -1,21 +1,26 @@
name: Lint

on:
push:

pull_request:
branches:
- main
- dev
jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
- name: Set up Python 3.9
uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install poetry
uses: abatilo/actions-poetry@v2
- name: Setup a local virtual environment
run: |
poetry config virtualenvs.create true --local
poetry config virtualenvs.in-project true --local
poetry lock
- uses: actions/cache@v3
name: Define a cache for the virtual environment based on the dependencies lock file
with:
5 changes: 3 additions & 2 deletions .github/workflows/main.yaml
@@ -5,7 +5,6 @@ on:
- main
- dev
- issue/**

jobs:
validate_focus:
runs-on: ubuntu-latest
@@ -14,8 +13,10 @@ jobs:
steps:
- name: Check out repository code
uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
- name: Set up Python 3.9
uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install poetry
uses: abatilo/actions-poetry@v2
- name: Setup a local virtual environment
8 changes: 4 additions & 4 deletions .github/workflows/publish.yaml
@@ -4,22 +4,22 @@ on:
tags:
- 'v\d\.\d\.\d'
- 'v\d\.\d\.\d-(dev|rc)\d'

jobs:
publish:
permissions:
id-token: write
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python 3.8
- name: Set up Python 3.11
uses: actions/setup-python@v4
with:
python-version: 3.8
python-version: 3.11
- name: Install poetry
uses: abatilo/actions-poetry@v2
- name: Install dependencies
run: |
poetry build
find -type l -exec bash -c 'ln -f "$(readlink -m "$0")" "$0"' {} \;
poetry build --format=sdist
- name: Publish package distributions to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
12 changes: 5 additions & 7 deletions .github/workflows/unittest.yaml
@@ -1,5 +1,4 @@
name: Unittest

on:
push:
branches:
@@ -9,15 +8,14 @@ on:
pull_request:
branches:
- main

- dev
jobs:
test:
runs-on: ubuntu-latest

runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: [ "3.8", "3.9", "3.10", "3.11" ]

python-version: [ "3.9", "3.10", "3.11", "3.12" ]
os: [ windows-latest, ubuntu-latest, macos-latest ]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
@@ -35,7 +33,7 @@ jobs:
name: Define a cache for the virtual environment based on the dependencies lock file
with:
path: ./.venv
key: venv-${{ hashFiles('poetry.lock') }}
key: venv-${{ hashFiles('poetry.lock') }}-${{ matrix.os }}-${{ matrix.python-version }}
- name: Install dependencies
run: |
poetry install
26 changes: 25 additions & 1 deletion README.md
@@ -10,7 +10,7 @@ tbd

### Prerequisites

- Python 3.8+
- Python 3.9+
- Poetry (Package & Dependency Manager)

### Installation
@@ -40,6 +40,22 @@ Using Poetry, you can install the project's dependencies with:
```bash
poetry install
```
#### 4. Downgrade Multimethod

By default, Multimethod 2.0 is installed, but focus_validator works with 1.9.1:

```bash
poetry add "multimethod@1.9.1"
```

#### 5. Install Virtual Environment Shell

The shell plugin is not installed by default, so you need to install it:

```bash
poetry self add poetry-plugin-shell
```


## Usage

@@ -67,6 +83,14 @@ poetry run pytest

Ensure you have `pytest` defined as a development dependency in your `pyproject.toml`.

If you are running on a legacy CPU and the tests crash inside the polars library, run the following locally only:

```bash
poetry add polars-lts-cpu
```

This installs a polars build that matches your system hardware. It should NOT be committed back into the repository.

## License

This project is licensed under the MIT License - see the `LICENSE` file for details.
21 changes: 21 additions & 0 deletions build.py
@@ -0,0 +1,21 @@
import os
import pathlib
import shutil
import yaml

def copy_rules(basedir):
with open(os.path.join(basedir, 'version_sets.yaml'), 'r') as file:
version_sets = yaml.safe_load(file)

for version, base_files in version_sets.items():
dest = os.path.join(basedir, 'version_sets', version)
if os.path.exists(dest):
shutil.rmtree(dest)
pathlib.Path(dest).mkdir(parents=True)
for f in base_files:
src_file = os.path.join(basedir, 'base_rule_definitions', f)
dest_file = os.path.join(dest, f)
shutil.copyfile(src_file, dest_file)

if __name__ == "__main__":
copy_rules(basedir='focus_validator/rules')
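For context, `copy_rules` above assumes that `version_sets.yaml` maps each version-set name to the list of base rule definition files to copy. A minimal sketch of that assumed structure; the version label and file names below are hypothetical, not taken from the repository:

```python
import yaml

# Hypothetical contents of focus_validator/rules/version_sets.yaml:
# a mapping of version-set name -> base rule definition file names.
example = """
"1.0":
  - billed_cost.yaml
  - charge_category.yaml
"""

version_sets = yaml.safe_load(example)
for version, base_files in version_sets.items():
    # copy_rules would create version_sets/<version>/ and copy each listed
    # file into it from base_rule_definitions/.
    print(version, base_files)
```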
25 changes: 24 additions & 1 deletion focus_validator/config_objects/common.py
@@ -1,7 +1,8 @@
from enum import Enum
from typing import List, Literal

from pydantic import BaseModel
import sqlglot
from pydantic import BaseModel, field_validator


class AllowNullsCheck(BaseModel):
@@ -12,6 +13,22 @@ class ValueInCheck(BaseModel):
value_in: List[str]


class SQLQueryCheck(BaseModel):
sql_query: str

@field_validator("sql_query")
def check_sql_query(cls, sql_query):
returned_columns = [
column.alias
for column in sqlglot.parse_one(sql_query).find_all(sqlglot.exp.Alias)
]

assert returned_columns == [
"check_output"
], "SQL query must only return a column called 'check_output'"
return sql_query


SIMPLE_CHECKS = Literal["check_unique", "column_required"]


@@ -20,6 +37,7 @@ class DataTypes(Enum):
DECIMAL = "decimal"
DATETIME = "datetime"
CURRENCY_CODE = "currency-code"
STRINGIFIED_JSON_OBJECT = "stringified-json-object"


class DataTypeCheck(BaseModel):
@@ -50,3 +68,8 @@ def generate_check_friendly_name(check, column_id):
return f"{column_id} does not allow null values."
elif isinstance(check, DataTypeCheck):
return f"{column_id} requires values of type {check.data_type.value}."
elif isinstance(check, SQLQueryCheck):
sql_query = " ".join([word.strip() for word in check.sql_query.split()])
return f"{column_id} requires values that return true when evaluated by the following SQL query: {sql_query}"
else:
raise NotImplementedError(f"Check {check} not implemented.")
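The `SQLQueryCheck` validator added above only accepts queries whose aliased output is a single column named `check_output`. A minimal sketch of that check, assuming `sqlglot` is installed; the query text itself is hypothetical:

```python
import sqlglot

# Hypothetical rule query; only the alias of the returned column matters here.
sql_query = "SELECT BilledCost >= 0 AS check_output FROM df"

returned_columns = [
    column.alias
    for column in sqlglot.parse_one(sql_query).find_all(sqlglot.exp.Alias)
]
assert returned_columns == ["check_output"]
```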
@@ -1,7 +1,10 @@
import os
from itertools import groupby
from typing import Dict, List, Optional, Set, Union

import pandas as pd
import pandera as pa
import sqlglot
from pandera.api.pandas.types import PandasDtypeInputTypes

from focus_validator.config_objects import ChecklistObject, InvalidRule, Rule
@@ -10,11 +13,25 @@
ChecklistObjectStatus,
DataTypeCheck,
DataTypes,
SQLQueryCheck,
ValueInCheck,
)
from focus_validator.config_objects.override import Override
from focus_validator.exceptions import FocusNotImplementedError

# The group index column adds a per-row index that is included in the groupby keys; without it, the default
# groupby does not carry every row of the dataframe forward, so row numbers are lost.
GROUP_INDEX_COLUMN = "group_index_column"


def __groupby_fnc__(df: pd.DataFrame, column_alias: List[str]):
"""
Custom groupby function to be used with pandera check_sql_query, allowing null values
Default groupby function does not allow null values
"""
df[GROUP_INDEX_COLUMN] = range(0, len(df))
return df.groupby(column_alias + [GROUP_INDEX_COLUMN], dropna=False)


class FocusToPanderaSchemaConverter:
@staticmethod
@@ -40,9 +57,22 @@ def __generate_pandera_check__(rule: Rule, check_id):
return pa.Check.check_value_in(
allowed_values=check.value_in, error=error_string
)
elif isinstance(check, SQLQueryCheck):
column_alias = [
column.alias_or_name
for column in sqlglot.parse_one(check.sql_query).find_all(
sqlglot.exp.Column
)
]
return pa.Check.check_sql_query(
sql_query=check.sql_query,
error=error_string,
column_alias=column_alias,
groupby=lambda df: __groupby_fnc__(df=df, column_alias=column_alias),
)
elif isinstance(check, AllowNullsCheck):
return pa.Check.check_not_null(
error=error_string, ignore_na=False, allow_nulls=check.allow_nulls
error=error_string, ignore_na=check.allow_nulls
)
else:
raise FocusNotImplementedError(
@@ -77,6 +107,14 @@ def __generate_column_definition__(
error=f"{rule.check_id}:::Ensures that column is of {data_type.value} type.",
)
)
elif data_type == DataTypes.STRINGIFIED_JSON_OBJECT:
pandera_type = None
column_checks.append(
pa.Check.check_stringified_json_object_dtype(
ignore_na=True,
error=f"{rule.check_id}:::Ensures that column is of {data_type.value} type.",
)
)
else:
pandera_type = pa.String

@@ -151,7 +189,7 @@ def generate_pandera_schema(
for rule in rules:
if isinstance(rule, InvalidRule):
checklist[rule.rule_path] = ChecklistObject(
check_name=rule.rule_path,
check_name=os.path.splitext(os.path.basename(rule.rule_path))[0],
column_id="Unknown",
error=f"{rule.error_type}: {rule.error}",
status=ChecklistObjectStatus.ERRORED,
@@ -180,4 +218,7 @@ def generate_pandera_schema(
overrides=overrides,
schema_dict=schema_dict,
)
return pa.DataFrameSchema(schema_dict, strict=False), checklist
return (
pa.DataFrameSchema(schema_dict, strict=False),
checklist,
)
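The `__groupby_fnc__` helper added above exists because pandas drops rows with null group keys by default, which would lose row numbers in the check output. A minimal sketch of that behaviour using pandas directly; the column name and values are hypothetical:

```python
import pandas as pd

GROUP_INDEX_COLUMN = "group_index_column"

# Hypothetical data with one null value in the grouped column.
df = pd.DataFrame({"BilledCost": [1.0, None, 3.0]})

# Default groupby drops the null key, so one row disappears from the result.
assert len(df.groupby(["BilledCost"])) == 2

# Adding a per-row index column and grouping with dropna=False keeps every row
# in its own group, so row numbers survive for the check output.
df[GROUP_INDEX_COLUMN] = range(0, len(df))
assert len(df.groupby(["BilledCost", GROUP_INDEX_COLUMN], dropna=False)) == len(df)
```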
2 changes: 1 addition & 1 deletion focus_validator/config_objects/override.py
@@ -1,7 +1,7 @@
from typing import List

import yaml
from pydantic import BaseModel
from pydantic.v1 import BaseModel


class Override(BaseModel):