diff --git a/.gitignore b/.gitignore index 7511a18ea..0155e5934 100644 --- a/.gitignore +++ b/.gitignore @@ -193,6 +193,8 @@ lending_club_loan_data_*.csv *.pkl # Sample application scorecard model for validation notebook — do not remove! !notebooks/code_samples/model_validation/xgb_model_champion.pkl +# Sample logistic regression model for validation series — do not remove! +!notebooks/tutorials/model_validation/lr_model_champion.pkl notebooks/llm/datasets/*.jsonl diff --git a/notebooks/code_samples/model_validation/validate_application_scorecard.ipynb b/notebooks/code_samples/model_validation/validate_application_scorecard.ipynb index a2a6d900e..5946c78a7 100644 --- a/notebooks/code_samples/model_validation/validate_application_scorecard.ipynb +++ b/notebooks/code_samples/model_validation/validate_application_scorecard.ipynb @@ -1428,7 +1428,7 @@ "\n", "## Run feature importance tests\n", "\n", - "We want to verify the relative influence of different input features on our models' predictions, as well as inspect the differences between our champion and challenger model to see if a certain model offers more understandable or logical importance scores for features.\n", + "We also want to verify the relative influence of different input features on our models' predictions, as well as inspect the differences between our champion and challenger model to see if a certain model offers more understandable or logical importance scores for features.\n", "\n", "Use `list_tests()` to identify all the feature importance tests for classification:" ] @@ -1580,7 +1580,7 @@ "\n", "Our final task is to verify that all the tests provided by the model development team were run and reported accurately. Note the appended `result_ids` to delineate which dataset we ran the test with for the relevant tests.\n", "\n", - "Here, we'll specify all the tests we'd like to independently rerun in a dictionary called `test_config`:" + "Here, we'll specify all the tests we'd like to independently rerun in a dictionary called `test_config`. **Note here that `inputs` and `input_grid` expect the `input_id` of the dataset or model as the value rather than the variable name we specified**:" ] }, { diff --git a/notebooks/tutorials/model_development/101-set_up_validmind.ipynb b/notebooks/tutorials/model_development/1-set_up_validmind.ipynb similarity index 90% rename from notebooks/tutorials/model_development/101-set_up_validmind.ipynb rename to notebooks/tutorials/model_development/1-set_up_validmind.ipynb index 1a316cbba..46a002a83 100644 --- a/notebooks/tutorials/model_development/101-set_up_validmind.ipynb +++ b/notebooks/tutorials/model_development/1-set_up_validmind.ipynb @@ -2,10 +2,10 @@ "cells": [ { "cell_type": "markdown", - "id": "97710f2a", + "id": "b6fa2ac0", "metadata": {}, "source": [ - "# ValidMind for model development — 101 Set up the ValidMind Library\n", + "# ValidMind for model development 1 — Set up the ValidMind Library\n", "\n", "Learn how to use ValidMind for your end-to-end model documentation process based on common model development scenarios with our series of four introductory notebooks. 
This first notebook walks you through the initial setup of the ValidMind Library.\n", "\n", @@ -14,7 +14,7 @@ }, { "cell_type": "markdown", - "id": "d3bb0ff8", + "id": "fe2e0eca", "metadata": {}, "source": [ "::: {.content-hidden when-format=\"html\"}\n", @@ -30,6 +30,7 @@ " - [Get your code snippet](#toc3_2_1_) \n", "- [Getting to know ValidMind](#toc4_) \n", " - [Preview the documentation template](#toc4_1_) \n", + " - [View model documentation in the ValidMind Platform](#toc4_1_1_) \n", " - [Explore available tests](#toc4_2_) \n", "- [Upgrade ValidMind](#toc5_) \n", "- [In summary](#toc6_) \n", @@ -49,7 +50,7 @@ }, { "cell_type": "markdown", - "id": "d78e3887", + "id": "814da22c", "metadata": {}, "source": [ "\n", @@ -66,7 +67,7 @@ }, { "cell_type": "markdown", - "id": "f40a5e0a", + "id": "4b966a95", "metadata": {}, "source": [ "\n", @@ -80,7 +81,7 @@ }, { "cell_type": "markdown", - "id": "12af6ba2", + "id": "87936431", "metadata": {}, "source": [ "\n", @@ -94,7 +95,7 @@ }, { "cell_type": "markdown", - "id": "5f9cc87c", + "id": "cb9f8dc1", "metadata": {}, "source": [ "\n", @@ -110,7 +111,7 @@ }, { "cell_type": "markdown", - "id": "31c5cde0", + "id": "a0d16aca", "metadata": {}, "source": [ "\n", @@ -145,7 +146,7 @@ }, { "cell_type": "markdown", - "id": "1c06378f", + "id": "215d62a7", "metadata": {}, "source": [ "\n", @@ -173,7 +174,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8883bbc3", + "id": "827eb6bd", "metadata": {}, "outputs": [], "source": [ @@ -182,7 +183,7 @@ }, { "cell_type": "markdown", - "id": "780b6b39", + "id": "5e37f9fe", "metadata": {}, "source": [ "\n", @@ -211,7 +212,7 @@ }, { "cell_type": "markdown", - "id": "d00f6f07", + "id": "48eb92b3", "metadata": {}, "source": [ " - Documentation template: `Binary classification`\n", @@ -233,7 +234,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5f22e91d", + "id": "a58d951f", "metadata": {}, "outputs": [], "source": [ @@ -256,7 +257,7 @@ }, { "cell_type": "markdown", - "id": "c3186121", + "id": "99cf2df8", "metadata": {}, "source": [ "\n", @@ -280,13 +281,31 @@ { "cell_type": "code", "execution_count": null, - "id": "32ab4cac", + "id": "819a40bc", "metadata": {}, "outputs": [], "source": [ "vm.preview_template()" ] }, + { + "cell_type": "markdown", + "id": "cf63d701", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### View model documentation in the ValidMind Platform\n", + "\n", + "Next, let's head to the ValidMind Platform to see the template in action:\n", + "\n", + "1. In a browser, [log in to ValidMind](https://docs.validmind.ai/guide/configuration/log-in-to-validmind.html).\n", + "\n", + "2. In the left sidebar, navigate to **Inventory** and select the model you registered for this \"ValidMind for model development\" series of notebooks.\n", + "\n", + "3. Click on the **Documentation** for your model and note how the structure of the documentation matches our preview above." + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -303,7 +322,7 @@ { "cell_type": "code", "execution_count": null, - "id": "acf76128", + "id": "7ccc7776", "metadata": {}, "outputs": [], "source": [ @@ -326,7 +345,7 @@ { "cell_type": "code", "execution_count": null, - "id": "95bede03", + "id": "f5d3216d", "metadata": {}, "outputs": [], "source": [ @@ -347,7 +366,7 @@ }, { "cell_type": "markdown", - "id": "6a7bf101", + "id": "9b8aa1cc", "metadata": {}, "source": [ "You may need to restart your kernel after running the upgrade package for changes to be applied." 
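As a quick companion to the upgrade steps above, you can also confirm programmatically which version of the library is active after restarting the kernel. This is a minimal sketch; it assumes the package exposes a top-level `__version__` attribute backed by the `validmind/__version__.py` module linked above:

```python
import validmind as vm

# After upgrading and restarting the kernel, verify that the newly
# installed version is the one the notebook is actually importing.
print(vm.__version__)
```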
@@ -355,7 +374,7 @@ }, { "cell_type": "markdown", - "id": "207875f2", + "id": "65ece5fb", "metadata": {}, "source": [ "\n", "\n", "## In summary\n", "\n", "In this first notebook, you learned how to:\n", "\n", - "- [ ] Register a model within the ValidMind Platform\n", - "- [ ] Install and initialize the ValidMind Library\n", - "- [ ] Preview the documentation template for your model\n", - "- [ ] Explore the available tests offered by the ValidMind Library" + "- [x] Register a model within the ValidMind Platform\n", + "- [x] Install and initialize the ValidMind Library\n", + "- [x] Preview the documentation template for your model\n", + "- [x] Explore the available tests offered by the ValidMind Library" ] }, { "cell_type": "markdown", - "id": "29781eb4", + "id": "a262f940", "metadata": {}, "source": [ "\n", "\n", "## Next steps\n", "\n", "\n", "\n", "### Start the model development process\n", "\n", - "Now that the ValidMind Library is connected to your model in the ValidMind Library with the correct template applied, we can go ahead and start the model development process: **[102 Start the model development process](102-start_development_process.ipynb)**" + "Now that the ValidMind Library is connected to your model in the ValidMind Platform with the correct template applied, we can go ahead and start the model development process: **[2 — Start the model development process](2-start_development_process.ipynb)**" ] } ], diff --git a/notebooks/tutorials/model_development/102-start_development_process.ipynb b/notebooks/tutorials/model_development/2-start_development_process.ipynb similarity index 88% rename from notebooks/tutorials/model_development/102-start_development_process.ipynb rename to notebooks/tutorials/model_development/2-start_development_process.ipynb index 68c637d2c..74bec6960 100644 --- a/notebooks/tutorials/model_development/102-start_development_process.ipynb +++ b/notebooks/tutorials/model_development/2-start_development_process.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# ValidMind for model development — 102 Start the model development process\n", + "# ValidMind for model development 2 — Start the model development process\n", "\n", "Learn how to use ValidMind for your end-to-end model documentation process with our series of four introductory notebooks. In this second notebook, you'll run tests and investigate results, then add the results or evidence to your documentation.\n", "\n", @@ -28,7 +28,7 @@ "- [Running tests](#toc3_) \n", " - [Run tabular data tests](#toc3_1_) \n", " - [Utilize test output](#toc3_2_) \n", - "- [Documenting results](#toc4_) \n", + "- [Documenting test results](#toc4_) \n", " - [Run and log multiple tests](#toc4_1_) \n", " - [Run and log an individual test](#toc4_2_) \n", " - [Add individual test results to model documentation](#toc4_2_1_) \n", @@ -62,12 +62,12 @@ "\n", "In order to log test results or evidence to your model documentation with this notebook, you'll need to first have:\n", "\n", - "- [ ] Registered a model within the ValidMind Platform with a predefined documentation template\n", - "- [ ] Installed the ValidMind Library in your local environment, allowing you to access all its features\n", + "- [x] Registered a model within the ValidMind Platform with a predefined documentation template\n", + "- [x] Installed the ValidMind Library in your local environment, allowing you to access all its features\n", "\n", "
Need help with the above steps?\n", "\n", - "Refer to the first notebook in this series: 101 Set up ValidMind
\n" + "Refer to the first notebook in this series: 1 — Set up the ValidMind Library" ] }, { @@ -167,7 +167,10 @@ "\n", "Next, let's say we want to do some data quality assessments by running a few individual tests.\n", "\n", - "Use the [`vm.tests.list_tests()` function](https://docs.validmind.ai/validmind/validmind/tests.html#list_tests) introduced by the first notebook in this series in combination with [`vm.tests.list_tags()`](https://docs.validmind.ai/validmind/validmind/tests.html#list_tags) and [`vm.tests.list_tasks()`](https://docs.validmind.ai/validmind/validmind/tests.html#list_tasks) to find which prebuilt tests are relevant for data quality assessment:\n" + "Use the [`vm.tests.list_tests()` function](https://docs.validmind.ai/validmind/validmind/tests.html#list_tests) introduced by the first notebook in this series in combination with [`vm.tests.list_tags()`](https://docs.validmind.ai/validmind/validmind/tests.html#list_tags) and [`vm.tests.list_tasks()`](https://docs.validmind.ai/validmind/validmind/tests.html#list_tasks) to find which prebuilt tests are relevant for data quality assessment:\n", + "\n", + "- **`tasks`** represent the kind of modeling task associated with a test. Here we'll focus on `classification` tasks.\n", + "- **`tags`** are free-form descriptions providing more details about the test, for example, what category the test falls into. Here we'll focus on the `data_quality` tag.\n" ] }, { @@ -176,8 +179,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Get the list of available tags\n", - "sorted(vm.tests.list_tags())" + "# Get the list of available task types\n", + "sorted(vm.tests.list_tasks())" ] }, { @@ -186,8 +189,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Get the list of available task types\n", - "sorted(vm.tests.list_tasks())" + "# Get the list of available tags\n", + "sorted(vm.tests.list_tags())" ] }, { @@ -391,7 +394,9 @@ "\n", "### Utilize test output\n", "\n", - "You can utilize the output from a ValidMind test for further use, for example, if you want to remove highly correlated features. Below we demonstrate how to retrieve the list of features with the highest correlation coefficients and use them to reduce the final list of features for modeling.\n", + "You can utilize the output from a ValidMind test for further use, for example, if you want to remove highly correlated features. Removing highly correlated features helps make the model simpler, more stable, and easier to understand.\n", + "\n", + "Below we demonstrate how to retrieve the list of features with the highest correlation coefficients and use them to reduce the final list of features for modeling.\n", "\n", "First, we'll run [`validmind.data_validation.HighPearsonCorrelation`](https://docs.validmind.ai/tests/data_validation/HighPearsonCorrelation.html) with the `balanced_raw_dataset` we initialized previously as input as is for comparison with later runs:" ] @@ -415,6 +420,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "The output above shows that the test did not pass according to the value we set for `max_threshold`.\n", + "\n", "`corr_result` is an object of type `TestResult`. 
We can inspect the result object to see what the test has produced:" ] }, @@ -517,7 +524,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Re-running the test with the reduced feature set should pass the test:\n" + "Re-running the test with the reduced feature set should pass the test:" ] }, { @@ -560,9 +567,9 @@ "source": [ "\n", "\n", - "## Documenting results\n", + "## Documenting test results\n", "\n", - "We've now done some analysis on two different datasets, and we should be able to document why certain things were done to the raw data with testing to support it.\n", + "Now that we've done some analysis on two different datasets, we can use ValidMind to easily document why certain things were done to our raw data with testing to support it.\n", "\n", "Every test result returned by the `run_test()` function has a [`.log()` method](https://docs.validmind.ai/validmind/validmind/vm_models.html#TestResult.log) that can be used to send the test results to the ValidMind Platform:\n", "\n", @@ -629,7 +636,7 @@ "\n", "### Run and log an individual test\n", "\n", - "Next, we'll use the previously initialized `vm_balanced_raw_dataset` (that had a highly correlated `Age` column) as input to run an individual test, then log the result to the ValidMind Platform.\n", + "Next, we'll use the previously initialized `vm_balanced_raw_dataset` (that still has a highly correlated `Age` column) as input to run an individual test, then log the result to the ValidMind Platform.\n", "\n", "When running individual tests, **you can use a custom `result_id` to tag the individual result with a unique identifier:** \n", "\n", @@ -763,7 +770,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Using `GridSearchCV`, we'll find the best-performing hyperparameters or settings and save them:" + "We'll split our preprocessed dataset into training and testing, to help assess how well the model generalizes to unseen data:\n", + "\n", + "- We start by dividing our `balanced_raw_no_age_df` dataset into training and test subsets using `train_test_split`, with 80% of the data allocated to training (`train_df`) and 20% to testing (`test_df`).\n", + "- From each subset, we separate the features (all columns except \"Exited\") into `X_train` and `X_test`, and the target column (\"Exited\") into `y_train` and `y_test`." 
] }, { @@ -772,18 +782,30 @@ "metadata": {}, "outputs": [], "source": [ - "from sklearn.linear_model import LogisticRegression\n", "from sklearn.model_selection import train_test_split\n", "\n", - "# Split the input and target variables\n", - "X = balanced_raw_no_age_df.drop(\"Exited\", axis=1)\n", - "y = balanced_raw_no_age_df[\"Exited\"]\n", - "X_train, X_test, y_train, y_test = train_test_split(\n", - " X,\n", - " y,\n", - " test_size=0.2,\n", - " random_state=42,\n", - ")\n", + "train_df, test_df = train_test_split(balanced_raw_no_age_df, test_size=0.20)\n", + "\n", + "X_train = train_df.drop(\"Exited\", axis=1)\n", + "y_train = train_df[\"Exited\"]\n", + "X_test = test_df.drop(\"Exited\", axis=1)\n", + "y_test = test_df[\"Exited\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then using `GridSearchCV`, we'll find the best-performing hyperparameters or settings and save them:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", "\n", "# Logistic Regression grid params\n", "log_reg_params = {\n", @@ -810,9 +832,7 @@ "\n", "### Initialize model evaluation objects\n", "\n", - "The last step for evaluating the model's performance is to initialize the ValidMind `Dataset` and `Model` objects in preparation for assigning model predictions to each dataset.\n", - "\n", - "Use the `init_dataset` and [`init_model`](https://docs.validmind.ai/validmind/validmind.html#init_model) functions to initialize these objects:\n" + "The last step for evaluating the model's performance is to initialize the ValidMind `Dataset` and `Model` objects in preparation for assigning model predictions to each dataset." ] }, { @@ -821,11 +841,7 @@ "metadata": {}, "outputs": [], "source": [ - "train_df = X_train\n", - "train_df[\"Exited\"] = y_train\n", - "test_df = X_test\n", - "test_df[\"Exited\"] = y_test\n", - "\n", + "# Initialize the datasets into their own dataset objects\n", "vm_train_ds = vm.init_dataset(\n", " input_id=\"train_dataset_final\",\n", " dataset=train_df,\n", @@ -836,8 +852,24 @@ " input_id=\"test_dataset_final\",\n", " dataset=test_df,\n", " target_column=\"Exited\",\n", - ")\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You'll also need to initialize a ValidMind model object (`vm_model`) that can be passed to other functions for analysis and tests on the data for each of our three models.\n", "\n", + "You simply initialize this model object with [`vm.init_model()`](https://docs.validmind.ai/validmind/validmind.html#init_model):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "# Register the model\n", "vm_model = vm.init_model(log_reg, input_id=\"log_reg_model_v1\")" ] @@ -850,7 +882,10 @@ "\n", "### Assign predictions\n", "\n", - "Once the model has been registered you can assign model predictions to the training and test datasets. 
The [`assign_predictions()` method](https://docs.validmind.ai/validmind/validmind/vm_models.html#VMDataset.assign_predictions) from the `Dataset` object can link existing predictions to any number of models.\n", + "Once the model has been registered you can assign model predictions to the training and test datasets.\n", + "\n", + "- The [`assign_predictions()` method](https://docs.validmind.ai/validmind/validmind/vm_models.html#assign_predictions) from the `Dataset` object can link existing predictions to any number of models.\n", + "- This method links the model's class prediction values and probabilities to our `vm_train_ds` and `vm_test_ds` datasets.\n", "\n", "If no prediction values are passed, the method will compute predictions automatically:\n" ] @@ -917,14 +952,14 @@ "\n", "In this second notebook, you learned how to:\n", "\n", - "- [ ] Import a sample dataset\n", - "- [ ] Identify which tests you might want to run with ValidMind\n", - "- [ ] Initialize ValidMind datasets\n", - "- [ ] Run individual tests\n", - "- [ ] Utilize the output from tests you've run\n", - "- [ ] Log test results from sets of or individual tests as evidence to the ValidMind Platform\n", - "- [ ] Add supplementary individual test results to your documentation\n", - "- [ ] Assign model predictions to your ValidMind datasets\n" + "- [x] Import a sample dataset\n", + "- [x] Identify which tests you might want to run with ValidMind\n", + "- [x] Initialize ValidMind datasets\n", + "- [x] Run individual tests\n", + "- [x] Utilize the output from tests you've run\n", + "- [x] Log test results from sets of or individual tests as evidence to the ValidMind Platform\n", + "- [x] Add supplementary individual test results to your documentation\n", + "- [x] Assign model predictions to your ValidMind model objects\n" ] }, { @@ -944,7 +979,7 @@ "\n", "### Integrate custom tests\n", "\n", - "Now that you're familiar with the basics of using the ValidMind Library to run and log tests to provide evidence for your model documentation, let's learn how to incorporate your own custom tests into ValidMind: **[103 Integrate custom tests](103-integrate_custom_tests.ipynb)**" + "Now that you're familiar with the basics of using the ValidMind Library to run and log tests to provide evidence for your model documentation, let's learn how to incorporate your own custom tests into ValidMind: **[3 — Integrate custom tests](3-integrate_custom_tests.ipynb)**" ] } ], diff --git a/notebooks/tutorials/model_development/103-integrate_custom_tests.ipynb b/notebooks/tutorials/model_development/3-integrate_custom_tests.ipynb similarity index 93% rename from notebooks/tutorials/model_development/103-integrate_custom_tests.ipynb rename to notebooks/tutorials/model_development/3-integrate_custom_tests.ipynb index 31cd0758f..038f45c38 100644 --- a/notebooks/tutorials/model_development/103-integrate_custom_tests.ipynb +++ b/notebooks/tutorials/model_development/3-integrate_custom_tests.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# ValidMind for model development — 103 Integrate custom tests\n", + "# ValidMind for model development 3 — Integrate custom tests\n", "\n", "Learn how to use ValidMind for your end-to-end model documentation process with our series of four introductory notebooks. 
In this third notebook, supplement ValidMind tests with your own and include them as additional evidence in your documentation.\n", "\n", @@ -67,20 +67,18 @@ "\n", "In order to integrate custom tests with your model documentation with this notebook, you'll need to first have:\n", "\n", - "- [ ] Registered a model within the ValidMind Platform with a predefined documentation template\n", - "- [ ] Installed the ValidMind Library in your local environment, allowing you to access all its features\n", - "- [ ] Learned how to import and initialize datasets for use with ValidMind\n", - "- [ ] Understood the basics of how to run and log tests with ValidMind\n", - "- [ ] Inserted a test-driven block for the results of your `HighPearsonCorrelation:balanced_raw_dataset` test into your model's documentation\n", + "- [x] Registered a model within the ValidMind Platform with a predefined documentation template\n", + "- [x] Installed the ValidMind Library in your local environment, allowing you to access all its features\n", + "- [x] Learned how to import and initialize datasets for use with ValidMind\n", + "- [x] Understood the basics of how to run and log tests with ValidMind\n", + "- [x] Inserted a test-driven block for the results of your `HighPearsonCorrelation:balanced_raw_dataset` test into your model's documentation\n", "\n", "
Need help with the above steps?\n", "\n", "Refer to the first two notebooks in this series:\n", "\n",
- "1. 101 Set up ValidMind\n",
- "2. 102 Start the model development process\n",
+ "- 1 — Set up the ValidMind Library\n",
+ "- 2 — Start the model development process\n",
"\n", "
\n" ] @@ -93,7 +91,7 @@ "\n", "## Setting up\n", "\n", - "This section should be quite familiar to you — as we performed the same actions in the previous notebook, **[102 Start the model development process](102-start_development_process.ipynb)**." + "This section should be quite familiar to you — as we performed the same actions in the previous notebook, **[2 — Start the model development process](2-start_development_process.ipynb)**." ] }, { @@ -342,18 +340,24 @@ "metadata": {}, "outputs": [], "source": [ - "from sklearn.linear_model import LogisticRegression\n", + "# Split the processed dataset into train and test\n", "from sklearn.model_selection import train_test_split\n", "\n", - "# Split the input and target variables\n", - "X = balanced_raw_no_age_df.drop(\"Exited\", axis=1)\n", - "y = balanced_raw_no_age_df[\"Exited\"]\n", - "X_train, X_test, y_train, y_test = train_test_split(\n", - " X,\n", - " y,\n", - " test_size=0.2,\n", - " random_state=42,\n", - ")\n", + "train_df, test_df = train_test_split(balanced_raw_no_age_df, test_size=0.20)\n", + "\n", + "X_train = train_df.drop(\"Exited\", axis=1)\n", + "y_train = train_df[\"Exited\"]\n", + "X_test = test_df.drop(\"Exited\", axis=1)\n", + "y_test = test_df[\"Exited\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", "\n", "# Logistic Regression grid params\n", "log_reg_params = {\n", @@ -389,11 +393,6 @@ "metadata": {}, "outputs": [], "source": [ - "train_df = X_train\n", - "train_df[\"Exited\"] = y_train\n", - "test_df = X_test\n", - "test_df[\"Exited\"] = y_test\n", - "\n", "# Initialize the datasets into their own dataset objects\n", "vm_train_ds = vm.init_dataset(\n", " input_id=\"train_dataset_final\",\n", @@ -627,7 +626,7 @@ "- Since these are `VMDataset` or `VMModel` inputs, they have a special meaning.\n", "- When declaring a `dataset`, `model`, `datasets` or `models` argument in a custom test function, the ValidMind Library will expect these get passed as `inputs` to `run_test()` or `run_documentation_tests()`.\n", "\n", - "Re-running the confusion matrix with `normalize=True` looks like this:\n" + "Re-running the confusion matrix with `normalize=True` and our testing dataset looks like this:\n" ] }, { @@ -640,7 +639,7 @@ "result = vm.tests.run_test(\n", " \"my_custom_tests.ConfusionMatrix:test_dataset_normalized\",\n", " inputs={\"model\": vm_model, \"dataset\": vm_test_ds},\n", - " params={\"normalize\": True},\n", + " params={\"normalize\": True}\n", ")" ] }, @@ -652,7 +651,7 @@ "\n", "### Log the confusion matrix results\n", "\n", - "As we learned in **[102 Start the model development process](102-start_development_process.ipynb)** under **Documenting results** > **Run and log an individual tests**, you can log any result to the ValidMind Platform with the [`.log()` method](https://docs.validmind.ai/validmind/validmind/vm_models.html#TestResult.log) of the result object, allowing you to then add the result to the documentation.\n", + "As we learned in **[2 — Start the model development process](2-start_development_process.ipynb)** under **Documenting results** > **Run and log an individual tests**, you can log any result to the ValidMind Platform with the [`.log()` method](https://docs.validmind.ai/validmind/validmind/vm_models.html#TestResult.log) of the result object, allowing you to then add the result to the documentation.\n", "\n", "You can now do the same for the confusion matrix results:\n" ] 
@@ -735,9 +734,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "After running the command above, confirm that the new `my_tests` directory was created successfully:\n", + "After running the command above, confirm that a new `my_tests` directory was created successfully. For example:\n", "\n", - "\"Screenshot" + "```\n", + "~/notebooks/tutorials/model_development/my_tests/\n", + "```" ] }, { @@ -781,8 +782,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "- [ ] Confirm that the `save()` method saved the `confusion_matrix` function to a file named `ConfusionMatrix.py` in the `my_tests` folder.\n", - "- [ ] Note that the new file provides some context on the origin of the test, which is useful for traceability:\n", + "- [x] Confirm that the `save()` method saved the `confusion_matrix` function to a file named `ConfusionMatrix.py` in the `my_tests` folder.\n", + "- [x] Note that the new file provides some context on the origin of the test, which is useful for traceability:\n", "\n", " ```\n", " # Saved from __main__.confusion_matrix\n", @@ -790,7 +791,7 @@ " # New Test ID: .ConfusionMatrix\n", " ```\n", "\n", - "- [ ] Additionally, the new test function has been stripped off its decorator, as it now resides in a file that will be loaded by the test provider:\n", + "- [x] Additionally, the new test function has been stripped off its decorator, as it now resides in a file that will be loaded by the test provider:\n", "\n", " ```python\n", " def ConfusionMatrix(dataset, model, normalize=False):\n", @@ -808,7 +809,7 @@ "Now that your `my_tests` folder has a sample custom test, let's initialize a test provider that will tell the ValidMind Library where to find your custom tests:\n", "\n", "- ValidMind offers out-of-the-box test providers for local tests (tests in a folder) or a Github provider for tests in a Github repository.\n", - "- You can also create your own test provider by creating a class that has a [`load_test` method](https://docs.validmind.ai/validmind/validmind/tests.html#TestProvider.load_test) that takes a test ID and returns the test function matching that ID.\n", + "- You can also create your own test provider by creating a class that has a [`load_test` method](https://docs.validmind.ai/validmind/validmind/tests.html#load_test) that takes a test ID and returns the test function matching that ID.\n", "\n", "
Want to learn more about test providers?\n", "
\n", @@ -862,7 +863,7 @@ "- For tests that reside in a test provider directory, the test ID will be the `namespace` specified when registering the provider, followed by the path to the test file relative to the tests folder.\n", "- For example, the Confusion Matrix test we created earlier will have the test ID `my_test_provider.ConfusionMatrix`. You could organize the tests in subfolders, say `classification` and `regression`, and the test ID for the Confusion Matrix test would then be `my_test_provider.classification.ConfusionMatrix`.\n", "\n", - "Let's go ahead and re-run the confusion matrix test by using the test ID `my_test_provider.ConfusionMatrix`. This should load the test from the test provider and run it as before.\n" + "Let's go ahead and re-run the confusion matrix test with our testing dataset by using the test ID `my_test_provider.ConfusionMatrix`. This should load the test from the test provider and run it as before.\n" ] }, { @@ -935,10 +936,10 @@ "\n", "In this third notebook, you learned how to:\n", "\n", - "- [ ] Implement a custom inline test\n", - "- [ ] Run and log your custom inline tests\n", - "- [ ] Use external custom test providers\n", - "- [ ] Run and log tests from your custom test providers" + "- [x] Implement a custom inline test\n", + "- [x] Run and log your custom inline tests\n", + "- [x] Use external custom test providers\n", + "- [x] Run and log tests from your custom test providers" ] }, { @@ -958,7 +959,7 @@ "\n", "### Finalize testing and documentation\n", "\n", - "Now that you're proficient at using the ValidMind Library to run and log tests, let's put the last pieces in place to prepare our fully documented sample model for review: **[104 Finalize testing and documentation](104-finalize_testing_documentation.ipynb)**" + "Now that you're proficient at using the ValidMind Library to run and log tests, let's put the last pieces in place to prepare our fully documented sample model for review: **[4 — Finalize testing and documentation](4-finalize_testing_documentation.ipynb)**" ] } ], diff --git a/notebooks/tutorials/model_development/104-finalize_testing_documentation.ipynb b/notebooks/tutorials/model_development/4-finalize_testing_documentation.ipynb similarity index 94% rename from notebooks/tutorials/model_development/104-finalize_testing_documentation.ipynb rename to notebooks/tutorials/model_development/4-finalize_testing_documentation.ipynb index 3ec70b841..13a4f1e14 100644 --- a/notebooks/tutorials/model_development/104-finalize_testing_documentation.ipynb +++ b/notebooks/tutorials/model_development/4-finalize_testing_documentation.ipynb @@ -4,11 +4,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# ValidMind for model development — 104 Finalize testing and documentation\n", + "# ValidMind for model development 4 — Finalize testing and documentation\n", "\n", "Learn how to use ValidMind for your end-to-end model documentation process with our introductory notebook series. In this last notebook, finalize the testing and documentation of your model and have a fully documented sample model ready for review.\n", "\n", - "We'll first use [`run_documentation_tests()`](https://docs.validmind.ai/validmind/validmind.html#run_documentation_tests) previously covered in **[102 Start the model development process](102-start_development_process.ipynb)** to ensure that your custom test results generated in **[103 Integrate custom tests](103-integrate_custom_tests.ipynb)** are included in your documentation. 
Then, we'll view and update the configuration for the entire model documentation template to suit your needs.\n" + "We'll first use [`run_documentation_tests()`](https://docs.validmind.ai/validmind/validmind.html#run_documentation_tests) previously covered in **[2 — Start the model development process](2-start_development_process.ipynb)** to ensure that your custom test results generated in **[3 — Integrate custom tests](3-integrate_custom_tests.ipynb)** are included in your documentation. Then, we'll view and update the configuration for the entire model documentation template to suit your needs.\n" ] }, { @@ -61,24 +61,22 @@ "\n", "In order to finalize the testing and documentation for your sample model, you'll need to first have:\n", "\n", - "- [ ] Registered a model within the ValidMind Platform with a predefined documentation template\n", - "- [ ] Installed the ValidMind Library in your local environment, allowing you to access all its features\n", - "- [ ] Learned how to import and initialize datasets for use with ValidMind\n", - "- [ ] Learned how to run and log default and custom tests with ValidMind, including from external test providers\n", - "- [ ] Inserted test-driven blocks for the results of the following tests into your model's documentation:\n", - " - [ ] `HighPearsonCorrelation:balanced_raw_dataset`\n", - " - [ ] `my_test_provider.ConfusionMatrix`\n", - " - [ ] `my_custom_tests.ConfusionMatrix:test_dataset_normalized`\n", + "- [x] Registered a model within the ValidMind Platform with a predefined documentation template\n", + "- [x] Installed the ValidMind Library in your local environment, allowing you to access all its features\n", + "- [x] Learned how to import and initialize datasets for use with ValidMind\n", + "- [x] Learned how to run and log default and custom tests with ValidMind, including from external test providers\n", + "- [x] Inserted test-driven blocks for the results of the following tests into your model's documentation:\n", + " - [x] `HighPearsonCorrelation:balanced_raw_dataset`\n", + " - [x] `my_test_provider.ConfusionMatrix`\n", + " - [x] `my_custom_tests.ConfusionMatrix:test_dataset_normalized`\n", "\n", "
Need help with the above steps?\n", "\n", "Refer to the first three notebooks in this series:\n", "\n",
- "1. 101 Set up ValidMind\n",
- "2. 102 Start the model development process\n",
- "3. 103 Integrate custom tests\n",
+ "- 1 — Set up the ValidMind Library\n",
+ "- 2 — Start the model development process\n",
+ "- 3 — Integrate custom tests\n",
"\n", "
" ] @@ -340,18 +338,24 @@ "metadata": {}, "outputs": [], "source": [ - "from sklearn.linear_model import LogisticRegression\n", + "# Split the processed dataset into train and test\n", "from sklearn.model_selection import train_test_split\n", "\n", - "# Split the input and target variables\n", - "X = balanced_raw_no_age_df.drop(\"Exited\", axis=1)\n", - "y = balanced_raw_no_age_df[\"Exited\"]\n", - "X_train, X_test, y_train, y_test = train_test_split(\n", - " X,\n", - " y,\n", - " test_size=0.2,\n", - " random_state=42,\n", - ")\n", + "train_df, test_df = train_test_split(balanced_raw_no_age_df, test_size=0.20)\n", + "\n", + "X_train = train_df.drop(\"Exited\", axis=1)\n", + "y_train = train_df[\"Exited\"]\n", + "X_test = test_df.drop(\"Exited\", axis=1)\n", + "y_test = test_df[\"Exited\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", "\n", "# Logistic Regression grid params\n", "log_reg_params = {\n", @@ -387,11 +391,6 @@ "metadata": {}, "outputs": [], "source": [ - "train_df = X_train\n", - "train_df[\"Exited\"] = y_train\n", - "test_df = X_test\n", - "test_df[\"Exited\"] = y_test\n", - "\n", "# Initialize the datasets into their own dataset objects\n", "vm_train_ds = vm.init_dataset(\n", " input_id=\"train_dataset_final\",\n", @@ -638,7 +637,7 @@ "\n", "Let's run all tests in the Model Evaluation section of the documentation. Note that we have been running the sample custom confusion matrix with `normalize=True` to demonstrate the ability to provide custom parameters.\n", "\n", - "In the **Run the model evaluation tests** section of **[102 Start the model development process](102-start_development_process.ipynb)**, you learned how to assign inputs to individual tests with [`run_documentation_tests()`](https://docs.validmind.ai/validmind/validmind.html#run_documentation_tests). Assigning parameters is similar, you only need to provide assign a `params` dictionary to a given test ID, `my_test_provider.ConfusionMatrix` in this case.\n" + "In the **Run the model evaluation tests** section of **[2 — Start the model development process](2-start_development_process.ipynb)**, you learned how to assign inputs to individual tests with [`run_documentation_tests()`](https://docs.validmind.ai/validmind/validmind.html#run_documentation_tests). 
Assigning parameters is similar; you only need to assign a `params` dictionary to a given test ID, `my_test_provider.ConfusionMatrix` in this case.\n" ] }, { @@ -864,9 +863,9 @@ "\n", "In this final notebook, you learned how to:\n", "\n", - "- [ ] Refresh the connection from the ValidMind Library to the ValidMind Platform after you've inserted test-driven blocks to your documentation\n", - "- [ ] Include custom test results in your model documentation\n", - "- [ ] View and configure the configuration for your model documentation template\n", + "- [x] Refresh the connection from the ValidMind Library to the ValidMind Platform after you've inserted test-driven blocks to your documentation\n", + "- [x] Include custom test results in your model documentation\n", + "- [x] View and update the configuration for your model documentation template\n", "\n", "With our ValidMind for model development series of notebooks, you learned how to document a model end-to-end with the ValidMind Library by running through some common scenarios in a typical model development setting:\n", "\n",
diff --git a/notebooks/tutorials/model_development/my_tests_directory.png b/notebooks/tutorials/model_development/my_tests_directory.png deleted file mode 100644 index 47baffe80..000000000 Binary files a/notebooks/tutorials/model_development/my_tests_directory.png and /dev/null differ
diff --git a/notebooks/tutorials/model_validation/1-set_up_validmind_for_validation.ipynb b/notebooks/tutorials/model_validation/1-set_up_validmind_for_validation.ipynb new file mode 100644 index 000000000..2f85b39d7 --- /dev/null +++ b/notebooks/tutorials/model_validation/1-set_up_validmind_for_validation.ipynb @@ -0,0 +1,451 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a9d0996f", + "metadata": {}, + "source": [ + "# ValidMind for model validation 1 — Set up the ValidMind Library for validation\n", + "\n", + "Learn how to use ValidMind for your end-to-end model validation process based on common scenarios with our series of four introductory notebooks. In this first notebook, set up the ValidMind Library in preparation for validating a champion model.\n", + "\n", + "These notebooks use a binary classification model as an example, but the same principles shown here apply to other model types."
+ ] + }, + { + "cell_type": "markdown", + "id": "c747db34", + "metadata": {}, + "source": [ + "::: {.content-hidden when-format=\"html\"}\n", + "## Contents \n", + "- [Introduction](#toc1_) \n", + "- [About ValidMind](#toc2_) \n", + " - [Before you begin](#toc2_1_) \n", + " - [New to ValidMind?](#toc2_2_) \n", + " - [Key concepts](#toc2_3_) \n", + "- [Setting up](#toc3_) \n", + " - [Register a sample model](#toc3_1_) \n", + " - [Assign validator credentials](#toc3_1_1_) \n", + " - [Install the ValidMind Library](#toc3_2_) \n", + " - [Initialize the ValidMind Library](#toc3_3_) \n", + " - [Get your code snippet](#toc3_3_1_) \n", + "- [Getting to know ValidMind](#toc4_) \n", + " - [Preview the validation report template](#toc4_1_) \n", + " - [View validation report in the ValidMind Platform](#toc4_1_1_) \n", + " - [Explore available tests](#toc4_2_) \n", + "- [Upgrade ValidMind](#toc5_) \n", + "- [In summary](#toc6_) \n", + "- [Next steps](#toc7_) \n", + " - [Start the model validation process](#toc7_1_) \n", + "\n", + ":::\n", + "\n", + "" + ] + }, + { + "cell_type": "markdown", + "id": "f1d4715f", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Introduction\n", + "\n", + "Model validation aims to independently assess the compliance of *champion models* created by model developers with regulatory guidance by conducting thorough testing and analysis, potentially including the use of challenger models to benchmark performance. Assessments, presented in the form of a validation report, typically include *model findings* and recommendations to address those issues.\n", + "\n", + "A *binary classification model* is a type of predictive model used in churn analysis to identify customers who are likely to leave a service or subscription by analyzing various behavioral, transactional, and demographic factors.\n", + "\n", + "- This model helps businesses take proactive measures to retain at-risk customers by offering personalized incentives, improving customer service, or adjusting pricing strategies.\n", + "- Effective validation of a churn prediction model ensures that businesses can accurately identify potential churners, optimize retention efforts, and enhance overall customer satisfaction while minimizing revenue loss." + ] + }, + { + "cell_type": "markdown", + "id": "14c2d80d", + "metadata": {}, + "source": [ + "\n", + "\n", + "## About ValidMind\n", + "\n", + "ValidMind is a suite of tools for managing model risk, including risk associated with AI and statistical models.\n", + "\n", + "You use the ValidMind Library to automate comparison and other validation tests, and then use the ValidMind Platform to submit compliance assessments of champion models via comprehensive validation reports. Together, these products simplify model risk management, facilitate compliance with regulations and institutional standards, and enhance collaboration between yourself and model developers." + ] + }, + { + "cell_type": "markdown", + "id": "151a4ca5", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Before you begin\n", + "\n", + "This notebook assumes you have basic familiarity with Python, including an understanding of how functions work. If you are new to Python, you can still run the notebook but we recommend further familiarizing yourself with the language. \n", + "\n", + "If you encounter errors due to missing modules in your Python environment, install the modules with `pip install`, and then re-run the notebook. 
For more help, refer to [Installing Python Modules](https://docs.python.org/3/installing/index.html)." + ] + }, + { + "cell_type": "markdown", + "id": "089c960e", + "metadata": {}, + "source": [ + "\n", + "\n", + "### New to ValidMind?\n", + "\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", + "\n", + "
For access to all features available in this notebook, create a free ValidMind account.\n", + "\n", + "Signing up is FREE — Register with ValidMind
" + ] + }, + { + "cell_type": "markdown", + "id": "5f307177", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Key concepts\n", + "\n", + "**Validation report**: A comprehensive and structured assessment of a model’s development and performance, focusing on verifying its integrity, appropriateness, and alignment with its intended use. It includes analyses of model assumptions, data quality, performance metrics, outcomes of testing procedures, and risk considerations. The validation report supports transparency, regulatory compliance, and informed decision-making by documenting the validator’s independent review and conclusions.\n", + "\n", + "**Validation report template**: Serves as a standardized framework for conducting and documenting model validation activities. It outlines the required sections, recommended analyses, and expected validation tests, ensuring consistency and completeness across validation reports. The template helps guide validators through a systematic review process while promoting comparability and traceability of validation outcomes.\n", + "\n", + "**Tests**: A function contained in the ValidMind Library, designed to run a specific quantitative test on the dataset or model. Tests are the building blocks of ValidMind, used to evaluate and document models and datasets.\n", + "\n", + "**Metrics**: A subset of tests that do not have thresholds. In the context of this notebook, metrics and tests can be thought of as interchangeable concepts.\n", + "\n", + "**Custom metrics**: Custom metrics are functions that you define to evaluate your model or dataset. These functions can be registered with the ValidMind Library to be used in the ValidMind Platform.\n", + "\n", + "**Inputs**: Objects to be evaluated and documented in the ValidMind Library. They can be any of the following:\n", + "\n", + " - **model**: A single model that has been initialized in ValidMind with [`vm.init_model()`](https://docs.validmind.ai/validmind/validmind.html#init_model).\n", + " - **dataset**: Single dataset that has been initialized in ValidMind with [`vm.init_dataset()`](https://docs.validmind.ai/validmind/validmind.html#init_dataset).\n", + " - **models**: A list of ValidMind models - usually this is used when you want to compare multiple models in your custom metric.\n", + " - **datasets**: A list of ValidMind datasets - usually this is used when you want to compare multiple datasets in your custom metric. (Learn more: [Run tests with multiple datasets](https://docs.validmind.ai/notebooks/how_to/run_tests_that_require_multiple_datasets.html))\n", + "\n", + "**Parameters**: Additional arguments that can be passed when running a ValidMind test, used to pass additional information to a metric, customize its behavior, or provide additional context.\n", + "\n", + "**Outputs**: Custom metrics can return elements like tables or plots. Tables may be a list of dictionaries (each representing a row) or a pandas DataFrame. Plots may be matplotlib or plotly figures." + ] + }, + { + "cell_type": "markdown", + "id": "c42665b8", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Setting up" + ] + }, + { + "cell_type": "markdown", + "id": "0faed42c", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Register a sample model\n", + "\n", + "In a usual model lifecycle, a champion model will have been independently registered in your model inventory and submitted to you for validation by your model development team as part of the effective challenge process. 
(**Learn more:** [Submit for approval](https://docs.validmind.ai/guide/model-documentation/submit-for-approval.html))\n", + "\n", + "For this series of notebooks, we'll have you register a dummy model in the ValidMind Platform inventory and assign yourself as the validator to familiarize you with the ValidMind interface and circumvent the need for an existing model:\n", + "\n", + "1. In a browser, [log in to ValidMind](https://docs.validmind.ai/guide/configuration/log-in-to-validmind.html).\n", + "\n", + "2. In the left sidebar, navigate to **Inventory** and click **+ Register Model**.\n", + "\n", + "3. Enter the model details and click **Continue**. ([Need more help?](https://docs.validmind.ai/guide/model-inventory/register-models-in-inventory.html))\n", + "\n", + " For example, to register a model for use with this notebook, select:\n", + "\n", + " - Documentation template: `Binary classification`\n", + " - Use case: `Marketing/Sales - Attrition/Churn Management`\n", + "\n", + " You can fill in other options according to your preference." + ] + }, + { + "cell_type": "markdown", + "id": "0c350e0d", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Assign validator credentials\n", + "\n", + "In order to log tests as a validator instead of as a developer, on the model details page that appears after you've successfully registered your sample model:\n", + "\n", + "1. Remove yourself as a developer: \n", + "\n", + " - Click on the **DEVELOPERS** tile.\n", + " - Click the **x** next to your name to remove yourself from that model's role.\n", + " - Click **Save** to apply your changes to that role.\n", + "\n", + "2. Add yourself as a validator: \n", + "\n", + " - Click on the **VALIDATORS** tile.\n", + " - Select your name from the drop-down menu.\n", + " - Click **Save** to apply your changes to that role." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Install the ValidMind Library\n", + "\n", + "
Recommended Python versions\n", + "\n", + "Python 3.8 <= x <= 3.11
\n", + "\n", + "To install the library:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "931d8f7f", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -q validmind" + ] + }, + { + "cell_type": "markdown", + "id": "5ec7fcb7", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Initialize the ValidMind Library\n", + "\n", + "ValidMind generates a unique _code snippet_ for each registered model to connect with your validation environment. You initialize the ValidMind Library with this code snippet, which ensures that your test results are uploaded to the correct model when you run the notebook." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Get your code snippet\n", + "\n", + "1. In a browser, [log in to ValidMind](https://docs.validmind.ai/guide/configuration/log-in-to-validmind.html).\n", + "\n", + "2. In the left sidebar, navigate to **Inventory** and select the model you registered for this \"ValidMind for model validation\" series of notebooks.\n", + "\n", + "3. Go to **Getting Started** and click **Copy snippet to clipboard**.\n", + "\n", + "Next, [load your model identifier credentials from an `.env` file](https://docs.validmind.ai/developer/model-documentation/store-credentials-in-env-file.html) or replace the placeholder with your own code snippet:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5d87e2d", + "metadata": {}, + "outputs": [], + "source": [ + "# Load your model identifier credentials from an `.env` file\n", + "\n", + "%load_ext dotenv\n", + "%dotenv .env\n", + "\n", + "# Or replace with your code snippet\n", + "\n", + "import validmind as vm\n", + "\n", + "vm.init(\n", + " # api_host=\"...\",\n", + " # api_key=\"...\",\n", + " # api_secret=\"...\",\n", + " # model=\"...\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "b4b5a00f", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Getting to know ValidMind" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Preview the validation report template\n", + "\n", + "Let's verify that you have connected the ValidMind Library to the ValidMind Platform and that the appropriate *template* is selected for model validation. A template predefines sections for your validation report and provides a general outline to follow, making the validation process much easier.\n", + "\n", + "You will attach evidence to this template in the form of risk assessment notes, findings, and test results later on. For now, **take a look at the default structure that the template provides with [the `vm.preview_template()` function](https://docs.validmind.ai/validmind/validmind.html#preview_template)** from the ValidMind library:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13d34bbb", + "metadata": {}, + "outputs": [], + "source": [ + "vm.preview_template()" + ] + }, + { + "cell_type": "markdown", + "id": "a2e86bc8", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### View validation report in the ValidMind Platform\n", + "\n", + "Next, let's head to the ValidMind Platform to see the template in action:\n", + "\n", + "1. In a browser, [log in to ValidMind](https://docs.validmind.ai/guide/configuration/log-in-to-validmind.html).\n", + "\n", + "2. In the left sidebar, navigate to **Inventory** and select the model you registered for this \"ValidMind for model validation\" series of notebooks.\n", + "\n", + "3. 
Click on the **Validation Report** for your model and note:\n", "\n", " - [x] The risk assessment compliance summary at the top of the report (screenshot below)\n", " - [x] How the structure of the validation report reflects the previewed template\n", "\n", " [Screenshot: the validation report, with the risk assessment compliance summary at the top]\n", "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Explore available tests\n", + "\n", + "Next, let's explore the list of all available tests in the ValidMind Library with [the `vm.tests.list_tests()` function](https://docs.validmind.ai/validmind/validmind/tests.html#list_tests) — we'll later narrow down the tests we want to run from this list when we learn to run tests." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "de6abc2a", + "metadata": {}, + "outputs": [], + "source": [ + "vm.tests.list_tests()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Upgrade ValidMind\n", + "\n", + "
After installing ValidMind, you’ll want to periodically make sure you are on the latest version to access any new features and other enhancements.
\n", + "\n", + "Retrieve the information for the currently installed version of ValidMind:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10272aa9", + "metadata": {}, + "outputs": [], + "source": [ + "%pip show validmind" + ] + }, + { + "cell_type": "markdown", + "id": "upgrade-version-d64591ca-3073-4b3e-9586-d3577adda203", + "metadata": {}, + "source": [ + "If the version returned is lower than the version indicated in our [production open-source code](https://github.com/validmind/validmind-library/blob/prod/validmind/__version__.py), restart your notebook and run:\n", + "\n", + "```bash\n", + "%pip install --upgrade validmind\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "44657dea", + "metadata": {}, + "source": [ + "You may need to restart your kernel after running the upgrade package for changes to be applied." + ] + }, + { + "cell_type": "markdown", + "id": "39f45f58", + "metadata": {}, + "source": [ + "\n", + "\n", + "## In summary\n", + "\n", + "In this first notebook, you learned how to:\n", + "\n", + "- [x] Register a model within the ValidMind Platform and assign yourself as the validator\n", + "- [x] Install and initialize the ValidMind Library\n", + "- [x] Preview the validation report template for your model\n", + "- [x] Explore the available tests offered by the ValidMind Library\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Next steps\n", + "\n", + "\n", + "\n", + "### Start the model validation process\n", + "\n", + "Now that the ValidMind Library is connected to your model in the ValidMind Library with the correct template applied, we can go ahead and start the model validation process: **[2 — Start the model validation process](2-start_validation_process.ipynb)**" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ValidMind Library", + "language": "python", + "name": "validmind" + }, + "language_info": { + "name": "python", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/tutorials/model_validation/2-start_validation_process.ipynb b/notebooks/tutorials/model_validation/2-start_validation_process.ipynb new file mode 100644 index 000000000..5493f1f9c --- /dev/null +++ b/notebooks/tutorials/model_validation/2-start_validation_process.ipynb @@ -0,0 +1,873 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ValidMind for model validation 2 — Start the model validation process\n", + "\n", + "Learn how to use ValidMind for your end-to-end model validation process with our series of four introductory notebooks. In this second notebook, independently verify the data quality tests performed on the dataset used to train the champion model.\n", + "\n", + "You'll learn how to run relevant validation tests with ValidMind, log the results of those tests to the ValidMind Platform, and insert your logged test results as evidence into your validation report. You'll become familiar with the tests available in ValidMind, as well as how to run them. 
Running tests during model validation is crucial to the effective challenge process, as we want to independently evaluate the evidence and assessments provided by the model development team.\n", + "\n", + "While running our tests in this notebook, we'll focus on:\n", + "\n", + "- Ensuring that data used for training and testing the model is of appropriate data quality\n", + "- Ensuring that the raw data has been preprocessed appropriately and that the resulting final dataset reflects this\n", + "\n", + "**For a full list of out-of-the-box tests,** refer to our [Test descriptions](https://docs.validmind.ai/developer/model-testing/test-descriptions.html) or try the interactive [Test sandbox](https://docs.validmind.ai/developer/model-testing/test-sandbox.html)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "::: {.content-hidden when-format=\"html\"}\n", + "## Contents \n", + "- [Prerequisites](#toc1_) \n", + "- [Setting up](#toc2_) \n", + "  - [Initialize the ValidMind Library](#toc2_1_) \n", + "- [Load the sample dataset](#toc3_) \n", + "- [Verifying data quality adjustments](#toc4_) \n", + "  - [Identify qualitative tests](#toc4_1_) \n", + "  - [Initialize the ValidMind datasets](#toc4_2_) \n", + "  - [Run data quality tests](#toc4_3_) \n", + "    - [Run tabular data tests](#toc4_3_1_) \n", + "  - [Remove highly correlated features](#toc4_4_) \n", + "- [Documenting test results](#toc5_) \n", + "  - [Configure and run comparison tests](#toc5_1_) \n", + "  - [Log tests with unique identifiers](#toc5_2_) \n", + "  - [Add test results to reporting](#toc5_3_) \n", + "- [Split the preprocessed dataset](#toc6_) \n", + "  - [Initialize the split datasets](#toc6_1_) \n", + "- [In summary](#toc7_) \n", + "- [Next steps](#toc8_) \n", + "  - [Develop potential challenger models](#toc8_1_) \n", + "\n", + ":::\n", + "\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Prerequisites\n", + "\n", + "In order to independently assess the quality of your datasets with this notebook, you'll need to first have:\n", + "\n", + "- [x] Registered a model within the ValidMind Platform and granted yourself access to the model as a validator\n", + "- [x] Installed the ValidMind Library in your local environment, allowing you to access all its features\n", + "\n", + "
Need help with the above steps?\n", + "

\n", + "Refer to the first notebook in this series: 1 — Set up the ValidMind Library for validation
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Setting up" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Initialize the ValidMind Library\n", + "\n", + "First, let's connect up the ValidMind Library to our model we previously registered in the ValidMind Platform:\n", + "\n", + "1. In a browser, [log in to ValidMind](https://docs.validmind.ai/guide/configuration/log-in-to-validmind.html).\n", + "\n", + "2. In the left sidebar, navigate to **Inventory** and select the model you registered for this \"ValidMind for model validation\" series of notebooks.\n", + "\n", + "3. Go to **Getting Started** and click **Copy snippet to clipboard**.\n", + "\n", + "Next, [load your model identifier credentials from an `.env` file](https://docs.validmind.ai/developer/model-documentation/store-credentials-in-env-file.html) or replace the placeholder with your own code snippet:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Make sure the ValidMind Library is installed\n", + "\n", + "%pip install -q validmind\n", + "\n", + "# Load your model identifier credentials from an `.env` file\n", + "\n", + "%load_ext dotenv\n", + "%dotenv .env\n", + "\n", + "# Or replace with your code snippet\n", + "\n", + "import validmind as vm\n", + "\n", + "vm.init(\n", + " # api_host=\"...\",\n", + " # api_key=\"...\",\n", + " # api_secret=\"...\",\n", + " # model=\"...\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Load the sample dataset\n", + "\n", + "Let's first import the public [Bank Customer Churn Prediction](https://www.kaggle.com/datasets/shantanudhakadd/bank-customer-churn-prediction) dataset from Kaggle, which was used to develop the dummy champion model.\n", + "\n", + "We'll use this dataset to review steps that should have been conducted during the initial development and documentation of the model to ensure that the model was built correctly. By independently performing steps taken by the model development team, we can confirm whether the model was built using appropriate and properly processed data.\n", + "\n", + "In our below example, note that:\n", + "\n", + "- The target column, `Exited` has a value of `1` when a customer has churned and `0` otherwise.\n", + "- The ValidMind Library provides a wrapper to automatically load the dataset as a Pandas [DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html) object." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from validmind.datasets.classification import customer_churn as demo_dataset\n", + "\n", + "print(\n", + " f\"Loaded demo dataset with: \\n\\n\\t• Target column: '{demo_dataset.target_column}' \\n\\t• Class labels: {demo_dataset.class_labels}\"\n", + ")\n", + "\n", + "raw_df = demo_dataset.load_data()\n", + "raw_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Verifying data quality adjustments\n", + "\n", + "Let's say that thanks to the documentation submitted by the model development team ([Learn more ...](https://docs.validmind.ai/developer/validmind-library.html#for-model-development)), we know that the sample dataset was first modified before being used to train the champion model. 
After performing some data quality assessments on the raw dataset, it was determined that the dataset required rebalancing, and highly correlated features were also removed." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Identify qualitative tests\n", + "\n", + "During model validation, we use the same data processing logic and training procedure to confirm that the model's results can be reproduced independently, so let's start by doing some data quality assessments by running a few individual tests just like the development team did.\n", + "\n", + "Use the [`vm.tests.list_tests()` function](https://docs.validmind.ai/validmind/validmind/tests.html#list_tests) introduced by the first notebook in this series in combination with [`vm.tests.list_tags()`](https://docs.validmind.ai/validmind/validmind/tests.html#list_tags) and [`vm.tests.list_tasks()`](https://docs.validmind.ai/validmind/validmind/tests.html#list_tasks) to find which prebuilt tests are relevant for data quality assessment:\n", + "\n", + "- **`tasks`** represent the kind of modeling task associated with a test. Here we'll focus on `classification` tasks.\n", + "- **`tags`** are free-form descriptions providing more details about the test, for example, what category the test falls into. Here we'll focus on the `data_quality` tag." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get the list of available task types\n", + "sorted(vm.tests.list_tasks())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get the list of available tags\n", + "sorted(vm.tests.list_tags())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can pass `tags` and `tasks` as parameters to the `vm.tests.list_tests()` function to filter the tests based on the tags and task types.\n", + "\n", + "For example, to find tests related to tabular data quality for classification models, you can call `list_tests()` like this:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vm.tests.list_tests(task=\"classification\", tags=[\"tabular_data\", \"data_quality\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
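As a side note, `list_tests()` also accepts a `filter` string that matches against test IDs, which can be handy when you already know part of a test's name. A small sketch, with the exact matching behavior deferred to the `list_tests()` API reference linked above:

```python
# Narrow the listing further with a filter string that matches against test IDs
# (see the list_tests() API reference for exact matching behavior)
vm.tests.list_tests(
    filter="ClassImbalance",
    task="classification",
    tags=["tabular_data", "data_quality"],
)
```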
Want to learn more about navigating ValidMind tests?\n", + "

\n", + "Refer to our notebook outlining the utilities available for viewing and understanding available ValidMind tests: Explore tests
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Initialize the ValidMind datasets\n", + "\n", + "With the individual tests we want to run identified, the next step is to connect your data with a ValidMind `Dataset` object. **This step is always necessary every time you want to connect a dataset to documentation and produce test results through ValidMind,** but you only need to do it once per dataset.\n", + "\n", + "Initialize a ValidMind dataset object using the [`init_dataset` function](https://docs.validmind.ai/validmind/validmind.html#init_dataset) from the ValidMind (`vm`) module. For this example, we'll pass in the following arguments:\n", + "\n", + "- **`dataset`** — The raw dataset that you want to provide as input to tests.\n", + "- **`input_id`** — A unique identifier that allows tracking what inputs are used when running each individual test.\n", + "- **`target_column`** — A required argument if tests require access to true values. This is the name of the target column in the dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# vm_raw_dataset is now a VMDataset object that you can pass to any ValidMind test\n", + "vm_raw_dataset = vm.init_dataset(\n", + " dataset=raw_df,\n", + " input_id=\"raw_dataset\",\n", + " target_column=\"Exited\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Run data quality tests\n", + "\n", + "Now that we know how to initialize a ValidMind `dataset` object, we're ready to run some tests!\n", + "\n", + "You run individual tests by calling [the `run_test` function](https://docs.validmind.ai/validmind/validmind/tests.html#run_test) provided by the `validmind.tests` module. For the examples below, we'll pass in the following arguments:\n", + "\n", + "- **`test_id`** — The ID of the test to run, as seen in the `ID` column when you run `list_tests`. \n", + "- **`params`** — A dictionary of parameters for the test. These will override any `default_params` set in the test definition. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Run tabular data tests\n", + "\n", + "The inputs expected by a test can also be found in the test definition — let's take [`validmind.data_validation.DescriptiveStatistics`](https://docs.validmind.ai/tests/data_validation/DescriptiveStatistics.html) as an example.\n", + "\n", + "Note that the output of the [`describe_test()` function](https://docs.validmind.ai/validmind/validmind/tests.html#describe_test) below shows that this test expects a `dataset` as input:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vm.tests.describe_test(\"validmind.data_validation.DescriptiveStatistics\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, let's run a few tests to assess the quality of the dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result2 = vm.tests.run_test(\n", + " test_id=\"validmind.data_validation.ClassImbalance\",\n", + " inputs={\"dataset\": vm_raw_dataset},\n", + " params={\"min_percent_threshold\": 30},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The output above shows that [the class imbalance test](https://docs.validmind.ai/tests/data_validation/ClassImbalance.html) did not pass according to the value we set for `min_percent_threshold` — great, this matches what was reported by the model development team.\n", + "\n", + "To address this issue, we'll re-run the test on some processed data. In this case let's apply a very simple rebalancing technique to the dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "raw_copy_df = raw_df.sample(frac=1) # Create a copy of the raw dataset\n", + "\n", + "# Create a balanced dataset with the same number of exited and not exited customers\n", + "exited_df = raw_copy_df.loc[raw_copy_df[\"Exited\"] == 1]\n", + "not_exited_df = raw_copy_df.loc[raw_copy_df[\"Exited\"] == 0].sample(n=exited_df.shape[0])\n", + "\n", + "balanced_raw_df = pd.concat([exited_df, not_exited_df])\n", + "balanced_raw_df = balanced_raw_df.sample(frac=1, random_state=42)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With this new balanced dataset, you can re-run the individual test to see if it now passes the class imbalance test requirement.\n", + "\n", + "As this is technically a different dataset, **remember to first initialize a new ValidMind `Dataset` object** to pass in as input as required by `run_test()`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Register new data and now 'balanced_raw_dataset' is the new dataset object of interest\n", + "vm_balanced_raw_dataset = vm.init_dataset(\n", + " dataset=balanced_raw_df,\n", + " input_id=\"balanced_raw_dataset\",\n", + " target_column=\"Exited\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Pass the initialized `balanced_raw_dataset` as input into the test run\n", + "result = vm.tests.run_test(\n", + " test_id=\"validmind.data_validation.ClassImbalance\",\n", + " inputs={\"dataset\": vm_balanced_raw_dataset},\n", + " params={\"min_percent_threshold\": 30},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "\n", + "\n", + "### Remove highly correlated features\n", + "\n", + "Next, let's also remove highly correlated features from our dataset as outlined by the development team. Removing highly correlated features helps make the model simpler, more stable, and easier to understand.\n", + "\n", + "You can utilize the output from a ValidMind test for further use — in this below example, to retrieve the list of features with the highest correlation coefficients and use them to reduce the final list of features for modeling.\n", + "\n", + "First, we'll run [`validmind.data_validation.HighPearsonCorrelation`](https://docs.validmind.ai/tests/data_validation/HighPearsonCorrelation.html) with the `balanced_raw_dataset` we initialized previously as input as is for comparison with later runs:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "corr_result = vm.tests.run_test(\n", + " test_id=\"validmind.data_validation.HighPearsonCorrelation\",\n", + " params={\"max_threshold\": 0.3},\n", + " inputs={\"dataset\": vm_balanced_raw_dataset},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The output above shows that the test did not pass according to the value we set for `max_threshold` — as reported and expected.\n", + "\n", + "`corr_result` is an object of type `TestResult`. We can inspect the result object to see what the test has produced:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(type(corr_result))\n", + "print(\"Result ID: \", corr_result.result_id)\n", + "print(\"Params: \", corr_result.params)\n", + "print(\"Passed: \", corr_result.passed)\n", + "print(\"Tables: \", corr_result.tables)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's remove the highly correlated features and create a new VM `dataset` object.\n", + "\n", + "We'll begin by checking out the table in the result and extracting a list of features that failed the test:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Extract table from `corr_result.tables`\n", + "features_df = corr_result.tables[0].data\n", + "features_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Extract list of features that failed the test\n", + "high_correlation_features = features_df[features_df[\"Pass/Fail\"] == \"Fail\"][\"Columns\"].tolist()\n", + "high_correlation_features" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, extract the feature names from the list of strings (example: `(Age, Exited)` > `Age`):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "high_correlation_features = [feature.split(\",\")[0].strip(\"()\") for feature in high_correlation_features]\n", + "high_correlation_features" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, it's time to re-initialize the dataset with the highly correlated features removed.\n", + "\n", + "**Note the use of a different `input_id`.** This allows tracking the inputs used when running each individual test." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove the highly correlated features from the dataset\n", + "balanced_raw_no_age_df = balanced_raw_df.drop(columns=high_correlation_features)\n", + "\n", + "# Re-initialize the dataset object\n", + "vm_raw_dataset_preprocessed = vm.init_dataset(\n", + "    dataset=balanced_raw_no_age_df,\n", + "    input_id=\"raw_dataset_preprocessed\",\n", + "    target_column=\"Exited\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Re-running the test with the reduced feature set should pass the test:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "corr_result = vm.tests.run_test(\n", + "    test_id=\"validmind.data_validation.HighPearsonCorrelation\",\n", + "    params={\"max_threshold\": 0.3},\n", + "    inputs={\"dataset\": vm_raw_dataset_preprocessed},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also plot the correlation matrix to visualize the new correlation between features:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "corr_result = vm.tests.run_test(\n", + "    test_id=\"validmind.data_validation.PearsonCorrelationMatrix\",\n", + "    inputs={\"dataset\": vm_raw_dataset_preprocessed},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Documenting test results\n", + "\n", + "Now that we've done some analysis on two different datasets, we can use ValidMind to document why certain changes were made to our raw data, with testing to support it. As we learned above, every test result returned by the `run_test()` function has a `.log()` method that can be used to send the test results to the ValidMind Platform.\n", + "\n", + "When logging validation test results to the platform, you'll need to manually add those results to the desired section of the validation report. To demonstrate how to add test results to your validation report, we'll log our data quality tests and insert the results via the ValidMind Platform." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Configure and run comparison tests\n", + "\n", + "Below, we'll perform comparison tests between the original raw dataset (`raw_dataset`) and the final preprocessed (`raw_dataset_preprocessed`) dataset, again logging the results to the ValidMind Platform. \n", + "\n", + "We can specify all the tests we'd like to run in a dictionary called `test_config`, and we'll pass in the following arguments for each test:\n", + "\n", + "  - **`params`:** Individual test parameters.\n", + "  - **`input_grid`:** Individual test inputs to compare. 
In this case, we'll input our two datasets for comparison.\n", + "\n", + "**Note here that the `input_grid` expects the `input_id` of the dataset as the value rather than the variable name we specified:**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Individual test config with inputs specified\n", + "test_config = {\n", + " \"validmind.data_validation.ClassImbalance\": {\n", + " \"input_grid\": {\"dataset\": [\"raw_dataset\", \"raw_dataset_preprocessed\"]},\n", + " \"params\": {\"min_percent_threshold\": 30}\n", + " },\n", + " \"validmind.data_validation.HighPearsonCorrelation\": {\n", + " \"input_grid\": {\"dataset\": [\"raw_dataset\", \"raw_dataset_preprocessed\"]},\n", + " \"params\": {\"max_threshold\": 0.3}\n", + " },\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then batch run and log our tests in `test_config`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for t in test_config:\n", + " print(t)\n", + " try:\n", + " # Check if test has input_grid\n", + " if 'input_grid' in test_config[t]:\n", + " # For tests with input_grid, pass the input_grid configuration\n", + " if 'params' in test_config[t]:\n", + " vm.tests.run_test(t, input_grid=test_config[t]['input_grid'], params=test_config[t]['params']).log()\n", + " else:\n", + " vm.tests.run_test(t, input_grid=test_config[t]['input_grid']).log()\n", + " else:\n", + " # Original logic for regular inputs\n", + " if 'params' in test_config[t]:\n", + " vm.tests.run_test(t, inputs=test_config[t]['inputs'], params=test_config[t]['params']).log()\n", + " else:\n", + " vm.tests.run_test(t, inputs=test_config[t]['inputs']).log()\n", + " except Exception as e:\n", + " print(f\"Error running test {t}: {str(e)}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
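Because the keys of each `test_config` entry (`input_grid`, `params`) mirror keyword arguments of `run_test()`, the loop above can also be written more compactly with dictionary unpacking. A sketch, equivalent under the assumption that every entry contains only valid `run_test()` keyword arguments:

```python
# Compact variant of the batch run above: unpack each config entry directly
# into run_test() (assumes entries only hold valid keyword arguments)
for test_id, config in test_config.items():
    try:
        vm.tests.run_test(test_id, **config).log()
    except Exception as e:
        print(f"Error running test {test_id}: {e}")
```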
Note the output returned indicating that a test-driven block doesn't currently exist in your model's documentation for some test IDs. \n", + "

\n", + "That's expected, as when we run validations tests the results logged need to be manually added to your report as part of your compliance assessment process within the ValidMind Platform.
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Log tests with a unique identifiers\n", + "\n", + "Next, we'll use the previously initialized `vm_balanced_raw_dataset` (that still has a highly correlated `Age` column) as input to run an individual test, then log the result to the ValidMind Platform.\n", + "\n", + "When running individual tests, **you can use a custom `result_id` to tag the individual result with a unique identifier:**\n", + "\n", + "- This `result_id` can be appended to `test_id` with a `:` separator.\n", + "- The `balanced_raw_dataset` result identifier will correspond to the `balanced_raw_dataset` input, the dataset that still has the `Age` column." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result = vm.tests.run_test(\n", + " test_id=\"validmind.data_validation.HighPearsonCorrelation:balanced_raw_dataset\",\n", + " params={\"max_threshold\": 0.3},\n", + " inputs={\"dataset\": vm_balanced_raw_dataset},\n", + ")\n", + "result.log()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Add test results to reporting\n", + "\n", + "With some test results logged, let's head to the model we connected to at the beginning of this notebook and learn how to insert a test result into our validation report ([Need more help?](https://docs.validmind.ai/guide/model-validation/assess-compliance.html#link-validator-evidence)).\n", + "\n", + "While the example below focuses on a specific test result, you can follow the same general procedure for your other results:\n", + "\n", + "1. From the **Inventory** in the ValidMind Platform, go to the model you connected to earlier.\n", + "\n", + "2. In the left sidebar that appears for your model, click **Validation Report**.\n", + "\n", + "3. Locate the Data Preparation section and click on **2.2.1. Data Quality** to expand that section.\n", + "\n", + "4. Under the Class Imbalance Assessment section, locate Validator Evidence then click **Link Evidence to Report**:\n", + "\n", + " \"Screenshot\n", + "

\n", + "\n", + "5. Select the Class Imbalance test results we logged: **ValidMind Data Validation Class Imbalance** \n", + "\n", + " \"Screenshot\n", + "

\n", + "\n", + "6. Click **Update Linked Evidence** to add the test results to the validation report.\n", + "\n", + " Confirm that the results for the Class Imbalance test you inserted has been correctly inserted into section **2.2.1. Data Quality** of the report:\n", + "\n", + " \"Screenshot\n", + "

\n", + "\n", + "7. Note that these test results are flagged as **Requires Attention** — as they include comparative results from our initial raw dataset.\n", + "\n", + " Click **See evidence details** to review the LLM-generated description that summarizes the test results, that confirm that our final preprocessed dataset actually passes our test:\n", + "\n", + " \"Screenshot\n", + "

\n", + "\n", + "\n", + "
Here in this text editor, you can make qualitative edits to the draft that ValidMind generated to finalize the test results.\n", + "

\n", + "Learn more: Work with content blocks
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Split the preprocessed dataset\n", + "\n", + "With our raw dataset rebalanced with highly correlated features removed, let's now **spilt our dataset into train and test** in preparation for model evaluation testing.\n", + "\n", + "To start, let's grab the first few rows from the `balanced_raw_no_age_df` dataset we initialized earlier:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "balanced_raw_no_age_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before training the model, we need to encode the categorical features in the dataset:\n", + "\n", + "- Use the `OneHotEncoder` class from the `sklearn.preprocessing` module to encode the categorical features.\n", + "- The categorical features in the dataset are `Geography` and `Gender`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "balanced_raw_no_age_df = pd.get_dummies(\n", + " balanced_raw_no_age_df, columns=[\"Geography\", \"Gender\"], drop_first=True\n", + ")\n", + "balanced_raw_no_age_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Splitting our dataset into training and testing is essential for proper validation testing, as this helps assess how well the model generalizes to unseen data:\n", + "\n", + "- We start by dividing our `balanced_raw_no_age_df` dataset into training and test subsets using `train_test_split`, with 80% of the data allocated to training (`train_df`) and 20% to testing (`test_df`).\n", + "- From each subset, we separate the features (all columns except \"Exited\") into `X_train` and `X_test`, and the target column (\"Exited\") into `y_train` and `y_test`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "train_df, test_df = train_test_split(balanced_raw_no_age_df, test_size=0.20)\n", + "\n", + "X_train = train_df.drop(\"Exited\", axis=1)\n", + "y_train = train_df[\"Exited\"]\n", + "X_test = test_df.drop(\"Exited\", axis=1)\n", + "y_test = test_df[\"Exited\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Initialize the split datasets\n", + "\n", + "Next, let's initialize the training and testing datasets so they are available for use:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vm_train_ds = vm.init_dataset(\n", + "    input_id=\"train_dataset_final\",\n", + "    dataset=train_df,\n", + "    target_column=\"Exited\",\n", + ")\n", + "\n", + "vm_test_ds = vm.init_dataset(\n", + "    input_id=\"test_dataset_final\",\n", + "    dataset=test_df,\n", + "    target_column=\"Exited\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## In summary\n", + "\n", + "In this second notebook, you learned how to:\n", + "\n", + "- [x] Import a sample dataset\n", + "- [x] Identify which tests you might want to run with ValidMind\n", + "- [x] Initialize ValidMind datasets\n", + "- [x] Run individual tests\n", + "- [x] Utilize the output from tests you’ve run\n", + "- [x] Log test results as evidence to the ValidMind Platform\n", + "- [x] Insert test results into your validation report" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Next steps\n", + "\n", + "\n", + "\n", + "### Develop potential challenger models\n", + "\n", + "Now that you're familiar with the basics of using the ValidMind Library, let's use it to develop a challenger model: **[3 — Developing a potential challenger model](3-developing_challenger_model.ipynb)**" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ValidMind Library", + "language": "python", + "name": "validmind" + }, + "language_info": { + "name": "python", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/tutorials/model_validation/3-developing_challenger_model.ipynb b/notebooks/tutorials/model_validation/3-developing_challenger_model.ipynb new file mode 100644 index 000000000..b0d226012 --- /dev/null +++ b/notebooks/tutorials/model_validation/3-developing_challenger_model.ipynb @@ -0,0 +1,871 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ValidMind for model validation 3 — Developing a potential challenger model\n", + "\n", + "Learn how to use ValidMind for your end-to-end model validation process with our series of four introductory notebooks. In this third notebook, develop a potential challenger model and then pass your model and its predictions to ValidMind.\n", + "\n", + "A *challenger model* is an alternate model that attempts to outperform the champion model, ensuring that the best-performing fit-for-purpose model is always considered for deployment. Challenger models also help avoid over-reliance on a single model, and allow testing of new features, algorithms, or data sources without disrupting the production lifecycle."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "::: {.content-hidden when-format=\"html\"}\n", + "## Contents \n", + "- [Prerequisites](#toc1_) \n", + "- [Setting up](#toc2_) \n", + " - [Initialize the ValidMind Library](#toc2_1_) \n", + " - [Import the sample dataset](#toc2_2_) \n", + " - [Preprocess the dataset](#toc2_2_1_) \n", + " - [Split the preprocessed dataset](#toc2_3_) \n", + "- [Import the champion model](#toc3_) \n", + "- [Training a potential challenger model](#toc4_) \n", + " - [Random forest classification model](#toc4_1_) \n", + "- [Initializing the model objects](#toc5_) \n", + " - [Initialize the model objects](#toc5_1_) \n", + " - [Assign predictions](#toc5_2_) \n", + "- [Running model validation tests](#toc6_) \n", + " - [Run model performance tests](#toc6_1_) \n", + " - [Evaluate performance of the champion model](#toc6_1_1_) \n", + " - [Log a model finding](#toc6_1_2_) \n", + " - [Evaluate performance of challenger model](#toc6_1_3_) \n", + " - [Run diagnostic tests](#toc6_2_) \n", + " - [Run feature importance tests](#toc6_3_) \n", + "- [In summary](#toc7_) \n", + "- [Next steps](#toc8_) \n", + " - [Finalize validation and reporting](#toc8_1_) \n", + "\n", + ":::\n", + "\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Prerequisites\n", + "\n", + "In order to develop potential challenger models with this notebook, you'll need to first have:\n", + "\n", + "- [x] Registered a model within the ValidMind Platform and granted yourself access to the model as a validator\n", + "- [x] Installed the ValidMind Library in your local environment, allowing you to access all its features\n", + "- [x] Learned how to import and initialize datasets for use with ValidMind\n", + "- [x] Understood the basics of how to run and log tests with ValidMind\n", + "- [x] Run data quality tests on the datasets used to train the champion model, and logged the results of those tests to ValidMind\n", + "- [x] Inserted your logged test results into your validation report\n", + "\n", + "
Need help with the above steps?\n", + "

\n", + "Refer to the first two notebooks in this series:\n", + "\n", + "- 1 — Set up the ValidMind Library for validation\n", + "- 2 — Start the model validation process\n", + "\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Setting up\n", + "\n", + "This section should be quite familiar to you — as we performed the same actions in the previous notebook, **[2 — Start the model validation process](2-start_validation_process.ipynb)**." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Initialize the ValidMind Library\n", + "\n", + "As usual, let's first connect up the ValidMind Library to our model we previously registered in the ValidMind Platform:\n", + "\n", + "1. In a browser, [log in to ValidMind](https://docs.validmind.ai/guide/configuration/log-in-to-validmind.html).\n", + "\n", + "2. In the left sidebar, navigate to **Inventory** and select the model you registered for this \"ValidMind for model validation\" series of notebooks.\n", + "\n", + "3. Go to **Getting Started** and click **Copy snippet to clipboard**.\n", + "\n", + "Next, [load your model identifier credentials from an `.env` file](https://docs.validmind.ai/developer/model-documentation/store-credentials-in-env-file.html) or replace the placeholder with your own code snippet:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Make sure the ValidMind Library is installed\n", + "\n", + "%pip install -q validmind\n", + "\n", + "# Load your model identifier credentials from an `.env` file\n", + "\n", + "%load_ext dotenv\n", + "%dotenv .env\n", + "\n", + "# Or replace with your code snippet\n", + "\n", + "import validmind as vm\n", + "\n", + "vm.init(\n", + " # api_host=\"...\",\n", + " # api_key=\"...\",\n", + " # api_secret=\"...\",\n", + " # model=\"...\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Import the sample dataset\n", + "\n", + "Next, we'll load in the sample [Bank Customer Churn Prediction](https://www.kaggle.com/datasets/shantanudhakadd/bank-customer-churn-prediction) dataset used to develop the champion model that we will independently preprocess:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the sample dataset\n", + "from validmind.datasets.classification import customer_churn as demo_dataset\n", + "\n", + "print(\n", + " f\"Loaded demo dataset with: \\n\\n\\t• Target column: '{demo_dataset.target_column}' \\n\\t• Class labels: {demo_dataset.class_labels}\"\n", + ")\n", + "\n", + "raw_df = demo_dataset.load_data()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Preprocess the dataset\n", + "\n", + "We’ll apply a simple rebalancing technique to the dataset before continuing:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "raw_copy_df = raw_df.sample(frac=1) # Create a copy of the raw dataset\n", + "\n", + "# Create a balanced dataset with the same number of exited and not exited customers\n", + "exited_df = raw_copy_df.loc[raw_copy_df[\"Exited\"] == 1]\n", + "not_exited_df = raw_copy_df.loc[raw_copy_df[\"Exited\"] == 0].sample(n=exited_df.shape[0])\n", + "\n", + "balanced_raw_df = pd.concat([exited_df, not_exited_df])\n", + "balanced_raw_df = balanced_raw_df.sample(frac=1, random_state=42)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let’s also quickly remove highly correlated features from the dataset using the 
output from a ValidMind test.\n", + "\n", + "As you know, before we can run tests you’ll need to initialize a ValidMind dataset object with the [`init_dataset` function](https://docs.validmind.ai/validmind/validmind.html#init_dataset):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Register new data and now 'balanced_raw_dataset' is the new dataset object of interest\n", + "vm_balanced_raw_dataset = vm.init_dataset(\n", + "    dataset=balanced_raw_df,\n", + "    input_id=\"balanced_raw_dataset\",\n", + "    target_column=\"Exited\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With our balanced dataset initialized, we can then run our test and utilize the output to help us identify the features we want to remove:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Run HighPearsonCorrelation test with our balanced dataset as input and return a result object\n", + "corr_result = vm.tests.run_test(\n", + "    test_id=\"validmind.data_validation.HighPearsonCorrelation\",\n", + "    params={\"max_threshold\": 0.3},\n", + "    inputs={\"dataset\": vm_balanced_raw_dataset},\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# From result object, extract table from `corr_result.tables`\n", + "features_df = corr_result.tables[0].data\n", + "features_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Extract list of features that failed the test\n", + "high_correlation_features = features_df[features_df[\"Pass/Fail\"] == \"Fail\"][\"Columns\"].tolist()\n", + "high_correlation_features" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Extract feature names from the list of strings\n", + "high_correlation_features = [feature.split(\",\")[0].strip(\"()\") for feature in high_correlation_features]\n", + "high_correlation_features" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can then re-initialize the dataset with the highly correlated features removed and a different `input_id`, then re-run the test for confirmation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove the highly correlated features from the dataset\n", + "balanced_raw_no_age_df = balanced_raw_df.drop(columns=high_correlation_features)\n", + "\n", + "# Re-initialize the dataset object\n", + "vm_raw_dataset_preprocessed = vm.init_dataset(\n", + "    dataset=balanced_raw_no_age_df,\n", + "    input_id=\"raw_dataset_preprocessed\",\n", + "    target_column=\"Exited\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Re-run the test with the reduced feature set\n", + "corr_result = vm.tests.run_test(\n", + "    test_id=\"validmind.data_validation.HighPearsonCorrelation\",\n", + "    params={\"max_threshold\": 0.3},\n", + "    inputs={\"dataset\": vm_raw_dataset_preprocessed},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Split the preprocessed dataset\n", + "\n", + "With our raw dataset rebalanced and highly correlated features removed, let's now **split our dataset into train and test** in preparation for model evaluation testing:" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Encode categorical features in the dataset\n", + "balanced_raw_no_age_df = pd.get_dummies(\n", + " balanced_raw_no_age_df, columns=[\"Geography\", \"Gender\"], drop_first=True\n", + ")\n", + "balanced_raw_no_age_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "# Split the dataset into train and test\n", + "train_df, test_df = train_test_split(balanced_raw_no_age_df, test_size=0.20)\n", + "\n", + "X_train = train_df.drop(\"Exited\", axis=1)\n", + "y_train = train_df[\"Exited\"]\n", + "X_test = test_df.drop(\"Exited\", axis=1)\n", + "y_test = test_df[\"Exited\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize the split datasets\n", + "vm_train_ds = vm.init_dataset(\n", + " input_id=\"train_dataset_final\",\n", + " dataset=train_df,\n", + " target_column=\"Exited\",\n", + ")\n", + "\n", + "vm_test_ds = vm.init_dataset(\n", + " input_id=\"test_dataset_final\",\n", + " dataset=test_df,\n", + " target_column=\"Exited\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Import the champion model\n", + "\n", + "With our raw dataset assessed and preprocessed, let's go ahead and import the champion model submitted by the model development team in the format of a `.pkl` file: **[lr_model_champion.pkl](lr_model_champion.pkl)**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import the champion model\n", + "import pickle as pkl\n", + "\n", + "with open(\"lr_model_champion.pkl\", \"rb\") as f:\n", + " log_reg = pkl.load(f)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Training a potential challenger model\n", + "\n", + "We're curious how an alternate model compares to our champion model, so let's train a challenger model as a basis for our testing.\n", + "\n", + "Our champion *logistic regression model* is a simpler, parametric model that assumes a linear relationship between the independent variables and the log-odds of the outcome. While logistic regression may not capture complex patterns as effectively, it offers a high degree of interpretability and is easier to explain to stakeholders. However, model risk is not calculated in isolation from a single factor, but rather in consideration with trade-offs in predictive performance, ease of interpretability, and overall alignment with business objectives." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Random forest classification model\n", + "\n", + "A *random forest classification model* is an ensemble machine learning algorithm that uses multiple decision trees to classify data. In ensemble learning, multiple models are combined to improve prediction accuracy and robustness.\n", + "\n", + "Random forest classification models generally have higher accuracy because they capture complex, non-linear relationships, but as a result they lack transparency in their predictions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import the Random Forest Classification model\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "\n", + "# Create the model instance with 50 decision trees\n", + "rf_model = RandomForestClassifier(\n", + "    n_estimators=50,\n", + "    random_state=42,\n", + ")\n", + "\n", + "# Train the model\n", + "rf_model.fit(X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Initializing the model objects" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Initialize the model objects\n", + "\n", + "In addition to the initialized datasets, you'll also need to initialize a ValidMind model object (`vm_model`) that can be passed to other functions for analysis and tests on the data for each of our two models.\n", + "\n", + "You simply initialize this model object with [`vm.init_model()`](https://docs.validmind.ai/validmind/validmind.html#init_model):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize the champion logistic regression model\n", + "vm_log_model = vm.init_model(\n", + "    log_reg,\n", + "    input_id=\"log_model_champion\",\n", + ")\n", + "\n", + "# Initialize the challenger random forest classification model\n", + "vm_rf_model = vm.init_model(\n", + "    rf_model,\n", + "    input_id=\"rf_model\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Assign predictions\n", + "\n", + "With our models registered, we'll move on to assigning both the predictive probabilities coming directly from each model's predictions, and the binary class predictions derived from those probabilities.\n", + "\n", + "- The [`assign_predictions()` method](https://docs.validmind.ai/validmind/validmind/vm_models.html#assign_predictions) from the `Dataset` object can link existing predictions to any number of models.\n", + "- This method links the model's class prediction values and probabilities to our `vm_train_ds` and `vm_test_ds` datasets.\n", + "\n", + "If no prediction values are passed, the method will compute predictions automatically:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Champion — Logistic regression model\n", + "vm_train_ds.assign_predictions(model=vm_log_model)\n", + "vm_test_ds.assign_predictions(model=vm_log_model)\n", + "\n", + "# Challenger — Random forest classification model\n", + "vm_train_ds.assign_predictions(model=vm_rf_model)\n", + "vm_test_ds.assign_predictions(model=vm_rf_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Running model validation tests\n", + "\n", + "With everything ready for us, let's run the rest of our validation tests. We'll focus on comprehensive testing around model performance of both the champion and challenger models going forward as we've already verified the data quality of the datasets used to train the champion model."
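One note before we continue, tying back to the `assign_predictions()` step above: if you need to validate precomputed outputs, for example scores handed over by the development team, the method also accepts explicit values. A hedged sketch based on the `assign_predictions()` API linked earlier; confirm the keyword names against the docs before relying on them:

```python
# Hedged sketch: link precomputed champion predictions instead of letting
# ValidMind compute them (keyword names per the assign_predictions() API docs)
vm_test_ds.assign_predictions(
    model=vm_log_model,
    prediction_values=log_reg.predict(X_test),
    prediction_probabilities=log_reg.predict_proba(X_test)[:, 1],
)
```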
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Run model performance tests\n", + "\n", + "Let's run some performance tests, beginning with independent testing of our champion logistic regression model, then moving on to our potential challenger model.\n", + "\n", + "Use [`vm.tests.list_tests()`](https://docs.validmind.ai/validmind/validmind/tests.html#list_tests) to identify all the model performance tests for classification:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "vm.tests.list_tests(tags=[\"model_performance\"], task=\"classification\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll isolate the specific tests we want to run in `mpt`:\n", + "\n", + "- [`ClassifierPerformance`](https://docs.validmind.ai/tests/model_validation/sklearn/ClassifierPerformance.html)\n", + "- [`ConfusionMatrix`](https://docs.validmind.ai/tests/model_validation/sklearn/ConfusionMatrix.html)\n", + "- [`MinimumAccuracy`](https://docs.validmind.ai/tests/model_validation/sklearn/MinimumAccuracy.html)\n", + "- [`MinimumF1Score`](https://docs.validmind.ai/tests/model_validation/sklearn/MinimumF1Score.html)\n", + "- [`ROCCurve`](https://docs.validmind.ai/tests/model_validation/sklearn/ROCCurve.html)\n", + "\n", + "As we learned in the previous notebook [2 — Start the model validation process](2-start_validation_process.ipynb), you can use a custom `result_id` to tag the individual result with a unique identifier by appending this `result_id` to the `test_id` with a `:` separator. We'll append an identifier for our champion model here:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mpt = [\n", + " \"validmind.model_validation.sklearn.ClassifierPerformance:logreg_champion\",\n", + " \"validmind.model_validation.sklearn.ConfusionMatrix:logreg_champion\",\n", + " \"validmind.model_validation.sklearn.MinimumAccuracy:logreg_champion\",\n", + " \"validmind.model_validation.sklearn.MinimumF1Score:logreg_champion\",\n", + " \"validmind.model_validation.sklearn.ROCCurve:logreg_champion\"\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Evaluate performance of the champion model\n", + "\n", + "Now, let's run and log our batch of model performance tests using our testing dataset (`vm_test_ds`) for our champion model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for test in mpt:\n", + " vm.tests.run_test(\n", + " test,\n", + " inputs={\n", + " \"dataset\": vm_test_ds, \"model\" : vm_log_model,\n", + " },\n", + " ).log()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
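If you also want a quick programmatic tally of which champion tests passed, each `run_test()` call returns a result object whose `passed` attribute we inspected in the previous notebook. A sketch of the same loop with a tally added:

```python
# Variant of the loop above that records a pass/fail tally per test
champion_results = {}
for test in mpt:
    result = vm.tests.run_test(
        test,
        inputs={"dataset": vm_test_ds, "model": vm_log_model},
    )
    result.log()
    champion_results[test] = result.passed

print(champion_results)
```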
Note the output returned indicating that a test-driven block doesn't currently exist in your model's documentation for some test IDs. \n", + "

\n", + "That's expected, as when we run validations tests the results logged need to be manually added to your report as part of your compliance assessment process within the ValidMind Platform.
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Log a model finding\n", + "\n", + "As we can observe from the output above, our champion model doesn't pass the `MinimumAccuracy` based on the default thresholds of the out-of-the-box test, so let's log a model finding in the ValidMind Platform ([Need more help?](https://docs.validmind.ai/guide/model-validation/add-manage-model-findings.html)):\n", + "\n", + "1. From the **Inventory** in the ValidMind Platform, go to the model you connected to earlier.\n", + "\n", + "2. In the left sidebar that appears for your model, click **Validation Report**.\n", + "\n", + "3. Locate the Data Preparation section and click on **2.2.2. Model Performance** to expand that section.\n", + "\n", + "4. Under the Model Performance Metrics section, locate Findings then click **Link Finding to Report**:\n", + "\n", + " \"Screenshot\n", + "

\n", + "\n", + "5. Click **+ Create New Finding** to add a finding.\n", + "\n", + "6. Enter in the details for your finding, for example:\n", + "\n", + " - **TITLE** — Champion Logistic Regression Model Fails Minimum Accuracy Threshold\n", + " - **RISK AREA** — Model Performance\n", + " - **DOCUMENTATION SECTION** — 3.2. Model Evaluation\n", + " - **DESCRIPTION** — The logistic regression champion model was subjected to a Minimum Accuracy test to determine whether its predictive accuracy meets the predefined performance threshold of 0.7. The model achieved an accuracy score of 0.6136, which falls below the required minimum. As a result, the test produced a Fail outcome.\n", + "\n", + "7. Click **Save**.\n", + "\n", + "8. Select the finding you just added to link to your validation report:\n", + "\n", + " \"Screenshot\n", + "

\n", + "\n", + "9. Click **Update Linked Findings** to insert your finding.\n", + "\n", + "10. Confirm that finding you inserted has been correctly inserted into section **2.2.2. Model Performance** of the report:\n", + "\n", + " \"Screenshot\n", + "

\n", + "\n", + "11. Click on the finding to expand the finding, where you can adjust details such as severity, owner, due date, status, etc. as well as include proposed remediation plans or supporting documentation as attachments." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Evaluate performance of challenger model\n", + "\n", + "We've now conducted similar tests as the model development team for our champion model, with the aim of verifying their test results.\n", + "\n", + "Next, let's see how our challenger models compare. We'll use the same batch of tests here as we did in `mpt`, but append a different `result_id` to indicate that these results should be associated with our challenger model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mpt_chall = [\n", + " \"validmind.model_validation.sklearn.ClassifierPerformance:champion_vs_challenger\",\n", + " \"validmind.model_validation.sklearn.ConfusionMatrix:champion_vs_challenger\",\n", + " \"validmind.model_validation.sklearn.MinimumAccuracy:champion_vs_challenger\",\n", + " \"validmind.model_validation.sklearn.MinimumF1Score:champion_vs_challenger\",\n", + " \"validmind.model_validation.sklearn.ROCCurve:champion_vs_challenger\"\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll run each test once for each model with the same `vm_test_ds` dataset to compare them:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for test in mpt_chall:\n", + " vm.tests.run_test(\n", + " test,\n", + " input_grid={\n", + " \"dataset\": [vm_test_ds], \"model\" : [vm_log_model,vm_rf_model]\n", + " }\n", + " ).log()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
Based on the performance metrics, our challenger random forest classification model passes the MinimumAccuracy test where our champion did not.\n", + "

\n", + "In your validation report, support your recommendation in your finding's Proposed Remediation Plan to investigate the usage of our challenger model by inserting the performance tests we logged with this notebook into the appropriate section.
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Run diagnostic tests\n", + "\n", + "Next we want to inspect the robustness and stability testing comparison between our champion and challenger model.\n", + "\n", + "Use `list_tests()` to identify all the model diagnosis tests for classification:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vm.tests.list_tests(tags=[\"model_diagnosis\"], task=\"classification\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's see if models suffer from any *overfit* potentials and also where there are potential sub-segments of issues with the [`OverfitDiagnosis` test](https://docs.validmind.ai/tests/model_validation/sklearn/OverfitDiagnosis.html). \n", + "\n", + "Overfitting occurs when a model learns the training data too well, capturing not only the true pattern but noise and random fluctuations resulting in excellent performance on the training dataset but poor generalization to new, unseen data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vm.tests.run_test(\n", + " test_id=\"validmind.model_validation.sklearn.OverfitDiagnosis:champion_vs_challenger\",\n", + " input_grid={\n", + " \"datasets\": [[vm_train_ds,vm_test_ds]],\n", + " \"model\" : [vm_log_model,vm_rf_model]\n", + " }\n", + ").log()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's also conduct *robustness* and *stability* testing of the two models with the [`RobustnessDiagnosis` test](https://docs.validmind.ai/tests/model_validation/sklearn/RobustnessDiagnosis.html).\n", + "\n", + "Robustness refers to a model's ability to maintain consistent performance, and stability refers to a model's ability to produce consistent outputs over time across different data subsets." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vm.tests.run_test(\n", + "    test_id=\"validmind.model_validation.sklearn.RobustnessDiagnosis:champion_vs_challenger\",\n", + "    input_grid={\n", + "        \"datasets\": [[vm_train_ds,vm_test_ds]],\n", + "        \"model\" : [vm_log_model,vm_rf_model]\n", + "    },\n", + ").log()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Run feature importance tests\n", + "\n", + "We also want to verify the relative influence of different input features on our models' predictions, as well as inspect the differences between our champion and challenger model to see if a certain model offers more understandable or logical importance scores for features.\n", + "\n", + "Use `list_tests()` to identify all the feature importance tests for classification:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Store the feature importance tests\n", + "FI = vm.tests.list_tests(tags=[\"feature_importance\"], task=\"classification\", pretty=False)\n", + "FI" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Run and log our feature importance tests for both models for the testing dataset\n", + "for test in FI:\n", + "    vm.tests.run_test(\n", + "        f\"{test}:champion_vs_challenger\",\n", + "        input_grid={\n", + "            \"dataset\": [vm_test_ds], \"model\" : [vm_log_model,vm_rf_model]\n", + "        },\n", + "    ).log()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## In summary\n", + "\n", + "In this third notebook, you learned how to:\n", + "\n", + "- [x] Initialize ValidMind model objects\n", + "- [x] Assign predictions and probabilities to your ValidMind model objects\n", + "- [x] Use tests from ValidMind to evaluate the potential of models, including comparative tests between champion and challenger models\n", + "- [x] Log a model finding in the ValidMind Platform" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Next steps\n", + "\n", + "\n", + "\n", + "### Finalize validation and reporting\n", + "\n", + "Now that you're familiar with the basics of using the ValidMind Library to run and log validation tests, let's learn how to implement some custom tests and wrap up our validation: **[4 — Finalize validation and reporting](4-finalize_validation_reporting.ipynb)**" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ValidMind Library", + "language": "python", + "name": "validmind" + }, + "language_info": { + "name": "python", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/tutorials/model_validation/4-finalize_validation_reporting.ipynb b/notebooks/tutorials/model_validation/4-finalize_validation_reporting.ipynb new file mode 100644 index 000000000..1e5561c51 --- /dev/null +++ b/notebooks/tutorials/model_validation/4-finalize_validation_reporting.ipynb @@ -0,0 +1,1207 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ValidMind for model validation 4 — Finalize validation and reporting\n", + "\n", + "Learn how to use ValidMind for your end-to-end model validation process with our series of four introductory notebooks. 
In this last notebook, finalize the compliance assessment process and have a complete validation report ready for review.\n", + "\n", + "This notebook will walk you through how to supplement ValidMind tests with your own custom tests and include them as additional evidence in your validation report. A custom test is any function that takes a set of inputs and parameters as arguments and returns one or more outputs:\n", + "\n", + "- The function can be as simple or as complex as you need it to be — it can use external libraries, make API calls, or do anything else that you can do in Python.\n", + "- The only requirement is that the function signature and return values can be \"understood\" and handled by the ValidMind Library. As such, custom tests offer added flexibility by extending the default tests provided by ValidMind, enabling you to document any type of model or use case.\n", + "\n", + "**For a more in-depth introduction to custom tests,** refer to our [Implement custom tests](../../code_samples/custom_tests/implement_custom_tests.ipynb) notebook." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "::: {.content-hidden when-format=\"html\"}\n", + "## Contents \n", + "- [Prerequisites](#toc1_) \n", + "- [Setting up](#toc2_) \n", + " - [Initialize the ValidMind Library](#toc2_1_) \n", + " - [Import the sample dataset](#toc2_2_) \n", + " - [Split the preprocessed dataset](#toc2_3_) \n", + " - [Import the champion model](#toc2_4_) \n", + " - [Train potential challenger model](#toc2_5_) \n", + " - [Initialize the model objects](#toc2_6_) \n", + "- [Implementing custom tests](#toc3_) \n", + " - [Implement a custom inline test](#toc3_1_) \n", + " - [Create a confusion matrix plot](#toc3_1_1_) \n", + " - [Add parameters to custom tests](#toc3_1_2_) \n", + " - [Pass parameters to custom tests](#toc3_1_3_) \n", + " - [Use external test providers](#toc3_2_) \n", + " - [Create custom tests folder](#toc3_2_1_) \n", + " - [Save an inline test](#toc3_2_2_) \n", + " - [Register a local test provider](#toc3_2_3_) \n", + "- [Verify test runs](#toc4_) \n", + "- [In summary](#toc5_) \n", + "- [Next steps](#toc6_) \n", + " - [Work with your validation report](#toc6_1_) \n", + " - [Learn more](#toc6_2_) \n", + " - [More how-to guides and code samples](#toc6_2_1_) \n", + " - [Discover more learning resources](#toc6_2_2_) \n", + "\n", + ":::\n", + "\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Prerequisites\n", + "\n", + "In order to finalize validation and reporting, you'll need to first have:\n", + "\n", + "- [x] Registered a model within the ValidMind Platform and granted yourself access to the model as a validator\n", + "- [x] Installed the ValidMind Library in your local environment, allowing you to access all its features\n", + "- [x] Learned how to import and initialize datasets and models for use with ValidMind\n", + "- [x] Understood the basics of how to identify and run validation tests\n", + "- [x] Run validation tests for your champion and challenger models, and logged the results of those tests to the ValidMind Platform\n", + "- [x] Inserted your logged test results into your validation report\n", + "- [x] Added some preliminary findings to your validation report\n", + "\n", + "
Need help with the above steps?\n", + "

\n", + "Refer to the first three notebooks in this series:\n", + "\n", + "- 1 — Set up the ValidMind Library for validation\n", + "- 2 — Start the model validation process\n", + "- 2 — Developing a potential challenger model\n", + "\n", + "
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Setting up\n", + "\n", + "This section should be very familiar to you now — as we performed the same actions in the previous two notebooks in this series." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Initialize the ValidMind Library\n", + "\n", + "As usual, let's first connect up the ValidMind Library to our model we previously registered in the ValidMind Platform:\n", + "\n", + "1. In a browser, [log in to ValidMind](https://docs.validmind.ai/guide/configuration/log-in-to-validmind.html).\n", + "\n", + "2. In the left sidebar, navigate to **Inventory** and select the model you registered for this \"ValidMind for model validation\" series of notebooks.\n", + "\n", + "3. Go to **Getting Started** and click **Copy snippet to clipboard**.\n", + "\n", + "Next, [load your model identifier credentials from an `.env` file](https://docs.validmind.ai/developer/model-documentation/store-credentials-in-env-file.html) or replace the placeholder with your own code snippet:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Make sure the ValidMind Library is installed\n", + "\n", + "%pip install -q validmind\n", + "\n", + "# Load your model identifier credentials from an `.env` file\n", + "\n", + "%load_ext dotenv\n", + "%dotenv .env\n", + "\n", + "# Or replace with your code snippet\n", + "\n", + "import validmind as vm\n", + "\n", + "vm.init(\n", + " # api_host=\"...\",\n", + " # api_key=\"...\",\n", + " # api_secret=\"...\",\n", + " # model=\"...\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Import the sample dataset\n", + "\n", + "Next, we'll load in the same sample [Bank Customer Churn Prediction](https://www.kaggle.com/datasets/shantanudhakadd/bank-customer-churn-prediction) dataset used to develop the champion model that we will independently preprocess:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the sample dataset\n", + "from validmind.datasets.classification import customer_churn as demo_dataset\n", + "\n", + "print(\n", + " f\"Loaded demo dataset with: \\n\\n\\t• Target column: '{demo_dataset.target_column}' \\n\\t• Class labels: {demo_dataset.class_labels}\"\n", + ")\n", + "\n", + "raw_df = demo_dataset.load_data()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize the raw dataset for use in ValidMind tests\n", + "vm_raw_dataset = vm.init_dataset(\n", + " dataset=raw_df,\n", + " input_id=\"raw_dataset\",\n", + " target_column=\"Exited\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "raw_copy_df = raw_df.sample(frac=1) # Create a copy of the raw dataset\n", + "\n", + "# Create a balanced dataset with the same number of exited and not exited customers\n", + "exited_df = raw_copy_df.loc[raw_copy_df[\"Exited\"] == 1]\n", + "not_exited_df = raw_copy_df.loc[raw_copy_df[\"Exited\"] == 0].sample(n=exited_df.shape[0])\n", + "\n", + "balanced_raw_df = pd.concat([exited_df, not_exited_df])\n", + "balanced_raw_df = balanced_raw_df.sample(frac=1, random_state=42)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let’s also quickly remove 
highly correlated features from the dataset using the output from a ValidMind test:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Register new data and now 'balanced_raw_dataset' is the new dataset object of interest\n", + "vm_balanced_raw_dataset = vm.init_dataset(\n", + " dataset=balanced_raw_df,\n", + " input_id=\"balanced_raw_dataset\",\n", + " target_column=\"Exited\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Run HighPearsonCorrelation test with our balanced dataset as input and return a result object\n", + "corr_result = vm.tests.run_test(\n", + " test_id=\"validmind.data_validation.HighPearsonCorrelation\",\n", + " params={\"max_threshold\": 0.3},\n", + " inputs={\"dataset\": vm_balanced_raw_dataset},\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# From result object, extract table from `corr_result.tables`\n", + "features_df = corr_result.tables[0].data\n", + "features_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Extract list of features that failed the test\n", + "high_correlation_features = features_df[features_df[\"Pass/Fail\"] == \"Fail\"][\"Columns\"].tolist()\n", + "high_correlation_features" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Extract feature names from the list of strings\n", + "high_correlation_features = [feature.split(\",\")[0].strip(\"()\") for feature in high_correlation_features]\n", + "high_correlation_features" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove the highly correlated features from the dataset\n", + "balanced_raw_no_age_df = balanced_raw_df.drop(columns=high_correlation_features)\n", + "\n", + "# Re-initialize the dataset object\n", + "vm_raw_dataset_preprocessed = vm.init_dataset(\n", + " dataset=balanced_raw_no_age_df,\n", + " input_id=\"raw_dataset_preprocessed\",\n", + " target_column=\"Exited\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Re-run the test with the reduced feature set\n", + "corr_result = vm.tests.run_test(\n", + " test_id=\"validmind.data_validation.HighPearsonCorrelation\",\n", + " params={\"max_threshold\": 0.3},\n", + " inputs={\"dataset\": vm_raw_dataset_preprocessed},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Split the preprocessed dataset\n", + "\n", + "With our raw dataset rebalanced with highly correlated features removed, let's now **spilt our dataset into train and test** in preparation for model evaluation testing:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Encode categorical features in the dataset\n", + "balanced_raw_no_age_df = pd.get_dummies(\n", + " balanced_raw_no_age_df, columns=[\"Geography\", \"Gender\"], drop_first=True\n", + ")\n", + "balanced_raw_no_age_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "# Split the dataset into train and test\n", + "train_df, test_df = 
train_test_split(balanced_raw_no_age_df, test_size=0.20)\n", + "\n", + "X_train = train_df.drop(\"Exited\", axis=1)\n", + "y_train = train_df[\"Exited\"]\n", + "X_test = test_df.drop(\"Exited\", axis=1)\n", + "y_test = test_df[\"Exited\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize the split datasets\n", + "vm_train_ds = vm.init_dataset(\n", + " input_id=\"train_dataset_final\",\n", + " dataset=train_df,\n", + " target_column=\"Exited\",\n", + ")\n", + "\n", + "vm_test_ds = vm.init_dataset(\n", + " input_id=\"test_dataset_final\",\n", + " dataset=test_df,\n", + " target_column=\"Exited\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Import the champion model\n", + "\n", + "With our raw dataset assessed and preprocessed, let's go ahead and import the champion model submitted by the model development team in the format of a `.pkl` file: **[lr_model_champion.pkl](lr_model_champion.pkl)**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import the champion model\n", + "import pickle as pkl\n", + "\n", + "with open(\"lr_model_champion.pkl\", \"rb\") as f:\n", + " log_reg = pkl.load(f)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Train potential challenger model\n", + "\n", + "We'll also train our random forest classification challenger model to see how it compares:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import the Random Forest Classification model\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "\n", + "# Create the model instance with 50 decision trees\n", + "rf_model = RandomForestClassifier(\n", + " n_estimators=50,\n", + " random_state=42,\n", + ")\n", + "\n", + "# Train the model\n", + "rf_model.fit(X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Initialize the model objects\n", + "\n", + "In addition to the initialized datasets, you'll also need to initialize a ValidMind model object (`vm_model`) that can be passed to other functions for analysis and tests on the data for each of our two models:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize the champion logistic regression model\n", + "vm_log_model = vm.init_model(\n", + " log_reg,\n", + " input_id=\"log_model_champion\",\n", + ")\n", + "\n", + "# Initialize the challenger random forest classification model\n", + "vm_rf_model = vm.init_model(\n", + " rf_model,\n", + " input_id=\"rf_model\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Assign predictions to Champion — Logistic regression model\n", + "vm_train_ds.assign_predictions(model=vm_log_model)\n", + "vm_test_ds.assign_predictions(model=vm_log_model)\n", + "\n", + "# Assign predictions to Challenger — Random forest classification model\n", + "vm_train_ds.assign_predictions(model=vm_rf_model)\n", + "vm_test_ds.assign_predictions(model=vm_rf_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Implementing custom tests\n", + "\n", + "Thanks to the model documentation ([Learn more 
...](https://docs.validmind.ai/developer/validmind-library.html#for-model-development)), we know that the model development team implemented a custom test to further evaluate the performance of the champion model.\n", + "\n", + "In a usual model validation situation, you would load a saved custom test provided by the model development team. In the following section, we'll have you implement the same custom test and make it available for reuse, to familiarize you with the processes.\n", + "\n", + "
Want to learn more about custom tests?\n", + "

\n", + "Refer to our in-depth introduction to custom tests: Implement custom tests
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Implement a custom inline test\n", + "\n", + "Let's implement the same custom *inline test* that calculates the confusion matrix for a binary classification model that the model development team used in their performance evaluations.\n", + "\n", + "- An inline test refers to a test written and executed within the same environment as the code being tested — in this case, right in this Jupyter Notebook — without requiring a separate test file or framework.\n", + "- You'll note that the custom test function is just a regular Python function that can include and require any Python library as you see fit." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Create a confusion matrix plot\n", + "\n", + "Let's first create a confusion matrix plot using the `confusion_matrix` function from the `sklearn.metrics` module:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "from sklearn import metrics\n", + "\n", + "# Get the predicted classes\n", + "y_pred = log_reg.predict(vm_test_ds.x)\n", + "\n", + "confusion_matrix = metrics.confusion_matrix(y_test, y_pred)\n", + "\n", + "cm_display = metrics.ConfusionMatrixDisplay(\n", + " confusion_matrix=confusion_matrix, display_labels=[False, True]\n", + ")\n", + "cm_display.plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, create a [`@vm.test` wrapper](https://docs.validmind.ai/validmind/validmind.html#test) that will allow you to create a reusable test. **Note the following changes in the code below:**\n", + "\n", + "- The function `confusion_matrix` takes two arguments `dataset` and `model`. This is a `VMDataset` and `VMModel` object respectively.\n", + " - `VMDataset` objects allow you to access the dataset's true (target) values by accessing the `.y` attribute.\n", + " - `VMDataset` objects allow you to access the predictions for a given model by accessing the `.y_pred()` method.\n", + "- The function docstring provides a description of what the test does. This will be displayed along with the result in this notebook as well as in the ValidMind Platform.\n", + "- The function body calculates the confusion matrix using the `sklearn.metrics.confusion_matrix` function as we just did above.\n", + "- The function then returns the `ConfusionMatrixDisplay.figure_` object — this is important as the ValidMind Library expects the output of the custom test to be a plot or a table.\n", + "- The `@vm.test` decorator is doing the work of creating a wrapper around the function that will allow it to be run by the ValidMind Library. It also registers the test so it can be found by the ID `my_custom_tests.ConfusionMatrix`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@vm.test(\"my_custom_tests.ConfusionMatrix\")\n", + "def confusion_matrix(dataset, model):\n", + " \"\"\"The confusion matrix is a table that is often used to describe the performance of a classification model on a set of data for which the true values are known.\n", + "\n", + " The confusion matrix is a 2x2 table that contains 4 values:\n", + "\n", + " - True Positive (TP): the number of correct positive predictions\n", + " - True Negative (TN): the number of correct negative predictions\n", + " - False Positive (FP): the number of incorrect positive predictions\n", + " - False Negative (FN): the number of incorrect negative predictions\n", + "\n", + " The confusion matrix can be used to assess the holistic performance of a classification model by showing the accuracy, precision, recall, and F1 score of the model on a single figure.\n", + " \"\"\"\n", + " y_true = dataset.y\n", + " y_pred = dataset.y_pred(model=model)\n", + "\n", + " confusion_matrix = metrics.confusion_matrix(y_true, y_pred)\n", + "\n", + " cm_display = metrics.ConfusionMatrixDisplay(\n", + " confusion_matrix=confusion_matrix, display_labels=[False, True]\n", + " )\n", + " cm_display.plot()\n", + "\n", + " plt.close() # close the plot to avoid displaying it\n", + "\n", + " return cm_display.figure_ # return the figure object itself" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can now run the newly created custom test on both the training and test datasets for both models using the [`run_test()` function](https://docs.validmind.ai/validmind/validmind/tests.html#run_test):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Champion train and test\n", + "vm.tests.run_test(\n", + " test_id=\"my_custom_tests.ConfusionMatrix:champion\",\n", + " input_grid={\n", + " \"dataset\": [vm_train_ds,vm_test_ds],\n", + " \"model\" : [vm_log_model]\n", + " }\n", + ").log()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Challenger train and test\n", + "vm.tests.run_test(\n", + " test_id=\"my_custom_tests.ConfusionMatrix:challenger\",\n", + " input_grid={\n", + " \"dataset\": [vm_train_ds,vm_test_ds],\n", + " \"model\" : [vm_rf_model]\n", + " }\n", + ").log()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
Note the output returned indicating that a test-driven block doesn't currently exist in your model's documentation for some test IDs. \n", + "

\n", + "That's expected, as when we run validations tests the results logged need to be manually added to your report as part of your compliance assessment process within the ValidMind Platform.
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Add parameters to custom tests\n", + "\n", + "Custom tests can take parameters just like any other function. To demonstrate, let's modify the `confusion_matrix` function to take an additional parameter `normalize` that will allow you to normalize the confusion matrix:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@vm.test(\"my_custom_tests.ConfusionMatrix\")\n", + "def confusion_matrix(dataset, model, normalize=False):\n", + " \"\"\"The confusion matrix is a table that is often used to describe the performance of a classification model on a set of data for which the true values are known.\n", + "\n", + " The confusion matrix is a 2x2 table that contains 4 values:\n", + "\n", + " - True Positive (TP): the number of correct positive predictions\n", + " - True Negative (TN): the number of correct negative predictions\n", + " - False Positive (FP): the number of incorrect positive predictions\n", + " - False Negative (FN): the number of incorrect negative predictions\n", + "\n", + " The confusion matrix can be used to assess the holistic performance of a classification model by showing the accuracy, precision, recall, and F1 score of the model on a single figure.\n", + " \"\"\"\n", + " y_true = dataset.y\n", + " y_pred = dataset.y_pred(model=model)\n", + "\n", + " if normalize:\n", + " confusion_matrix = metrics.confusion_matrix(y_true, y_pred, normalize=\"all\")\n", + " else:\n", + " confusion_matrix = metrics.confusion_matrix(y_true, y_pred)\n", + "\n", + " cm_display = metrics.ConfusionMatrixDisplay(\n", + " confusion_matrix=confusion_matrix, display_labels=[False, True]\n", + " )\n", + " cm_display.plot()\n", + "\n", + " plt.close() # close the plot to avoid displaying it\n", + "\n", + " return cm_display.figure_ # return the figure object itself" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Pass parameters to custom tests\n", + "\n", + "You can pass parameters to custom tests by providing a dictionary of parameters to the `run_test()` function.\n", + "\n", + "- The parameters will override any default parameters set in the custom test definition. 
Note that `dataset` and `model` are still passed as `inputs`.\n", + "- Since these are `VMDataset` or `VMModel` inputs, they have a special meaning.\n", + "\n", + "Re-running and logging the custom confusion matrix with `normalize=True` for both models and our testing dataset looks like this:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Champion with test dataset and normalize=True\n", + "vm.tests.run_test(\n", + " test_id=\"my_custom_tests.ConfusionMatrix:test_normalized_champion\",\n", + " input_grid={\n", + " \"dataset\": [vm_test_ds],\n", + " \"model\" : [vm_log_model]\n", + " },\n", + " params={\"normalize\": True}\n", + ").log()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Challenger with test dataset and normalize=True\n", + "vm.tests.run_test(\n", + " test_id=\"my_custom_tests.ConfusionMatrix:test_normalized_challenger\",\n", + " input_grid={\n", + " \"dataset\": [vm_test_ds],\n", + " \"model\" : [vm_rf_model]\n", + " },\n", + " params={\"normalize\": True}\n", + ").log()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Use external test providers\n", + "\n", + "Sometimes you may want to reuse the same set of custom tests across multiple models and share them with others in your organization, like the model development team would have done with you in this example workflow featured in this series of notebooks. In this case, you can create an external custom *test provider* that will allow you to load custom tests from a local folder or a Git repository.\n", + "\n", + "In this section you will learn how to declare a local filesystem test provider that allows loading tests from a local folder following these high level steps:\n", + "\n", + "1. Create a folder of custom tests from existing inline tests (tests that exist in your active Jupyter Notebook)\n", + "2. Save an inline test to a file\n", + "3. Define and register a [`LocalTestProvider`](https://docs.validmind.ai/validmind/validmind/tests.html#LocalTestProvider) that points to that folder\n", + "4. Run test provider tests\n", + "5. Add the test results to your documentation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Create custom tests folder\n", + "\n", + "Let's start by creating a new folder that will contain reusable custom tests from your existing inline tests.\n", + "\n", + "The following code snippet will create a new `my_tests` directory in the current working directory if it doesn't exist:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tests_folder = \"my_tests\"\n", + "\n", + "import os\n", + "\n", + "# create tests folder\n", + "os.makedirs(tests_folder, exist_ok=True)\n", + "\n", + "# remove existing tests\n", + "for f in os.listdir(tests_folder):\n", + " # remove files and pycache\n", + " if f.endswith(\".py\") or f == \"__pycache__\":\n", + " os.system(f\"rm -rf {tests_folder}/{f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After running the command above, confirm that a new `my_tests` directory was created successfully. 
For example:\n", + "\n", + "```\n", + "~/notebooks/tutorials/model_validation/my_tests/\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Save an inline test\n", + "\n", + "The `@vm.test` decorator we used in **Implement a custom inline test** above to register one-off custom tests also includes a convenience method on the function object that allows you to simply call `.save()` to save the test to a Python file at a specified path.\n", + "\n", + "While `save()` will get you started by creating the file and saving the function code with the correct name, it won't automatically include any imports, or other functions or variables, outside of the functions that are needed for the test to run. To solve this, pass in an optional `imports` argument ensuring necessary imports are added to the file.\n", + "\n", + "The `confusion_matrix` test requires the following additional imports:\n", + "\n", + "```python\n", + "import matplotlib.pyplot as plt\n", + "from sklearn import metrics\n", + "```\n", + "\n", + "Let's pass these imports to the `save()` method to ensure they are included in the file with the following command:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "confusion_matrix.save(\n", + " # Save it to the custom tests folder we created\n", + " tests_folder,\n", + " imports=[\"import matplotlib.pyplot as plt\", \"from sklearn import metrics\"],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- [x] Confirm that the `save()` method saved the `confusion_matrix` function to a file named `ConfusionMatrix.py` in the `my_tests` folder.\n", + "- [x] Note that the new file provides some context on the origin of the test, which is useful for traceability:\n", + "\n", + " ```\n", + " # Saved from __main__.confusion_matrix\n", + " # Original Test ID: my_custom_tests.ConfusionMatrix\n", + " # New Test ID: .ConfusionMatrix\n", + " ```\n", + "\n", + "- [x] Additionally, the new test function has been stripped off its decorator, as it now resides in a file that will be loaded by the test provider:\n", + "\n", + " ```python\n", + " def ConfusionMatrix(dataset, model, normalize=False):\n", + " ```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Register a local test provider\n", + "\n", + "Now that your `my_tests` folder has a sample custom test, let's initialize a test provider that will tell the ValidMind Library where to find your custom tests:\n", + "\n", + "- ValidMind offers out-of-the-box test providers for local tests (tests in a folder) or a Github provider for tests in a Github repository.\n", + "- You can also create your own test provider by creating a class that has a [`load_test` method](https://docs.validmind.ai/validmind/validmind/tests.html#load_test) that takes a test ID and returns the test function matching that ID.\n", + "\n", + "
Want to learn more about test providers?\n", + "

\n", + "An extended introduction to test providers can be found in: Integrate external test providers
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Initialize a local test provider\n", + "\n", + "For most use cases, using a `LocalTestProvider` that allows you to load custom tests from a designated directory should be sufficient.\n", + "\n", + "**The most important attribute for a test provider is its `namespace`.** This is a string that will be used to prefix test IDs in model documentation. This allows you to have multiple test providers with tests that can even share the same ID, but are distinguished by their namespace.\n", + "\n", + "Let's go ahead and load the custom tests from our `my_tests` directory:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from validmind.tests import LocalTestProvider\n", + "\n", + "# initialize the test provider with the tests folder we created earlier\n", + "my_test_provider = LocalTestProvider(tests_folder)\n", + "\n", + "vm.tests.register_test_provider(\n", + " namespace=\"my_test_provider\",\n", + " test_provider=my_test_provider,\n", + ")\n", + "# `my_test_provider.load_test()` will be called for any test ID that starts with `my_test_provider`\n", + "# e.g. `my_test_provider.ConfusionMatrix` will look for a function named `ConfusionMatrix` in `my_tests/ConfusionMatrix.py` file" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Run test provider tests\n", + "\n", + "Now that we've set up the test provider, we can run any test that's located in the tests folder by using the `run_test()` method as with any other test:\n", + "\n", + "- For tests that reside in a test provider directory, the test ID will be the `namespace` specified when registering the provider, followed by the path to the test file relative to the tests folder.\n", + "- For example, the Confusion Matrix test we created earlier will have the test ID `my_test_provider.ConfusionMatrix`. You could organize the tests in subfolders, say `classification` and `regression`, and the test ID for the Confusion Matrix test would then be `my_test_provider.classification.ConfusionMatrix`.\n", + "\n", + "Let's go ahead and re-run the confusion matrix test with our testing dataset for our two models by using the test ID `my_test_provider.ConfusionMatrix`. This should load the test from the test provider and run it as before." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Champion with test dataset and test provider custom test\n", + "vm.tests.run_test(\n", + " test_id=\"my_test_provider.ConfusionMatrix:champion\",\n", + " input_grid={\n", + " \"dataset\": [vm_test_ds],\n", + " \"model\" : [vm_log_model]\n", + " }\n", + ").log()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Challenger with test dataset and test provider custom test\n", + "vm.tests.run_test(\n", + " test_id=\"my_test_provider.ConfusionMatrix:challenger\",\n", + " input_grid={\n", + " \"dataset\": [vm_test_ds],\n", + " \"model\" : [vm_rf_model]\n", + " }\n", + ").log()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Verify test runs\n", + "\n", + "Our final task is to verify that all the tests provided by the model development team were run and reported accurately. 
Note the appended `result_ids` to delineate which dataset we ran the test with for the relevant tests.\n", + "\n", + "Here, we'll specify all the tests we'd like to independently rerun in a dictionary called `test_config`. **Note here that `inputs` and `input_grid` expect the `input_id` of the dataset or model as the value rather than the variable name we specified**:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_config = {\n", + " # Run with the raw dataset\n", + " 'validmind.data_validation.DatasetDescription:raw_data': {\n", + " 'inputs': {'dataset': 'raw_dataset'}\n", + " },\n", + " 'validmind.data_validation.DescriptiveStatistics:raw_data': {\n", + " 'inputs': {'dataset': 'raw_dataset'}\n", + " },\n", + " 'validmind.data_validation.MissingValues:raw_data': {\n", + " 'inputs': {'dataset': 'raw_dataset'},\n", + " 'params': {'min_threshold': 1}\n", + " },\n", + " 'validmind.data_validation.ClassImbalance:raw_data': {\n", + " 'inputs': {'dataset': 'raw_dataset'},\n", + " 'params': {'min_percent_threshold': 10}\n", + " },\n", + " 'validmind.data_validation.Duplicates:raw_data': {\n", + " 'inputs': {'dataset': 'raw_dataset'},\n", + " 'params': {'min_threshold': 1}\n", + " },\n", + " 'validmind.data_validation.HighCardinality:raw_data': {\n", + " 'inputs': {'dataset': 'raw_dataset'},\n", + " 'params': {\n", + " 'num_threshold': 100,\n", + " 'percent_threshold': 0.1,\n", + " 'threshold_type': 'percent'\n", + " }\n", + " },\n", + " 'validmind.data_validation.Skewness:raw_data': {\n", + " 'inputs': {'dataset': 'raw_dataset'},\n", + " 'params': {'max_threshold': 1}\n", + " },\n", + " 'validmind.data_validation.UniqueRows:raw_data': {\n", + " 'inputs': {'dataset': 'raw_dataset'},\n", + " 'params': {'min_percent_threshold': 1}\n", + " },\n", + " 'validmind.data_validation.TooManyZeroValues:raw_data': {\n", + " 'inputs': {'dataset': 'raw_dataset'},\n", + " 'params': {'max_percent_threshold': 0.03}\n", + " },\n", + " 'validmind.data_validation.IQROutliersTable:raw_data': {\n", + " 'inputs': {'dataset': 'raw_dataset'},\n", + " 'params': {'threshold': 5}\n", + " },\n", + " # Run with the preprocessed dataset\n", + " 'validmind.data_validation.DescriptiveStatistics:preprocessed_data': {\n", + " 'inputs': {'dataset': 'raw_dataset_preprocessed'}\n", + " },\n", + " 'validmind.data_validation.TabularDescriptionTables:preprocessed_data': {\n", + " 'inputs': {'dataset': 'raw_dataset_preprocessed'}\n", + " },\n", + " 'validmind.data_validation.MissingValues:preprocessed_data': {\n", + " 'inputs': {'dataset': 'raw_dataset_preprocessed'},\n", + " 'params': {'min_threshold': 1}\n", + " },\n", + " 'validmind.data_validation.TabularNumericalHistograms:preprocessed_data': {\n", + " 'inputs': {'dataset': 'raw_dataset_preprocessed'}\n", + " },\n", + " 'validmind.data_validation.TabularCategoricalBarPlots:preprocessed_data': {\n", + " 'inputs': {'dataset': 'raw_dataset_preprocessed'}\n", + " },\n", + " 'validmind.data_validation.TargetRateBarPlots:preprocessed_data': {\n", + " 'inputs': {'dataset': 'raw_dataset_preprocessed'},\n", + " 'params': {'default_column': 'loan_status'}\n", + " },\n", + " # Run with the training and test datasets\n", + " 'validmind.data_validation.DescriptiveStatistics:development_data': {\n", + " 'input_grid': {'dataset': ['train_dataset_final', 'test_dataset_final']}\n", + " },\n", + " 'validmind.data_validation.TabularDescriptionTables:development_data': {\n", + " 'input_grid': {'dataset': ['train_dataset_final', 
'test_dataset_final']}\n", + " },\n", + " 'validmind.data_validation.ClassImbalance:development_data': {\n", + " 'input_grid': {'dataset': ['train_dataset_final', 'test_dataset_final']},\n", + " 'params': {'min_percent_threshold': 10}\n", + " },\n", + " 'validmind.data_validation.UniqueRows:development_data': {\n", + " 'input_grid': {'dataset': ['train_dataset_final', 'test_dataset_final']},\n", + " 'params': {'min_percent_threshold': 1}\n", + " },\n", + " 'validmind.data_validation.TabularNumericalHistograms:development_data': {\n", + " 'input_grid': {'dataset': ['train_dataset_final', 'test_dataset_final']}\n", + " },\n", + " 'validmind.data_validation.MutualInformation:development_data': {\n", + " 'input_grid': {'dataset': ['train_dataset_final', 'test_dataset_final']},\n", + " 'params': {'min_threshold': 0.01}\n", + " },\n", + " 'validmind.data_validation.PearsonCorrelationMatrix:development_data': {\n", + " 'input_grid': {'dataset': ['train_dataset_final', 'test_dataset_final']}\n", + " },\n", + " 'validmind.data_validation.HighPearsonCorrelation:development_data': {\n", + " 'input_grid': {'dataset': ['train_dataset_final', 'test_dataset_final']},\n", + " 'params': {'max_threshold': 0.3, 'top_n_correlations': 10}\n", + " },\n", + " 'validmind.model_validation.ModelMetadata': {\n", + " 'input_grid': {'model': ['log_model_champion', 'rf_model']}\n", + " },\n", + " 'validmind.model_validation.sklearn.ModelParameters': {\n", + " 'input_grid': {'model': ['log_model_champion', 'rf_model']}\n", + " },\n", + " 'validmind.model_validation.sklearn.ROCCurve': {\n", + " 'input_grid': {'dataset': ['train_dataset_final', 'test_dataset_final'], 'model': ['log_model_champion']}\n", + " },\n", + " 'validmind.model_validation.sklearn.MinimumROCAUCScore': {\n", + " 'input_grid': {'dataset': ['train_dataset_final', 'test_dataset_final'], 'model': ['log_model_champion']},\n", + " 'params': {'min_threshold': 0.5}\n", + " }\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then batch run and log our tests in `test_config`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for t in test_config:\n", + " print(t)\n", + " try:\n", + " # Check if test has input_grid\n", + " if 'input_grid' in test_config[t]:\n", + " # For tests with input_grid, pass the input_grid configuration\n", + " if 'params' in test_config[t]:\n", + " vm.tests.run_test(t, input_grid=test_config[t]['input_grid'], params=test_config[t]['params']).log()\n", + " else:\n", + " vm.tests.run_test(t, input_grid=test_config[t]['input_grid']).log()\n", + " else:\n", + " # Original logic for regular inputs\n", + " if 'params' in test_config[t]:\n", + " vm.tests.run_test(t, inputs=test_config[t]['inputs'], params=test_config[t]['params']).log()\n", + " else:\n", + " vm.tests.run_test(t, inputs=test_config[t]['inputs']).log()\n", + " except Exception as e:\n", + " print(f\"Error running test {t}: {str(e)}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## In summary\n", + "\n", + "In this final notebook, you learned how to:\n", + "\n", + "- [x] Implement a custom inline test\n", + "- [x] Run and log your custom inline tests\n", + "- [x] Use external custom test providers\n", + "- [x] Run and log tests from your custom test providers\n", + "- [x] Re-run tests provided by your model development team to verify that they were run and reported accurately\n", + "\n", + "With our ValidMind for model validation 
series of notebooks, you learned how to validate a model end-to-end with the ValidMind Library by running through some common scenarios in a typical model validation setting:\n", + "\n", + "- Verifying the data quality steps performed by the model development team\n", + "- Independently replicating the champion model's results and conducting additional tests to assess performance, stability, and robustness\n", + "- Setting up test inputs and a challenger model for comparative analysis\n", + "- Running validation tests, analyzing results, and logging findings to ValidMind" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Next steps" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Work with your validation report\n", + "\n", + "Now that you've logged all your test results and verified the work done by the model development team, head to the ValidMind Platform to wrap up your validation report. Continue to work on your validation report by:\n", + "\n", + "- **Inserting additional test results:** Click **Link Evidence to Report** under any section of 2. Validation in your validation report. (Learn more: [Link evidence to reports](https://docs.validmind.ai/guide/model-validation/assess-compliance.html#link-evidence-to-reports))\n", + "\n", + "- **Making qualitative edits to your test descriptions:** Expand any linked evidence under Validator Evidence and click **See evidence details** to review and edit the ValidMind-generated test descriptions for quality and accuracy.\n", + "\n", + "- **Adding more findings:** Click **Link Finding to Report** in any validation report section, then click **+ Create New Finding**. (Learn more: [Add and manage model findings](https://docs.validmind.ai/guide/model-validation/add-manage-model-findings.html))\n", + "\n", + "- **Adding risk assessment notes:** Click under **Risk Assessment Notes** in any validation report section to access the text editor and content editing toolbar, including an option to generate a draft with AI. Edit your ValidMind-generated test descriptions (Learn more: [Work with content blocks](https://docs.validmind.ai/guide/model-documentation/work-with-content-blocks.html#content-editing-toolbar))\n", + "\n", + "- **Assessing compliance:** Under the Guideline for any validation report section, click **ASSESSMENT** and select the compliance status from the drop-down menu. 
(Learn more: [Provide compliance assessments](https://docs.validmind.ai/guide/model-validation/assess-compliance.html#provide-compliance-assessments))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Learn more\n", + "\n", + "Now that you're familiar with the basics, you can explore the following notebooks to get a deeper understanding on how the ValidMind Library assists you in streamlining model validation:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### More how-to guides and code samples\n", + "\n", + "- [Explore available tests in detail](../../how_to/explore_tests.ipynb)\n", + "- [In-depth guide on running dataset based tests](../../how_to/run_tests/1_run_dataset_based_tests.ipynb)\n", + "- [In-depth guide for running comparison tests](../../how_to/run_tests/2_run_comparison_tests.ipynb)\n", + "- [In-depth guide for implementing custom tests](../../code_samples/custom_tests/implement_custom_tests.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Discover more learning resources\n", + "\n", + "All notebook samples can be found in the following directories of the ValidMind Library GitHub repository:\n", + "\n", + "- [Code samples](https://github.com/validmind/validmind-library/tree/main/notebooks/code_samples)\n", + "- [How-to guides](https://github.com/validmind/validmind-library/tree/main/notebooks/how_to)\n", + "\n", + "Or, visit our [documentation](https://docs.validmind.ai/) to learn more about ValidMind." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ValidMind Library", + "language": "python", + "name": "validmind" + }, + "language_info": { + "name": "python", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/tutorials/model_validation/class-imbalance-results-detail.png b/notebooks/tutorials/model_validation/class-imbalance-results-detail.png new file mode 100644 index 000000000..00251ecaa Binary files /dev/null and b/notebooks/tutorials/model_validation/class-imbalance-results-detail.png differ diff --git a/notebooks/tutorials/model_validation/compliance-summary.png b/notebooks/tutorials/model_validation/compliance-summary.png new file mode 100644 index 000000000..a8abd2dd3 Binary files /dev/null and b/notebooks/tutorials/model_validation/compliance-summary.png differ diff --git a/notebooks/tutorials/model_validation/inserted-class-imbalance-results.png b/notebooks/tutorials/model_validation/inserted-class-imbalance-results.png new file mode 100644 index 000000000..2efea1a09 Binary files /dev/null and b/notebooks/tutorials/model_validation/inserted-class-imbalance-results.png differ diff --git a/notebooks/tutorials/model_validation/inserted-finding.png b/notebooks/tutorials/model_validation/inserted-finding.png new file mode 100644 index 000000000..bb7843653 Binary files /dev/null and b/notebooks/tutorials/model_validation/inserted-finding.png differ diff --git a/notebooks/tutorials/model_validation/inserted-minimum-f1-scores.png b/notebooks/tutorials/model_validation/inserted-minimum-f1-scores.png new file mode 100644 index 000000000..52ae43c71 Binary files /dev/null and b/notebooks/tutorials/model_validation/inserted-minimum-f1-scores.png differ diff --git a/notebooks/tutorials/model_validation/link-finding.png b/notebooks/tutorials/model_validation/link-finding.png new file mode 100644 index 000000000..dd1c8a266 Binary files /dev/null and 
b/notebooks/tutorials/model_validation/link-finding.png differ diff --git a/notebooks/tutorials/model_validation/link-validator-evidence.png b/notebooks/tutorials/model_validation/link-validator-evidence.png new file mode 100644 index 000000000..7403cad67 Binary files /dev/null and b/notebooks/tutorials/model_validation/link-validator-evidence.png differ diff --git a/notebooks/tutorials/model_validation/link-validator-evidence_OLD.png b/notebooks/tutorials/model_validation/link-validator-evidence_OLD.png new file mode 100644 index 000000000..58823e8f2 Binary files /dev/null and b/notebooks/tutorials/model_validation/link-validator-evidence_OLD.png differ diff --git a/notebooks/tutorials/model_validation/lr_model_champion.pkl b/notebooks/tutorials/model_validation/lr_model_champion.pkl new file mode 100644 index 000000000..9b81662b2 Binary files /dev/null and b/notebooks/tutorials/model_validation/lr_model_champion.pkl differ diff --git a/notebooks/tutorials/model_validation/select-finding.png b/notebooks/tutorials/model_validation/select-finding.png new file mode 100644 index 000000000..ba35661d5 Binary files /dev/null and b/notebooks/tutorials/model_validation/select-finding.png differ diff --git a/notebooks/tutorials/model_validation/selecting-class-imbalance-results.png b/notebooks/tutorials/model_validation/selecting-class-imbalance-results.png new file mode 100644 index 000000000..cf8687422 Binary files /dev/null and b/notebooks/tutorials/model_validation/selecting-class-imbalance-results.png differ diff --git a/notebooks/tutorials/model_validation/selecting-minimum-f1-scores.png b/notebooks/tutorials/model_validation/selecting-minimum-f1-scores.png new file mode 100644 index 000000000..60ae6b960 Binary files /dev/null and b/notebooks/tutorials/model_validation/selecting-minimum-f1-scores.png differ diff --git a/pyproject.toml b/pyproject.toml index 94e353999..b8a4f62cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ description = "ValidMind Library" license = "Commercial License" name = "validmind" readme = "README.pypi.md" -version = "2.8.17" +version = "2.8.18" [tool.poetry.dependencies] aiohttp = {extras = ["speedups"], version = "*"} diff --git a/validmind/__version__.py b/validmind/__version__.py index 42e92e36d..c2cb868ca 100644 --- a/validmind/__version__.py +++ b/validmind/__version__.py @@ -1 +1 @@ -__version__ = "2.8.17" +__version__ = "2.8.18"