diff --git a/.gitignore b/.gitignore index 0ce6f7c95..eb447b41e 100644 --- a/.gitignore +++ b/.gitignore @@ -191,6 +191,8 @@ lending_club_loan_data_*.csv # Pickle files *.pkl +# Sample application scorecard model for validation notebook — do not remove! +!notebooks/code_samples/model_validation/xgb_model_champion.pkl notebooks/llm/datasets/*.jsonl diff --git a/notebooks/code_samples/capital_markets/quickstart_option_pricing_models.ipynb b/notebooks/code_samples/capital_markets/quickstart_option_pricing_models.ipynb index b019c7b45..6da5cffde 100644 --- a/notebooks/code_samples/capital_markets/quickstart_option_pricing_models.ipynb +++ b/notebooks/code_samples/capital_markets/quickstart_option_pricing_models.ipynb @@ -82,7 +82,7 @@ "\n", "### New to ValidMind?\n", "\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/code_samples/capital_markets/quickstart_option_pricing_models_quantlib.ipynb b/notebooks/code_samples/capital_markets/quickstart_option_pricing_models_quantlib.ipynb index e25191847..a8548826b 100644 --- a/notebooks/code_samples/capital_markets/quickstart_option_pricing_models_quantlib.ipynb +++ b/notebooks/code_samples/capital_markets/quickstart_option_pricing_models_quantlib.ipynb @@ -120,7 +120,7 @@ "\n", "### New to ValidMind?\n", "\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/code_samples/credit_risk/application_scorecard_demo.ipynb b/notebooks/code_samples/credit_risk/application_scorecard_demo.ipynb index 5bb5985f4..1bc8aa931 100644 --- a/notebooks/code_samples/credit_risk/application_scorecard_demo.ipynb +++ b/notebooks/code_samples/credit_risk/application_scorecard_demo.ipynb @@ -86,7 +86,7 @@ "\n", "\n", "### New to ValidMind?\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/code_samples/credit_risk/application_scorecard_executive.ipynb b/notebooks/code_samples/credit_risk/application_scorecard_executive.ipynb index 349cfd30c..8025ece67 100644 --- a/notebooks/code_samples/credit_risk/application_scorecard_executive.ipynb +++ b/notebooks/code_samples/credit_risk/application_scorecard_executive.ipynb @@ -37,7 +37,7 @@ "\n", "\n", "### New to ValidMind?\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/code_samples/credit_risk/application_scorecard_full_suite.ipynb b/notebooks/code_samples/credit_risk/application_scorecard_full_suite.ipynb index 2c91302c1..77e5b2ba0 100644 --- a/notebooks/code_samples/credit_risk/application_scorecard_full_suite.ipynb +++ b/notebooks/code_samples/credit_risk/application_scorecard_full_suite.ipynb @@ -37,7 +37,7 @@ "\n", "\n", "### New to ValidMind?\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/code_samples/credit_risk/application_scorecard_with_bias.ipynb b/notebooks/code_samples/credit_risk/application_scorecard_with_bias.ipynb index 0d6f4e270..0b909ddda 100644 --- a/notebooks/code_samples/credit_risk/application_scorecard_with_bias.ipynb +++ b/notebooks/code_samples/credit_risk/application_scorecard_with_bias.ipynb @@ -75,7 +75,7 @@ "\n", "\n", "### New to ValidMind?\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n" + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n" ] }, { diff --git a/notebooks/code_samples/credit_risk/application_scorecard_with_ml.ipynb b/notebooks/code_samples/credit_risk/application_scorecard_with_ml.ipynb index ca1bdb4e3..961a92c5c 100644 --- a/notebooks/code_samples/credit_risk/application_scorecard_with_ml.ipynb +++ b/notebooks/code_samples/credit_risk/application_scorecard_with_ml.ipynb @@ -37,7 +37,7 @@ "\n", "\n", "### New to ValidMind?\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/code_samples/custom_tests/implement_custom_tests.ipynb b/notebooks/code_samples/custom_tests/implement_custom_tests.ipynb index 34ed27e57..80393d5f3 100644 --- a/notebooks/code_samples/custom_tests/implement_custom_tests.ipynb +++ b/notebooks/code_samples/custom_tests/implement_custom_tests.ipynb @@ -78,7 +78,7 @@ "\n", "### New to ValidMind?\n", "\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/code_samples/model_validation/validate_application_scorecard.ipynb b/notebooks/code_samples/model_validation/validate_application_scorecard.ipynb new file mode 100644 index 000000000..a2a6d900e --- /dev/null +++ b/notebooks/code_samples/model_validation/validate_application_scorecard.ipynb @@ -0,0 +1,1825 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Validate an application scorecard model\n", + "\n", + "Learn how to independently assess an application scorecard model developed using the ValidMind Library as a validator. You'll evaluate the development of the model by conducting thorough testing and analysis, including the use of challenger models to benchmark performance.\n", + "\n", + "An *application scorecard model* is a type of statistical model used in credit scoring to evaluate the creditworthiness of potential borrowers by generating a score based on various characteristics of an applicant such as credit history, income, employment status, and other relevant financial data.\n", + "\n", + " - This score assists lenders in making informed decisions about whether to approve or reject loan applications, as well as in determining the terms of the loan, including interest rates and credit limits.\n", + " - Effective validation of application scorecard models ensures that lenders can manage risk efficiently while maintaining a fast and transparent loan application process for applicants.\n", + "\n", + "This interactive notebook provides a step-by-step guide for:\n", + "\n", + "- Verifying the data quality steps performed by the model development team\n", + "- Independently replicating the champion model's results and conducting additional tests to assess performance, stability, and robustness\n", + "- Setting up test inputs and challenger models for comparative analysis\n", + "- Running validation tests, analyzing results, and logging findings to ValidMind" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "::: {.content-hidden when-format=\"html\"}\n", + "## Contents \n", + "- [About ValidMind](#toc1_) \n", + " - [Before you begin](#toc1_1_) \n", + " - [New to ValidMind?](#toc1_2_) \n", + " - [Key concepts](#toc1_3_) \n", + "- [Setting up](#toc2_) \n", + " - [Register a sample model](#toc2_1_) \n", + " - [Assign validator credentials](#toc2_1_1_) \n", + " - [Install the ValidMind Library](#toc2_2_) \n", + " - [Initialize the ValidMind Library](#toc2_3_) \n", + " - [Get your code snippet](#toc2_3_1_) \n", + " - [Importing the champion model](#toc2_4_) \n", + " - [Load the sample dataset](#toc2_5_) \n", + " - [Preprocess the dataset](#toc2_5_1_) \n", + " - [Apply feature engineering to the dataset](#toc2_5_2_) \n", + " - [Split the feature engineered dataset](#toc2_6_) \n", + "- [Developing potential challenger models](#toc3_) \n", + " - [Train potential challenger models](#toc3_1_) \n", + " - [Random forest classification model](#toc3_1_1_) \n", + " - [Logistic regression model](#toc3_1_2_) \n", + " - [Extract predicted probabilities](#toc3_2_) \n", + " - [Compute binary predictions](#toc3_2_1_) \n", + "- [Initializing the ValidMind objects](#toc4_) \n", + " - [Initialize the ValidMind datasets](#toc4_1_) \n", + " - [Initialize the model objects](#toc4_2_) \n", + " - [Assign predictions](#toc4_3_) \n", + " - [Compute credit risk scores](#toc4_4_) \n", + "- [Run data quality tests](#toc5_) \n", + " - [Run and log an individual data quality test](#toc5_1_) \n", + " - [Log multiple data quality 
tests](#toc5_2_) \n", + " - [Run data quality comparison tests](#toc5_3_) \n", + "- [Run performance tests](#toc6_) \n", + " - [Identify performance tests](#toc6_1_) \n", + " - [Run and log an individual performance test](#toc6_2_) \n", + " - [Log multiple performance tests](#toc6_3_) \n", + " - [Evaluate performance of the champion model](#toc6_4_) \n", + " - [Evaluate performance of challenger models](#toc6_5_) \n", + " - [Enable custom context for test descriptions](#toc6_5_1_) \n", + " - [Run performance comparison tests](#toc6_5_2_) \n", + "- [Adjust a ValidMind test](#toc7_) \n", + "- [Run diagnostic tests](#toc8_) \n", + "- [Run feature importance tests](#toc9_) \n", + "- [Implement a custom test](#toc10_) \n", + "- [Verify test runs](#toc11_) \n", + "- [Next steps](#toc12_) \n", + " - [Work with your validation report](#toc12_1_) \n", + " - [Discover more learning resources](#toc12_2_) \n", + "- [Upgrade ValidMind](#toc13_) \n", + "\n", + ":::\n", + "\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## About ValidMind\n", + "\n", + "ValidMind is a suite of tools for managing model risk, including risk associated with AI and statistical models.\n", + "\n", + "You use the ValidMind Library to automate comparison and other validation tests, and then use the ValidMind Platform to submit compliance assessments of champion models via comprehensive validation reports. Together, these products simplify model risk management, facilitate compliance with regulations and institutional standards, and enhance collaboration between yourself and model developers." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Before you begin\n", + "\n", + "This notebook assumes you have basic familiarity with Python, including an understanding of how functions work. If you are new to Python, you can still run the notebook but we recommend further familiarizing yourself with the language. \n", + "\n", + "If you encounter errors due to missing modules in your Python environment, install the modules with `pip install`, and then re-run the notebook. For more help, refer to [Installing Python Modules](https://docs.python.org/3/installing/index.html)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### New to ValidMind?\n", + "\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", + "\n", + "
For access to all features available in this notebook, create a free ValidMind account.\n", + "

\n", + "Signing up is FREE — Register with ValidMind
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Key concepts\n", + "\n", + "**Validation report**: A comprehensive and structured assessment of a model’s development and performance, focusing on verifying its integrity, appropriateness, and alignment with its intended use. It includes analyses of model assumptions, data quality, performance metrics, outcomes of testing procedures, and risk considerations. The validation report supports transparency, regulatory compliance, and informed decision-making by documenting the validator’s independent review and conclusions.\n", + "\n", + "**Validation report template**: Serves as a standardized framework for conducting and documenting model validation activities. It outlines the required sections, recommended analyses, and expected validation tests, ensuring consistency and completeness across validation reports. The template helps guide validators through a systematic review process while promoting comparability and traceability of validation outcomes.\n", + "\n", + "**Tests**: A function contained in the ValidMind Library, designed to run a specific quantitative test on the dataset or model. Tests are the building blocks of ValidMind, used to evaluate and document models and datasets.\n", + "\n", + "**Metrics**: A subset of tests that do not have thresholds. In the context of this notebook, metrics and tests can be thought of as interchangeable concepts.\n", + "\n", + "**Custom metrics**: Custom metrics are functions that you define to evaluate your model or dataset. These functions can be registered with the ValidMind Library to be used in the ValidMind Platform.\n", + "\n", + "**Inputs**: Objects to be evaluated and documented in the ValidMind Library. They can be any of the following:\n", + "\n", + " - **model**: A single model that has been initialized in ValidMind with [`vm.init_model()`](https://docs.validmind.ai/validmind/validmind.html#init_model).\n", + " - **dataset**: Single dataset that has been initialized in ValidMind with [`vm.init_dataset()`](https://docs.validmind.ai/validmind/validmind.html#init_dataset).\n", + " - **models**: A list of ValidMind models - usually this is used when you want to compare multiple models in your custom metric.\n", + " - **datasets**: A list of ValidMind datasets - usually this is used when you want to compare multiple datasets in your custom metric. (Learn more: [Run tests with multiple datasets](https://docs.validmind.ai/notebooks/how_to/run_tests_that_require_multiple_datasets.html))\n", + "\n", + "**Parameters**: Additional arguments that can be passed when running a ValidMind test, used to pass additional information to a metric, customize its behavior, or provide additional context.\n", + "\n", + "**Outputs**: Custom metrics can return elements like tables or plots. Tables may be a list of dictionaries (each representing a row) or a pandas DataFrame. Plots may be matplotlib or plotly figures." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Setting up" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Register a sample model\n", + "\n", + "In a usual model lifecycle, a champion model will have been independently registered in your model inventory and submitted to you for validation by your model development team as part of the effective challenge process. 
(**Learn more:** [Submit for approval](https://docs.validmind.ai/guide/model-documentation/submit-for-approval.html))\n", + "\n", + "For this notebook, we'll have you register a dummy model in the ValidMind Platform inventory and assign yourself as the validator to familiarize you with the ValidMind interface and circumvent the need for an existing model:\n", + "\n", + "1. In a browser, [log in to ValidMind](https://docs.validmind.ai/guide/configuration/log-in-to-validmind.html).\n", + "\n", + "2. In the left sidebar, navigate to **Inventory** and click **+ Register Model**.\n", + "\n", + "3. Enter the model details and click **Continue**. ([Need more help?](https://docs.validmind.ai/guide/model-inventory/register-models-in-inventory.html))\n", + "\n", + " For example, to register a model for use with this notebook, select:\n", + "\n", + " - Documentation template: `Credit Risk Scorecard`\n", + " - Use case: `Credit Risk — CECL`\n", + "\n", + " You can fill in other options according to your preference." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Assign validator credentials\n", + "\n", + "In order to log tests as a validator instead of as a developer, on the model details page that appears after you've successfully registered your sample model:\n", + "\n", + "1. Remove yourself as a developer: \n", + "\n", + " - Click on the **DEVELOPERS** tile.\n", + " - Click the **x** next to your name to remove yourself from that model's role.\n", + " - Click **Save** to apply your changes to that role.\n", + "\n", + "2. Add yourself as a validator: \n", + "\n", + " - Click on the **VALIDATORS** tile.\n", + " - Select your name from the drop-down menu.\n", + " - Click **Save** to apply your changes to that role." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Install the ValidMind Library\n", + "\n", + "
Recommended Python versions\n", + "

\n", + "Python 3.8 <= x <= 3.11
\n", + "\n", + "To install the library:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -q validmind" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Initialize the ValidMind Library\n", + "\n", + "ValidMind generates a unique _code snippet_ for each registered model to connect with your validation environment. You initialize the ValidMind Library with this code snippet, which ensures that your test results are uploaded to the correct model when you run the notebook." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Get your code snippet\n", + "\n", + "1. In a browser, [log in to ValidMind](https://docs.validmind.ai/guide/configuration/log-in-to-validmind.html).\n", + "\n", + "2. In the left sidebar, navigate to **Inventory** and select the model you registered for this notebook.\n", + "\n", + "3. Go to **Getting Started** and click **Copy snippet to clipboard**.\n", + "\n", + "Next, [load your model identifier credentials from an `.env` file](https://docs.validmind.ai/developer/model-documentation/store-credentials-in-env-file.html) or replace the placeholder with your own code snippet:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load your model identifier credentials from an `.env` file\n", + "\n", + "%load_ext dotenv\n", + "%dotenv .env\n", + "\n", + "# Or replace with your code snippet\n", + "\n", + "import validmind as vm\n", + "\n", + "vm.init(\n", + " # api_host=\"...\",\n", + " # api_key=\"...\",\n", + " # api_secret=\"...\",\n", + " # model=\"...\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Importing the champion model\n", + "\n", + "With the ValidMind Library set up and ready to go, let's go ahead and import the champion model submitted by the model development team in the format of a `.pkl` file: **[xgb_model_champion.pkl](xgb_model_champion.pkl)**\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import xgboost as xgb\n", + "\n", + "#Load the saved model\n", + "xgb_model = xgb.XGBClassifier()\n", + "xgb_model.load_model(\"xgb_model_champion.pkl\")\n", + "xgb_model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Ensure that we have to appropriate order in feature names from Champion model and dataset\n", + "cols_when_model_builds = xgb_model.get_booster().feature_names" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Load the sample dataset\n", + "\n", + "Let's next import the public [Lending Club](https://www.kaggle.com/datasets/devanshi23/loan-data-2007-2014/data) dataset from Kaggle, which was used to develop the dummy champion model.\n", + "\n", + "- We'll use this dataset to review steps that should have been conducted during the initial development and documentation of the model to ensure that the model was built correctly.\n", + "- By independently performing steps such as preprocessing and feature engineering, we can confirm whether the model was built using appropriate and properly processed data.\n", + "\n", + "To be able to use the dataset, you'll need to import the dataset and load it into a pandas [DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html), a 
two-dimensional tabular data structure that makes use of rows and columns:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from validmind.datasets.credit_risk import lending_club\n", + "\n", + "df = lending_club.load_data(source=\"offline\")\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Preprocess the dataset\n", + "\n", + "We'll first quickly preprocess the dataset for data quality testing purposes using `lending_club.preprocess`. This function performs the following operations:\n", + "\n", + "- Filters the dataset to include only loans for debt consolidation or credit card purposes\n", + "- Removes loans classified under the riskier grades \"F\" and \"G\"\n", + "- Excludes uncommon home ownership types and standardizes employment length and loan terms into numerical formats\n", + "- Discards unnecessary fields and any entries with missing information to maintain a clean and robust dataset for modeling" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "preprocess_df = lending_club.preprocess(df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Apply feature engineering to the dataset\n", + "\n", + "Feature engineering improves the dataset's structure to better match what our model expects, and ensures that the model performs optimally by leveraging additional insights from raw data.\n", + "\n", + "We'll apply the following transformations using the `lending_club.feature_engineering()` function to optimize the dataset for predictive modeling in our application scorecard:\n", + "\n", + "- **WoE encoding**: Converts both numerical and categorical features into Weight of Evidence (WoE) values. WoE is a statistical measure used in scorecard modeling that quantifies the relationship between a predictor variable and the binary target variable. It calculates the ratio of the distribution of good outcomes to the distribution of bad outcomes for each category or bin of a feature. This transformation helps to ensure that the features are predictive and consistent in their contribution to the model (see the short WoE sketch after this list).\n", + "- **Integration of WoE bins**: Ensures that the WoE transformed values are integrated throughout the dataset, replacing the original feature values while excluding the target variable from this transformation. This transformation is used to maintain a consistent scale and impact of each variable within the model, which helps make the predictions more stable and accurate."
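To make the WoE transformation concrete, here is a minimal, self-contained sketch of the calculation for a single binned feature. It is purely illustrative — it is not the implementation inside `lending_club.feature_engineering()`, and the toy column names (`int_rate_bin`, `loan_status`) and values are hypothetical:

```python
import numpy as np
import pandas as pd

# Toy data: one binned feature and a binary target (1 = "bad" outcome, 0 = "good").
toy = pd.DataFrame({
    "int_rate_bin": ["low", "low", "low", "mid", "mid", "mid", "high", "high", "high"],
    "loan_status":  [0,     0,     1,     0,     1,     0,     1,      1,      0],
})

grouped = toy.groupby("int_rate_bin")["loan_status"].agg(bad="sum", total="count")
grouped["good"] = grouped["total"] - grouped["bad"]

# WoE per bin: log of (bin's share of all goods) over (bin's share of all bads).
grouped["woe"] = np.log(
    (grouped["good"] / grouped["good"].sum()) / (grouped["bad"] / grouped["bad"].sum())
)
print(grouped)
```

Bins where good loans are over-represented get positive WoE values and bins dominated by bad loans get negative values, which is what makes the encoded features comparable across variables in a scorecard model.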
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fe_df = lending_club.feature_engineering(preprocess_df)\n", + "fe_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Split the feature engineered dataset\n", + "\n", + "With our dummy model imported and our independently preprocessed and feature engineered dataset ready to go, let's now **spilt our dataset into train and test** to start the validation testing process.\n", + "\n", + "Splitting our dataset into training and testing is essential for proper validation testing, as this helps assess how well the model generalizes to unseen data:\n", + "\n", + "- We begin by dividing our data, which is based on Weight of Evidence (WoE) features, into training and testing sets (`train_df`, `test_df`).\n", + "- With `lending_club.split`, we employ a simple random split, randomly allocating data points to each set to ensure a mix of examples in both." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Split the data\n", + "train_df, test_df = lending_club.split(fe_df, test_size=0.2)\n", + "\n", + "x_train = train_df.drop(lending_club.target_column, axis=1)\n", + "y_train = train_df[lending_club.target_column]\n", + "\n", + "x_test = test_df.drop(lending_club.target_column, axis=1)\n", + "y_test = test_df[lending_club.target_column]\n", + "\n", + "# Now let's apply the order of features from the champion model construction\n", + "x_train = x_train[cols_when_model_builds]\n", + "x_test = x_test[cols_when_model_builds]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cols_use = ['annual_inc_woe',\n", + " 'verification_status_woe',\n", + " 'emp_length_woe',\n", + " 'installment_woe',\n", + " 'term_woe',\n", + " 'home_ownership_woe',\n", + " 'purpose_woe',\n", + " 'open_acc_woe',\n", + " 'total_acc_woe',\n", + " 'int_rate_woe',\n", + " 'sub_grade_woe',\n", + " 'grade_woe','loan_status']\n", + "\n", + "\n", + "train_df = train_df[cols_use]\n", + "test_df = test_df[cols_use]\n", + "test_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Developing potential challenger models" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Train potential challenger models\n", + "\n", + "We're curious how alternate models compare to our champion model, so let's train two challenger models as basis for our testing.\n", + "\n", + "Our selected options below offer decreased complexity in terms of implementation — such as lessened manual preprocessing — which can reduce the amount of risk for implementation. However, model risk is not calculated in isolation from a single factor, but rather in consideration with trade-offs in predictive performance, ease of interpretability, and overall alignment with business objectives." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Random forest classification model\n", + "\n", + "A *random forest classification model* is an ensemble machine learning algorithm that uses multiple decision trees to classify data. 
In ensemble learning, multiple models are combined to improve prediction accuracy and robustness.\n", + "\n", + "Random forest classification models generally have higher accuracy because they capture complex, non-linear relationships, but as a result they lack transparency in their predictions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import the Random Forest Classification model\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "\n", + "# Create the model instance with 50 decision trees\n", + "rf_model = RandomForestClassifier(\n", + " n_estimators=50,\n", + " random_state=42,\n", + ")\n", + "\n", + "# Train the model\n", + "rf_model.fit(x_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Logistic regression model\n", + "\n", + "A *logistic regression model* is a statistical machine learning algorithm that uses a linear equation (straight-line relationship between variables) and the logistic function (or sigmoid function, which maps any real-valued number to a range between `0` and `1`) to classify data. In statistical modeling, a single equation is used to estimate the probability of an outcome based on input features.\n", + "\n", + "Logistic regression models are simple and interpretable because they provide clear probability estimates and feature coefficients (numerical value that represents the influence of a particular input feature on the model's prediction), but they may struggle with capturing complex, non-linear relationships in the data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import the Logistic Regression model\n", + "from sklearn.linear_model import LogisticRegression\n", + "\n", + "# Logistic Regression grid params\n", + "log_reg_params = {\n", + " \"penalty\": [\"l1\", \"l2\"],\n", + " \"C\": [0.001, 0.01, 0.1, 1, 10, 100, 1000],\n", + " \"solver\": [\"liblinear\"],\n", + "}\n", + "\n", + "# Grid search for Logistic Regression\n", + "from sklearn.model_selection import GridSearchCV\n", + "\n", + "grid_log_reg = GridSearchCV(LogisticRegression(), log_reg_params)\n", + "grid_log_reg.fit(x_train, y_train)\n", + "\n", + "# Logistic Regression best estimator\n", + "log_reg = grid_log_reg.best_estimator_\n", + "log_reg" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Extract predicted probabilities\n", + "\n", + "With our challenger models trained, let's extract the predicted probabilities from our three models:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Champion — Application scorecard model\n", + "train_xgb_prob = xgb_model.predict_proba(x_train)[:, 1]\n", + "test_xgb_prob = xgb_model.predict_proba(x_test)[:, 1]\n", + "\n", + "# Challenger — Random forest classification model\n", + "train_rf_prob = rf_model.predict_proba(x_train)[:, 1]\n", + "test_rf_prob = rf_model.predict_proba(x_test)[:, 1]\n", + "\n", + "# Challenger — Logistic regression model\n", + "train_log_prob = log_reg.predict_proba(x_train)[:, 1]\n", + "test_log_prob = log_reg.predict_proba(x_test)[:, 1]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Compute binary predictions\n", + "\n", + "Next, we'll convert the probability predictions from our three models into a binary, based on a threshold of `0.3`:\n", + "\n", 
+ "- If the probability is greater than `0.3`, the prediction becomes `1` (positive).\n", + "- Otherwise, it becomes `0` (negative)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cut_off_threshold = 0.3\n", + "\n", + "# Champion — Application scorecard model\n", + "train_xgb_binary_predictions = (train_xgb_prob > cut_off_threshold).astype(int)\n", + "test_xgb_binary_predictions = (test_xgb_prob > cut_off_threshold).astype(int)\n", + "\n", + "# Challenger — Random forest classification model\n", + "train_rf_binary_predictions = (train_rf_prob > cut_off_threshold).astype(int)\n", + "test_rf_binary_predictions = (test_rf_prob > cut_off_threshold).astype(int)\n", + "\n", + "# Challenger — Logistic regression model\n", + "train_log_binary_predictions = (train_log_prob > cut_off_threshold).astype(int)\n", + "test_log_binary_predictions = (test_log_prob > cut_off_threshold).astype(int)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Initializing the ValidMind objects" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Initialize the ValidMind datasets\n", + "\n", + "Before you can run tests, you'll need to connect your data with a ValidMind `Dataset` object. **This step is always necessary every time you want to connect a dataset to documentation and produce test results through ValidMind,** but you only need to do it once per dataset.\n", + "\n", + "Initialize a ValidMind dataset object using the [`init_dataset` function](https://docs.validmind.ai/validmind/validmind.html#init_dataset) from the ValidMind (`vm`) module. For this example, we'll pass in the following arguments:\n", + "\n", + "- **`dataset`** — The raw dataset that you want to provide as input to tests.\n", + "- **`input_id`** — A unique identifier that allows tracking what inputs are used when running each individual test.\n", + "- **`target_column`** — A required argument if tests require access to true values. This is the name of the target column in the dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize the raw dataset\n", + "vm_raw_dataset = vm.init_dataset(\n", + " dataset=df,\n", + " input_id=\"raw_dataset\",\n", + " target_column=lending_club.target_column,\n", + ")\n", + "\n", + "# Initialize the preprocessed dataset\n", + "vm_preprocess_dataset = vm.init_dataset(\n", + " dataset=preprocess_df,\n", + " input_id=\"preprocess_dataset\",\n", + " target_column=lending_club.target_column,\n", + ")\n", + "\n", + "# Initialize the feature engineered dataset\n", + "vm_fe_dataset = vm.init_dataset(\n", + " dataset=fe_df,\n", + " input_id=\"fe_dataset\",\n", + " target_column=lending_club.target_column,\n", + ")\n", + "\n", + "# Initialize the training dataset\n", + "vm_train_ds = vm.init_dataset(\n", + " dataset=train_df,\n", + " input_id=\"train_dataset\",\n", + " target_column=lending_club.target_column,\n", + ")\n", + "\n", + "# Initialize the test dataset\n", + "vm_test_ds = vm.init_dataset(\n", + " dataset=test_df,\n", + " input_id=\"test_dataset\",\n", + " target_column=lending_club.target_column,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After initialization, you can pass the ValidMind `Dataset` objects `vm_raw_dataset`, `vm_preprocess_dataset`, `vm_fe_dataset`, `vm_train_ds`, and `vm_test_ds` into any ValidMind tests." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Initialize the model objects\n", + "\n", + "You'll also need to initialize a ValidMind model object (`vm_model`) that can be passed to other functions for analysis and tests on the data for each of our three models.\n", + "\n", + "You simply initialize this model object with [`vm.init_model()`](https://docs.validmind.ai/validmind/validmind.html#init_model):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize the champion application scorecard model\n", + "vm_xgb_model = vm.init_model(\n", + " xgb_model,\n", + " input_id=\"xgb_model_developer_champion\",\n", + ")\n", + "\n", + "# Initialize the challenger random forest classification model\n", + "vm_rf_model = vm.init_model(\n", + " rf_model,\n", + " input_id=\"rf_model\",\n", + ")\n", + "\n", + "# Initialize the challenger logistic regression model\n", + "vm_log_model = vm.init_model(\n", + " log_reg,\n", + " input_id=\"log_model\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Assign predictions\n", + "\n", + "With our models registered, we'll move on to assigning both the predictive probabilities coming directly from each model's predictions, and the binary prediction after applying the cutoff threshold described in the Compute binary predictions step above.\n", + "\n", + "- The [`assign_predictions()` method](https://docs.validmind.ai/validmind/validmind/vm_models.html#VMDataset.assign_predictions) from the `Dataset` object can link existing predictions to any number of models.\n", + "- This method links the model's class prediction values and probabilities to our `vm_train_ds` and `vm_test_ds` datasets." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Champion — Application scorecard model\n", + "vm_train_ds.assign_predictions(\n", + " model=vm_xgb_model,\n", + " prediction_values=train_xgb_binary_predictions,\n", + " prediction_probabilities=train_xgb_prob,\n", + ")\n", + "\n", + "vm_test_ds.assign_predictions(\n", + " model=vm_xgb_model,\n", + " prediction_values=test_xgb_binary_predictions,\n", + " prediction_probabilities=test_xgb_prob,\n", + ")\n", + "\n", + "# Challenger — Random forest classification model\n", + "vm_train_ds.assign_predictions(\n", + " model=vm_rf_model,\n", + " prediction_values=train_rf_binary_predictions,\n", + " prediction_probabilities=train_rf_prob,\n", + ")\n", + "\n", + "vm_test_ds.assign_predictions(\n", + " model=vm_rf_model,\n", + " prediction_values=test_rf_binary_predictions,\n", + " prediction_probabilities=test_rf_prob,\n", + ")\n", + "\n", + "\n", + "# Challenger — Logistic regression model\n", + "vm_train_ds.assign_predictions(\n", + " model=vm_log_model,\n", + " prediction_values=train_log_binary_predictions,\n", + " prediction_probabilities=train_log_prob,\n", + ")\n", + "\n", + "vm_test_ds.assign_predictions(\n", + " model=vm_log_model,\n", + " prediction_values=test_log_binary_predictions,\n", + " prediction_probabilities=test_log_prob,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Compute credit risk scores\n", + "\n", + "Finally, we'll translate model predictions into actionable scores using probability estimates generated by our trained model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Compute the scores\n", + "train_xgb_scores = lending_club.compute_scores(train_xgb_prob)\n", + "test_xgb_scores = lending_club.compute_scores(test_xgb_prob)\n", + "train_rf_scores = lending_club.compute_scores(train_rf_prob)\n", + "test_rf_scores = lending_club.compute_scores(test_rf_prob)\n", + "train_log_scores = lending_club.compute_scores(train_log_prob)\n", + "test_log_scores = lending_club.compute_scores(test_log_prob)\n", + "\n", + "# Assign scores to the datasets\n", + "vm_train_ds.add_extra_column(\"xgb_scores\", train_xgb_scores)\n", + "vm_test_ds.add_extra_column(\"xgb_scores\", test_xgb_scores)\n", + "vm_train_ds.add_extra_column(\"rf_scores\", train_rf_scores)\n", + "vm_test_ds.add_extra_column(\"rf_scores\", test_rf_scores)\n", + "vm_train_ds.add_extra_column(\"log_scores\", train_log_scores)\n", + "vm_test_ds.add_extra_column(\"log_scores\", test_log_scores)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Run data quality tests\n", + "\n", + "With everything ready to go, let's explore some of ValidMind's available tests. Using ValidMind’s repository of tests streamlines your validation testing, and helps you ensure that your models are being validated appropriately.\n", + "\n", + "We want to narrow down the tests we want to run from the selection provided by ValidMind, so we'll use the [`vm.tests.list_tasks_and_tags()` function](https://docs.validmind.ai/validmind/validmind/tests.html#list_tasks_and_tags) to list which `tags` are associated with each `task` type:\n", + "\n", + "- **`tasks`** represent the kind of modeling task associated with a test. 
Here we'll focus on `classification` tasks.\n", + "- **`tags`** are free-form descriptions providing more details about the test, for example, what category the test falls into. Here we'll focus on the `data_quality` tag." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vm.tests.list_tasks_and_tags()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then we'll call [the `vm.tests.list_tests()` function](https://docs.validmind.ai/validmind/validmind/tests.html#list_tests) to list all the data quality tests for classification:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vm.tests.list_tests(\n", + " tags=[\"data_quality\"], task=\"classification\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
Want to learn more about navigating ValidMind tests?\n", + "

\n", + "Refer to our notebook outlining the utilities available for viewing and understanding available ValidMind tests: Explore tests
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Run and log an individual data quality test\n", + "\n", + "Next, we'll use our previously initialized preprocessed dataset (`vm_preprocess_dataset`) as input to run an individual test, then log the result to the ValidMind Platform.\n", + "\n", + "- You run validation tests by calling [the `run_test` function](https://docs.validmind.ai/validmind/validmind/tests.html#run_test) provided by the `validmind.tests` module.\n", + "- Every test result returned by the `run_test()` function has a [`.log()` method](https://docs.validmind.ai/validmind/validmind/vm_models.html#TestResult.log) that can be used to send the test results to the ValidMind Platform.\n", + "\n", + "Here, we'll use the [`HighPearsonCorrelation` test](https://docs.validmind.ai/tests/data_validation/HighPearsonCorrelation.html) as an example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vm.tests.run_test(\n", + " test_id=\"validmind.data_validation.HighPearsonCorrelation\",\n", + " inputs={\n", + " \"dataset\": vm_preprocess_dataset\n", + " }\n", + ").log()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
Note that the output returned indicates a test-driven block doesn't currently exist in your model's documentation for some test IDs.\n", "

\n", + "That's expected, as when we run validations tests the results logged need to be manually added to your report as part of your compliance assessment process within the ValidMind Platform. You'll continue to see this message throughout this notebook as we run and log more tests.
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Log multiple data quality tests\n", + "\n", + "Now that we understand how to run a test with ValidMind, we want to run all the tests that were returned for our `classification` tasks focusing on `data_quality`.\n", + "\n", + "We'll store the identified tests in `dq` in preparation for batch running these tests and logging their results to the ValidMind Platform:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dq = vm.tests.list_tests(tags=[\"data_quality\"], task=\"classification\",pretty=False)\n", + "dq" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "With our data quality tests stored, let's run our first batch of tests using the same preprocessed dataset (`vm_preprocess_dataset`) and log their results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for test in dq:\n", + " vm.tests.run_test(\n", + " test,\n", + " inputs={\n", + " \"dataset\": vm_preprocess_dataset\n", + " }\n", + " ).log()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Run data quality comparison tests\n", + "\n", + "Next, let's reuse the tests in `dq` to perform comparison tests between the raw (`vm_raw_dataset`) and preprocessed (`vm_preprocess_dataset`) dataset, again logging the results to the ValidMind Platform:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for test in dq:\n", + " vm.tests.run_test(\n", + " test,\n", + " input_grid={\n", + " \"dataset\": [vm_raw_dataset,vm_preprocess_dataset]\n", + " }\n", + " ).log()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Run performance tests\n", + "\n", + "We'll also run some performance tests, beginning with independent testing of our champion application scorecard model, then moving on to our potential challenger models." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Identify performance tests\n", + "\n", + "Use `vm.tests.list_tests()` to this time identify all the model performance tests for classification:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "vm.tests.list_tests(tags=[\"model_performance\"], task=\"classification\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Run and log an individual performance test\n", + "\n", + "Before we run our batch of performance tests, we'll use our previously initialized testing dataset (`vm_test_ds`) as input to run an individual test, then log the result to the ValidMind Platform.\n", + "\n", + "When running individual tests, you can use a custom `result_id` to tag the individual result with a unique identifier by appending this `result_id` to the `test_id` with a `:` separator. 
We'll append an identifier for our champion model here (`xgboost_champion`):\n", + "\n", + "Here, we'll use the [`ClassifierPerformance` test](https://docs.validmind.ai/tests/model_validation/sklearn/ClassifierPerformance.html) as an example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vm.tests.run_test(\n", + " test_id=\"validmind.model_validation.sklearn.ClassifierPerformance:xgboost_champion\",\n", + " inputs={\n", + " \"dataset\": vm_test_ds, \"model\" : vm_xgb_model\n", + " }\n", + ").log()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Log multiple performance tests\n", + "\n", + "We only want to run a few other tests that were returned for our `classification` tasks focusing on `model_performance`, so we'll isolate the specific tests we want to batch run in `mpt`:\n", + "\n", + "- `ClassifierPerformance`\n", + "- [`ConfusionMatrix`](https://docs.validmind.ai/tests/model_validation/sklearn/ConfusionMatrix.html)\n", + "- [`MinimumAccuracy`](https://docs.validmind.ai/tests/model_validation/sklearn/MinimumAccuracy.html)\n", + "- [`MinimumF1Score`](https://docs.validmind.ai/tests/model_validation/sklearn/MinimumF1Score.html)\n", + "- [`ROCCurve`](https://docs.validmind.ai/tests/model_validation/sklearn/ROCCurve.html)\n", + "\n", + "Note the custom `result_id`s appended to the `test_id`s for our champion model (`xgboost_champion`):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mpt = [\n", + " \"validmind.model_validation.sklearn.ClassifierPerformance:xgboost_champion\",\n", + " \"validmind.model_validation.sklearn.ConfusionMatrix:xgboost_champion\",\n", + " \"validmind.model_validation.sklearn.MinimumAccuracy:xgboost_champion\",\n", + " \"validmind.model_validation.sklearn.MinimumF1Score:xgboost_champion\",\n", + " \"validmind.model_validation.sklearn.ROCCurve:xgboost_champion\"\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Evaluate performance of the champion model\n", + "\n", + "Now, let's run and log our batch of model performance tests using our testing dataset (`vm_test_ds`) for our champion model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for test in mpt:\n", + " vm.tests.run_test(\n", + " test,\n", + " inputs={\n", + " \"dataset\": vm_test_ds, \"model\" : vm_xgb_model\n", + " },\n", + " ).log()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Evaluate performance of challenger models\n", + "\n", + "We've now conducted similar tests as the model development team for our champion model, with the aim of verifying their test results.\n", + "\n", + "Next, let's see how our challenger models compare. 
We'll use the same batch of tests here as we did in `mpt`, but append a different `result_id` to indicate that these results should be associated with our challenger models:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mpt_chall = [\n", + " \"validmind.model_validation.sklearn.ClassifierPerformance:xgboost_champion_vs_challengers\",\n", + " \"validmind.model_validation.sklearn.ConfusionMatrix:xgboost_champion_vs_challengers\",\n", + " \"validmind.model_validation.sklearn.MinimumAccuracy:xgboost_champion_vs_challengers\",\n", + " \"validmind.model_validation.sklearn.MinimumF1Score:xgboost_champion_vs_challengers\",\n", + " \"validmind.model_validation.sklearn.ROCCurve:xgboost_champion_vs_challengers\"\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Enable custom context for test descriptions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When you run ValidMind tests, test descriptions are automatically generated with LLM using the test results, the test name, and the static test definitions provided in the test’s docstring. While this metadata offers valuable high-level overviews of tests, insights produced by the LLM-based descriptions may not always align with your specific use cases or incorporate organizational policy requirements.\n", + "\n", + "Before we run our next batch of tests, we'll include some custom use case context to focus on comparison testing going forward, improving the relevancy, insight, and format of the test descriptions returned. By default, custom context for LLM-generated descriptions is disabled, meaning that the output will not include any additional context. To enable custom use case context, set the `VALIDMIND_LLM_DESCRIPTIONS_CONTEXT_ENABLED` environment variable to `1`.\n", + "\n", + "This is a global setting that will affect all tests for your linked model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ[\"VALIDMIND_LLM_DESCRIPTIONS_CONTEXT_ENABLED\"] = \"1\"" + ] + }, + { + "cell_type": "markdown", + "id": "0d1e90ba", + "metadata": {}, + "source": [ + "Enabling use case context allows you to pass in additional context to the LLM-generated text descriptions within `context`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ[\"VALIDMIND_LLM_DESCRIPTIONS_CONTEXT_ENABLED\"] = \"1\"\n", + "\n", + "context = \"\"\"\n", + "FORMAT FOR THE LLM DESCRIPTIONS: \n", + " **** is designed to .\n", + "\n", + " The test operates by \n", + "\n", + " The primary advantages of this test include \n", + "\n", + " Users should be aware that \n", + "\n", + " **Key Insights:**\n", + "\n", + " The test results reveal:\n", + "\n", + " - ****: \n", + " - ****: \n", + " ...\n", + "\n", + " Based on these results, \n", + "\n", + "ADDITIONAL INSTRUCTIONS:\n", + "\n", + " The champion model as the basis for comparison is called \"xgb_model_developer_champion\" and emphasis should be on the following:\n", + " - The metrics for the champion model compared against the challenger models\n", + " - Which model potentially outperforms the champion model based on the metrics, this should be highlighted and emphasized\n", + "\n", + "\n", + " For each metric in the test results, include in the test overview:\n", + " - The metric's purpose and what it 
measures\n", + " - Its mathematical formula\n", + " - The range of possible values\n", + " - What constitutes good/bad performance\n", + " - How to interpret different values\n", + "\n", + " Each insight should progressively cover:\n", + " 1. Overall scope and distribution\n", + " 2. Complete breakdown of all elements with specific values\n", + " 3. Natural groupings and patterns\n", + " 4. Comparative analysis between datasets/categories\n", + " 5. Stability and variations\n", + " 6. Notable relationships or dependencies\n", + "\n", + " Remember:\n", + " - Champion model (xgb_model_developer_champion) is the selection and challenger models are used to challenge the selection\n", + " - Keep all insights at the same level (no sub-bullets or nested structures)\n", + " - Make each insight complete and self-contained\n", + " - Include specific numerical values and ranges\n", + " - Cover all elements in the results comprehensively\n", + " - Maintain clear, concise language\n", + " - Use only \"- **Title**: Description\" format for insights\n", + " - Progress naturally from general to specific observations\n", + "\n", + "\"\"\".strip()\n", + "\n", + "os.environ[\"VALIDMIND_LLM_DESCRIPTIONS_CONTEXT\"] = context" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
Want to learn more about setting custom context for LLM-generated test descriptions?\n", + "

\n", + "Refer to our extended walkthrough notebook: Add context to LLM-generated test descriptions\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Run performance comparison tests\n", + "\n", + "With the use case context set, we'll run each test in `mpt_chall` once for each model with the same `vm_test_ds` dataset to compare them:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for test in mpt_chall:\n", + " vm.tests.run_test(\n", + " test,\n", + " input_grid={\n", + " \"dataset\": [vm_test_ds], \"model\" : [vm_xgb_model,vm_log_model,vm_rf_model]\n", + " }\n", + " ).log()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
Based on the performance metrics, we can conclude that the random forest classification model is not a viable candidate for our use case, so we'll exclude it from further testing.\n", + "

\n", + "In the next section, we'll dive a bit deeper into some tests comparing our champion application scorecard model and our remaining challenger logistic regression model, including tests that will allow us to customize parameters and thresholds for performance standards.
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Adjust a ValidMind test\n", + "\n", + "Let's dig deeper into the `MinimumF1Score` test we ran previously in Run performance tests to ensure that the models maintain a minimum acceptable balance between *precision* and *recall*. Precision refers to how many out of the positive predictions made by the model were actually correct, and recall refers to how many out of the actual positive cases did the model correctly identify.\n", + "\n", + "Use `run_test()` with our testing dataset (`vm_test_ds`) to run the test in isolation again for our two remaining models without logging the result to have the output to compare with a subsequent iteration:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vm.tests.run_test(\n", + " \"validmind.model_validation.sklearn.MinimumF1Score:xgboost_champion_vs_challengers\",\n", + " input_grid={\n", + " \"dataset\": [vm_test_ds],\n", + " \"model\": [vm_xgb_model, vm_log_model]\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As `MinimumF1Score` allows us to customize parameters and thresholds for performance standards, let's adjust the threshold to see if it improves metrics:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result = vm.tests.run_test(\n", + " \"validmind.model_validation.sklearn.MinimumF1Score:AdjThreshold\",\n", + " input_grid={\n", + " \"dataset\": [vm_test_ds],\n", + " \"model\": [vm_xgb_model, vm_log_model],\n", + " \"params\": {\"min_threshold\": 0.35}\n", + " },\n", + ").log()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Run diagnostic tests\n", + "\n", + "Next we want to inspect the robustness and stability testing comparison between our champion and challenger model.\n", + "\n", + "Use `list_tests()` to identify all the model diagnosis tests for classification:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vm.tests.list_tests(tags=[\"model_diagnosis\"], task=\"classification\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's see if models suffer from any *overfit* potentials and also where there are potential sub-segments of issues with the [`OverfitDiagnosis` test](https://docs.validmind.ai/tests/model_validation/sklearn/OverfitDiagnosis.html). \n", + "\n", + "Overfitting occurs when a model learns the training data too well, capturing not only the true pattern but noise and random fluctuations resulting in excellent performance on the training dataset but poor generalization to new, unseen data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vm.tests.run_test(\n", + " test_id=\"validmind.model_validation.sklearn.OverfitDiagnosis:Champion_vs_LogRegression\",\n", + " input_grid={\n", + " \"datasets\": [[vm_train_ds,vm_test_ds]],\n", + " \"model\" : [vm_xgb_model,vm_log_model]\n", + " }\n", + ").log()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's also conduct *robustness* and *stability* testing of the two models with the [`RobustnessDiagnosis` test](https://docs.validmind.ai/tests/model_validation/sklearn/RobustnessDiagnosis.html).\n", + "\n", + "Robustness refers to a model's ability to maintain consistent performance, and stability refers to a model's ability to produce consistent outputs over time across different data subsets.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vm.tests.run_test(\n", + " test_id=\"validmind.model_validation.sklearn.RobustnessDiagnosis:Champion_vs_LogRegression\",\n", + " input_grid={\n", + " \"datasets\": [[vm_train_ds,vm_test_ds]],\n", + " \"model\" : [vm_xgb_model,vm_log_model]\n", + " },\n", + ").log()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Run feature importance tests\n", + "\n", + "We want to verify the relative influence of different input features on our models' predictions, as well as inspect the differences between our champion and challenger model to see if a certain model offers more understandable or logical importance scores for features.\n", + "\n", + "Use `list_tests()` to identify all the feature importance tests for classification:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Store the feature importance tests\n", + "FI = vm.tests.list_tests(tags=[\"feature_importance\"], task=\"classification\",pretty=False)\n", + "FI" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Run and log our feature importance tests for both models for the testing dataset\n", + "for test in FI:\n", + " vm.tests.run_test(\n", + " \"\".join((test,':Champion_vs_LogisticRegression')),\n", + " input_grid={\n", + " \"dataset\": [vm_test_ds], \"model\" : [vm_xgb_model,vm_log_model]\n", + " },\n", + " ).log()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Implement a custom test\n", + "\n", + "Let's finish up testing by implementing a custom *inline test* that outputs a FICO score-type score. 
An inline test refers to a test written and executed within the same environment as the code being tested — in this case, right in this Jupyter Notebook — without requiring a separate test file or framework.\n", + "\n", + "The [`@vm.test` wrapper](https://docs.validmind.ai/validmind/validmind.html#test) allows you to create a reusable test:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import plotly.graph_objects as go\n", + "\n", + "@vm.test(\"my_custom_tests.ScoreToOdds\")\n", + "def score_to_odds_analysis(dataset, score_column='score', score_bands=[410, 440, 470]):\n", + " \"\"\"\n", + " Analyzes the relationship between score bands and odds (good:bad ratio).\n", + " Good odds = (1 - default_rate) / default_rate\n", + " \n", + " Higher scores should correspond to higher odds of being good.\n", + "\n", + " If there are multiple scores provided through score_column, this means that there are two different models and the scores reflect each model\n", + "\n", + " If there are more scores provided in the score_column then focus the assessment on the differences between the two scores and indicate through evidence which one is preferred.\n", + " \"\"\"\n", + " df = dataset.df\n", + " \n", + " # Create score bands\n", + " df['score_band'] = pd.cut(\n", + " df[score_column],\n", + " bins=[-np.inf] + score_bands + [np.inf],\n", + " labels=[f'<{score_bands[0]}'] + \n", + " [f'{score_bands[i]}-{score_bands[i+1]}' for i in range(len(score_bands)-1)] +\n", + " [f'>{score_bands[-1]}']\n", + " )\n", + " \n", + " # Calculate metrics per band\n", + " results = df.groupby('score_band').agg({\n", + " dataset.target_column: ['mean', 'count']\n", + " })\n", + " \n", + " results.columns = ['Default Rate', 'Total']\n", + " results['Good Count'] = results['Total'] - (results['Default Rate'] * results['Total'])\n", + " results['Bad Count'] = results['Default Rate'] * results['Total']\n", + " results['Odds'] = results['Good Count'] / results['Bad Count']\n", + " \n", + " # Create visualization\n", + " fig = go.Figure()\n", + " \n", + " # Add odds bars\n", + " fig.add_trace(go.Bar(\n", + " name='Odds (Good:Bad)',\n", + " x=results.index,\n", + " y=results['Odds'],\n", + " marker_color='blue'\n", + " ))\n", + " \n", + " fig.update_layout(\n", + " title='Score-to-Odds Analysis',\n", + " yaxis=dict(title='Odds Ratio (Good:Bad)'),\n", + " showlegend=False\n", + " )\n", + " \n", + " return fig" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With the custom test available, run and log the test for our champion and challenger models with our testing dataset (`vm_test_ds`):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result = vm.tests.run_test(\n", + " \"my_custom_tests.ScoreToOdds:Champion_vs_Challenger\",\n", + " inputs={\n", + " \"dataset\": vm_test_ds,\n", + " },\n", + " param_grid={\n", + " \"score_column\": [\"xgb_scores\",\"log_scores\"],\n", + " \"score_bands\": [[500, 540, 570]],\n", + " },\n", + ").log()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
Want to learn more about custom tests?\n", + "

\n", + "Refer to our in-depth introduction to custom tests: Implement custom tests
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Verify test runs\n", + "\n", + "Our final task is to verify that all the tests provided by the model development team were run and reported accurately. Note the appended `result_ids` to delineate which dataset we ran the test with for the relevant tests.\n", + "\n", + "Here, we'll specify all the tests we'd like to independently rerun in a dictionary called `test_config`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_config = {\n", + " # Run with the raw dataset\n", + " 'validmind.data_validation.DatasetDescription:raw_data': {\n", + " 'inputs': {'dataset': 'raw_dataset'}\n", + " },\n", + " 'validmind.data_validation.DescriptiveStatistics:raw_data': {\n", + " 'inputs': {'dataset': 'raw_dataset'}\n", + " },\n", + " 'validmind.data_validation.MissingValues:raw_data': {\n", + " 'inputs': {'dataset': 'raw_dataset'},\n", + " 'params': {'min_threshold': 1}\n", + " },\n", + " 'validmind.data_validation.ClassImbalance:raw_data': {\n", + " 'inputs': {'dataset': 'raw_dataset'},\n", + " 'params': {'min_percent_threshold': 10}\n", + " },\n", + " 'validmind.data_validation.Duplicates:raw_data': {\n", + " 'inputs': {'dataset': 'raw_dataset'},\n", + " 'params': {'min_threshold': 1}\n", + " },\n", + " 'validmind.data_validation.HighCardinality:raw_data': {\n", + " 'inputs': {'dataset': 'raw_dataset'},\n", + " 'params': {\n", + " 'num_threshold': 100,\n", + " 'percent_threshold': 0.1,\n", + " 'threshold_type': 'percent'\n", + " }\n", + " },\n", + " 'validmind.data_validation.Skewness:raw_data': {\n", + " 'inputs': {'dataset': 'raw_dataset'},\n", + " 'params': {'max_threshold': 1}\n", + " },\n", + " 'validmind.data_validation.UniqueRows:raw_data': {\n", + " 'inputs': {'dataset': 'raw_dataset'},\n", + " 'params': {'min_percent_threshold': 1}\n", + " },\n", + " 'validmind.data_validation.TooManyZeroValues:raw_data': {\n", + " 'inputs': {'dataset': 'raw_dataset'},\n", + " 'params': {'max_percent_threshold': 0.03}\n", + " },\n", + " 'validmind.data_validation.IQROutliersTable:raw_data': {\n", + " 'inputs': {'dataset': 'raw_dataset'},\n", + " 'params': {'threshold': 5}\n", + " },\n", + " # Run with the preprocessed dataset\n", + " 'validmind.data_validation.DescriptiveStatistics:preprocessed_data': {\n", + " 'inputs': {'dataset': 'preprocess_dataset'}\n", + " },\n", + " 'validmind.data_validation.TabularDescriptionTables:preprocessed_data': {\n", + " 'inputs': {'dataset': 'preprocess_dataset'}\n", + " },\n", + " 'validmind.data_validation.MissingValues:preprocessed_data': {\n", + " 'inputs': {'dataset': 'preprocess_dataset'},\n", + " 'params': {'min_threshold': 1}\n", + " },\n", + " 'validmind.data_validation.TabularNumericalHistograms:preprocessed_data': {\n", + " 'inputs': {'dataset': 'preprocess_dataset'}\n", + " },\n", + " 'validmind.data_validation.TabularCategoricalBarPlots:preprocessed_data': {\n", + " 'inputs': {'dataset': 'preprocess_dataset'}\n", + " },\n", + " 'validmind.data_validation.TargetRateBarPlots:preprocessed_data': {\n", + " 'inputs': {'dataset': 'preprocess_dataset'},\n", + " 'params': {'default_column': 'loan_status'}\n", + " },\n", + " # Run with the training and test datasets\n", + " 'validmind.data_validation.DescriptiveStatistics:development_data': {\n", + " 'input_grid': {'dataset': ['train_dataset', 'test_dataset']}\n", + " },\n", + " 'validmind.data_validation.TabularDescriptionTables:development_data': {\n", + " 
'input_grid': {'dataset': ['train_dataset', 'test_dataset']}\n", + " },\n", + " 'validmind.data_validation.ClassImbalance:development_data': {\n", + " 'input_grid': {'dataset': ['train_dataset', 'test_dataset']},\n", + " 'params': {'min_percent_threshold': 10}\n", + " },\n", + " 'validmind.data_validation.UniqueRows:development_data': {\n", + " 'input_grid': {'dataset': ['train_dataset', 'test_dataset']},\n", + " 'params': {'min_percent_threshold': 1}\n", + " },\n", + " 'validmind.data_validation.TabularNumericalHistograms:development_data': {\n", + " 'input_grid': {'dataset': ['train_dataset', 'test_dataset']}\n", + " },\n", + " 'validmind.data_validation.MutualInformation:development_data': {\n", + " 'input_grid': {'dataset': ['train_dataset', 'test_dataset']},\n", + " 'params': {'min_threshold': 0.01}\n", + " },\n", + " 'validmind.data_validation.PearsonCorrelationMatrix:development_data': {\n", + " 'input_grid': {'dataset': ['train_dataset', 'test_dataset']}\n", + " },\n", + " 'validmind.data_validation.HighPearsonCorrelation:development_data': {\n", + " 'input_grid': {'dataset': ['train_dataset', 'test_dataset']},\n", + " 'params': {'max_threshold': 0.3, 'top_n_correlations': 10}\n", + " },\n", + " 'validmind.model_validation.ModelMetadata': {\n", + " 'input_grid': {'model': ['xgb_model_developer_champion', 'rf_model']}\n", + " },\n", + " 'validmind.model_validation.sklearn.ModelParameters': {\n", + " 'input_grid': {'model': ['xgb_model_developer_champion', 'rf_model']}\n", + " },\n", + " 'validmind.model_validation.sklearn.ROCCurve': {\n", + " 'input_grid': {'dataset': ['train_dataset', 'test_dataset'], 'model': ['xgb_model_developer_champion']}\n", + " },\n", + " 'validmind.model_validation.sklearn.MinimumROCAUCScore': {\n", + " 'input_grid': {'dataset': ['train_dataset', 'test_dataset'], 'model': ['xgb_model_developer_champion']},\n", + " 'params': {'min_threshold': 0.5}\n", + " }\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then batch run and log our tests in `test_config`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for t in test_config:\n", + " print(t)\n", + " try:\n", + " # Check if test has input_grid\n", + " if 'input_grid' in test_config[t]:\n", + " # For tests with input_grid, pass the input_grid configuration\n", + " if 'params' in test_config[t]:\n", + " vm.tests.run_test(t, input_grid=test_config[t]['input_grid'], params=test_config[t]['params']).log()\n", + " else:\n", + " vm.tests.run_test(t, input_grid=test_config[t]['input_grid']).log()\n", + " else:\n", + " # Original logic for regular inputs\n", + " if 'params' in test_config[t]:\n", + " vm.tests.run_test(t, inputs=test_config[t]['inputs'], params=test_config[t]['params']).log()\n", + " else:\n", + " vm.tests.run_test(t, inputs=test_config[t]['inputs']).log()\n", + " except Exception as e:\n", + " print(f\"Error running test {t}: {str(e)}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Next steps" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Work with your validation report\n", + "\n", + "Now that you've logged all your test results and verified the work done by the model development team, head to the ValidMind Platform to wrap up your validation report:\n", + "\n", + "1. From the **Inventory** in the ValidMind Platform, go to the model you connected to earlier.\n", + "\n", + "2. 
In the left sidebar that appears for your model, click **Validation Report**.\n", + "\n", + "Include your logged test results as evidence, create risk assessment notes, add findings, and assess compliance, then submit your report for review when it's ready. **Learn more:** [Preparing validation reports](https://docs.validmind.ai/guide/model-validation/preparing-validation-reports.html)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Discover more learning resources\n", + "\n", + "All notebook samples can be found in the following directories of the ValidMind Library GitHub repository:\n", + "\n", + "- [Code samples](https://github.com/validmind/validmind-library/tree/main/notebooks/code_samples)\n", + "- [How-to guides](https://github.com/validmind/validmind-library/tree/main/notebooks/how_to)\n", + "\n", + "Or, visit our [documentation](https://docs.validmind.ai/) to learn more about ValidMind." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Upgrade ValidMind\n", + "\n", + "
After installing ValidMind, you’ll want to periodically make sure you are on the latest version to access any new features and other enhancements.
\n", + "\n", + "Retrieve the information for the currently installed version of ValidMind:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip show validmind" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If the version returned is lower than the version indicated in our [production open-source code](https://github.com/validmind/validmind-library/blob/prod/validmind/__version__.py), restart your notebook and run:\n", + "\n", + "```bash\n", + "%pip install --upgrade validmind\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You may need to restart your kernel after running the upgrade package for changes to be applied." + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/code_samples/model_validation/xgb_model_champion.pkl b/notebooks/code_samples/model_validation/xgb_model_champion.pkl new file mode 100644 index 000000000..bfe7349b6 Binary files /dev/null and b/notebooks/code_samples/model_validation/xgb_model_champion.pkl differ diff --git a/notebooks/code_samples/nlp_and_llm/prompt_validation_demo.ipynb b/notebooks/code_samples/nlp_and_llm/prompt_validation_demo.ipynb index 0f88228e1..055028eca 100644 --- a/notebooks/code_samples/nlp_and_llm/prompt_validation_demo.ipynb +++ b/notebooks/code_samples/nlp_and_llm/prompt_validation_demo.ipynb @@ -66,7 +66,7 @@ "\n", "### New to ValidMind?\n", "\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/code_samples/nlp_and_llm/rag_documentation_demo.ipynb b/notebooks/code_samples/nlp_and_llm/rag_documentation_demo.ipynb index f6942033e..dfc9ccc15 100644 --- a/notebooks/code_samples/nlp_and_llm/rag_documentation_demo.ipynb +++ b/notebooks/code_samples/nlp_and_llm/rag_documentation_demo.ipynb @@ -33,7 +33,7 @@ "\n", "### New to ValidMind?\n", "\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/code_samples/ongoing_monitoring/application_scorecard_ongoing_monitoring.ipynb b/notebooks/code_samples/ongoing_monitoring/application_scorecard_ongoing_monitoring.ipynb index e4e48884d..04ca7aa1c 100644 --- a/notebooks/code_samples/ongoing_monitoring/application_scorecard_ongoing_monitoring.ipynb +++ b/notebooks/code_samples/ongoing_monitoring/application_scorecard_ongoing_monitoring.ipynb @@ -33,7 +33,7 @@ "\n", "### New to ValidMind?\n", "\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/code_samples/ongoing_monitoring/quickstart_customer_churn_ongoing_monitoring.ipynb b/notebooks/code_samples/ongoing_monitoring/quickstart_customer_churn_ongoing_monitoring.ipynb index 156f3fb14..5396ee1aa 100644 --- a/notebooks/code_samples/ongoing_monitoring/quickstart_customer_churn_ongoing_monitoring.ipynb +++ b/notebooks/code_samples/ongoing_monitoring/quickstart_customer_churn_ongoing_monitoring.ipynb @@ -74,7 +74,7 @@ "\n", "### New to ValidMind?\n", "\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/code_samples/quickstart_customer_churn_full_suite.ipynb b/notebooks/code_samples/quickstart_customer_churn_full_suite.ipynb index 21c1aa17e..d7a9c6b87 100644 --- a/notebooks/code_samples/quickstart_customer_churn_full_suite.ipynb +++ b/notebooks/code_samples/quickstart_customer_churn_full_suite.ipynb @@ -75,7 +75,7 @@ "\n", "### New to ValidMind?\n", "\n", - "If you haven't already seen our [Get started with the ValidMind Library](https://docs.validmind.ai/developer/get-started-validmind-library.html), we recommend you explore the available resources for developers at some point. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", @@ -545,9 +545,9 @@ }, "gpuClass": "standard", "kernelspec": { - "display_name": "Python 3", + "display_name": "ValidMind Library", "language": "python", - "name": "python3" + "name": "validmind" }, "language_info": { "codemirror_mode": { diff --git a/notebooks/code_samples/time_series/quickstart_time_series_full_suite.ipynb b/notebooks/code_samples/time_series/quickstart_time_series_full_suite.ipynb index edd3ca9b5..c674ffd42 100644 --- a/notebooks/code_samples/time_series/quickstart_time_series_full_suite.ipynb +++ b/notebooks/code_samples/time_series/quickstart_time_series_full_suite.ipynb @@ -77,7 +77,7 @@ "\n", "### New to ValidMind?\n", "\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/code_samples/time_series/quickstart_time_series_high_code.ipynb b/notebooks/code_samples/time_series/quickstart_time_series_high_code.ipynb index 8873b8524..8418dcd0a 100644 --- a/notebooks/code_samples/time_series/quickstart_time_series_high_code.ipynb +++ b/notebooks/code_samples/time_series/quickstart_time_series_high_code.ipynb @@ -77,7 +77,7 @@ "\n", "### New to ValidMind?\n", "\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/code_sharing/operational_deposit/operational_deposit_poc.ipynb b/notebooks/code_sharing/operational_deposit/operational_deposit_poc.ipynb index 9fc708325..b98aa9d67 100644 --- a/notebooks/code_sharing/operational_deposit/operational_deposit_poc.ipynb +++ b/notebooks/code_sharing/operational_deposit/operational_deposit_poc.ipynb @@ -37,7 +37,7 @@ "\n", "### New to ValidMind?\n", "\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/code_sharing/output_templates/customizing_tests_with_output_templates.ipynb b/notebooks/code_sharing/output_templates/customizing_tests_with_output_templates.ipynb index 980529dd4..c787a8831 100644 --- a/notebooks/code_sharing/output_templates/customizing_tests_with_output_templates.ipynb +++ b/notebooks/code_sharing/output_templates/customizing_tests_with_output_templates.ipynb @@ -85,7 +85,7 @@ "\n", "### New to ValidMind?\n", "\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/how_to/add_context_to_llm_descriptions.ipynb b/notebooks/how_to/add_context_to_llm_descriptions.ipynb index 6951e5285..45736d2c7 100644 --- a/notebooks/how_to/add_context_to_llm_descriptions.ipynb +++ b/notebooks/how_to/add_context_to_llm_descriptions.ipynb @@ -23,7 +23,7 @@ "- [Initialize the Python environment](#toc3_) \n", "- [Load the sample dataset](#toc4_) \n", " - [Preprocess the raw dataset](#toc4_1_) \n", - "- [Initialize the ValidMind objects](#toc5_) \n", + "- [Initializing the ValidMind objects](#toc5_) \n", " - [Initialize the datasets](#toc5_1_) \n", " - [Initialize a model object](#toc5_2_) \n", " - [Assign predictions to the datasets](#toc5_3_) \n", @@ -230,7 +230,7 @@ "source": [ "\n", "\n", - "## Initialize the ValidMind objects" + "## Initializing the ValidMind objects" ] }, { @@ -349,7 +349,7 @@ "\n", "By default, custom context for LLM-generated descriptions is disabled, meaning that the output will not include any additional context.\n", "\n", - "Let's generate an initial test description for the `DatasetDescription` test for comparision with later iterations:" + "Let's generate an initial test description for the `DatasetDescription` test for comparison with later iterations:" ] }, { @@ -376,7 +376,7 @@ "\n", "To enable custom use case context, set the `VALIDMIND_LLM_DESCRIPTIONS_CONTEXT_ENABLED` environment variable to `1`.\n", "\n", - "This is a global setting that will affect all tests for your linked model:" + "This is a global setting that will affect all tests for your linked model for the duration of your ValidMind Library session:" ] }, { @@ -431,7 +431,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "With the use case context set, generate an updated test description for the `DatasetDescription` test for comparision with default output:" + "With the use case context set, generate an updated test description for the `DatasetDescription` test for comparison with default output:" ] }, { @@ -458,7 +458,7 @@ "\n", "To disable custom use case context, set the `VALIDMIND_LLM_DESCRIPTIONS_CONTEXT_ENABLED` environment variable to `0`.\n", "\n", - "This is a global setting that will affect all tests for your linked model:" + "This is a global setting that will affect all tests for your linked model for the duration of your ValidMind Library session:" ] }, { @@ -474,7 +474,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "With the use case context disabled again, generate another test description for the `DatasetDescription` test for comparision with previous custom output:" + "With the use case context disabled again, generate another test description for the `DatasetDescription` test for comparison with previous custom output:" ] }, { diff --git a/notebooks/how_to/configure_dataset_features.ipynb b/notebooks/how_to/configure_dataset_features.ipynb index 9bf927740..2e381bd89 100644 --- a/notebooks/how_to/configure_dataset_features.ipynb +++ b/notebooks/how_to/configure_dataset_features.ipynb @@ -67,7 +67,7 @@ "\n", "### New to ValidMind?\n", "\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. 
There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/how_to/document_multiple_results_for_the_same_test.ipynb b/notebooks/how_to/document_multiple_results_for_the_same_test.ipynb index 8dc4ab10d..2529727af 100644 --- a/notebooks/how_to/document_multiple_results_for_the_same_test.ipynb +++ b/notebooks/how_to/document_multiple_results_for_the_same_test.ipynb @@ -82,7 +82,7 @@ "\n", "### New to ValidMind?\n", "\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/how_to/explore_test_suites.ipynb b/notebooks/how_to/explore_test_suites.ipynb index 7cb5e2e49..4fb36d894 100644 --- a/notebooks/how_to/explore_test_suites.ipynb +++ b/notebooks/how_to/explore_test_suites.ipynb @@ -63,7 +63,7 @@ "\n", "### New to ValidMind?\n", "\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/how_to/load_datasets_predictions.ipynb b/notebooks/how_to/load_datasets_predictions.ipynb index 81c4e0e98..d0f629307 100644 --- a/notebooks/how_to/load_datasets_predictions.ipynb +++ b/notebooks/how_to/load_datasets_predictions.ipynb @@ -79,7 +79,7 @@ "\n", "### New to ValidMind?\n", "\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/how_to/log_metrics_over_time.ipynb b/notebooks/how_to/log_metrics_over_time.ipynb index d551d58ff..bbf4b94cb 100644 --- a/notebooks/how_to/log_metrics_over_time.ipynb +++ b/notebooks/how_to/log_metrics_over_time.ipynb @@ -78,7 +78,7 @@ "\n", "\n", "### New to ValidMind?\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/how_to/run_documentation_sections.ipynb b/notebooks/how_to/run_documentation_sections.ipynb index b7b43e379..9066d637f 100644 --- a/notebooks/how_to/run_documentation_sections.ipynb +++ b/notebooks/how_to/run_documentation_sections.ipynb @@ -73,7 +73,7 @@ "\n", "### New to ValidMind?\n", "\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/how_to/run_documentation_tests_with_config.ipynb b/notebooks/how_to/run_documentation_tests_with_config.ipynb index 0eea64a46..8ad291fe8 100644 --- a/notebooks/how_to/run_documentation_tests_with_config.ipynb +++ b/notebooks/how_to/run_documentation_tests_with_config.ipynb @@ -77,7 +77,7 @@ "\n", "### New to ValidMind?\n", "\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/how_to/run_tests/1_run_dataset_based_tests.ipynb b/notebooks/how_to/run_tests/1_run_dataset_based_tests.ipynb index dfbc4a0de..acfb923f8 100644 --- a/notebooks/how_to/run_tests/1_run_dataset_based_tests.ipynb +++ b/notebooks/how_to/run_tests/1_run_dataset_based_tests.ipynb @@ -72,7 +72,7 @@ "\n", "\n", "### New to ValidMind?\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/how_to/run_tests/2_run_comparison_tests.ipynb b/notebooks/how_to/run_tests/2_run_comparison_tests.ipynb index 9370fc98b..ab8880c4c 100644 --- a/notebooks/how_to/run_tests/2_run_comparison_tests.ipynb +++ b/notebooks/how_to/run_tests/2_run_comparison_tests.ipynb @@ -79,7 +79,7 @@ "\n", "\n", "### New to ValidMind?\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/how_to/run_tests_that_require_multiple_datasets.ipynb b/notebooks/how_to/run_tests_that_require_multiple_datasets.ipynb index 9fde46220..184594bf2 100644 --- a/notebooks/how_to/run_tests_that_require_multiple_datasets.ipynb +++ b/notebooks/how_to/run_tests_that_require_multiple_datasets.ipynb @@ -75,7 +75,7 @@ "\n", "### New to ValidMind?\n", "\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/how_to/run_unit_metrics.ipynb b/notebooks/how_to/run_unit_metrics.ipynb index c6469e7ac..163306ae3 100644 --- a/notebooks/how_to/run_unit_metrics.ipynb +++ b/notebooks/how_to/run_unit_metrics.ipynb @@ -104,7 +104,7 @@ "\n", "### New to ValidMind? \n", "\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/how_to/use_dataset_model_objects.ipynb b/notebooks/how_to/use_dataset_model_objects.ipynb index 44ef1b151..abf449cb0 100644 --- a/notebooks/how_to/use_dataset_model_objects.ipynb +++ b/notebooks/how_to/use_dataset_model_objects.ipynb @@ -79,7 +79,7 @@ "\n", "### New to ValidMind?\n", "\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/templates/about-validmind.ipynb b/notebooks/templates/about-validmind.ipynb index 1c135f268..e982e60d0 100644 --- a/notebooks/templates/about-validmind.ipynb +++ b/notebooks/templates/about-validmind.ipynb @@ -31,7 +31,7 @@ "source": [ "### New to ValidMind?\n", "\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/tutorials/intro_for_model_developers.ipynb b/notebooks/tutorials/intro_for_model_developers.ipynb index 95e1b19f2..b3d62dae2 100644 --- a/notebooks/tutorials/intro_for_model_developers.ipynb +++ b/notebooks/tutorials/intro_for_model_developers.ipynb @@ -4,1789 +4,26 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# ValidMind Introduction for Model Developers\n", + "# ValidMind introduction for model developers\n", "\n", - "As a model developer, learn how the end-to-end documentation process works based on common scenarios you encounter in model development settings.\n", + "Learn how to use ValidMind for your end-to-end model documentation process based on common model development scenarios with our *ValidMind for model development* series of four introductory notebooks:\n", "\n", - "As a prerequisite, a model documentation template must be available on the ValidMind Platform. You can [view the available templates](https://docs.validmind.com/guide/model-documentation/swap-documentation-templates.html#view-current-templates) to see what templates been set up for your organization.\n", + "1. [101 Set up the ValidMind Library](/notebooks/tutorials/model_development/101-set_up_validmind.ipynb)\n", + "2. [102 Start the model development process](/notebooks/tutorials/model_development/102-start_development_process.ipynb)\n", + "3. [103 Integrate custom tests](/notebooks/tutorials/model_development/103-integrate_custom_tests.ipynb)\n", + "4. [104 Finalize testing and documentation](/notebooks/tutorials/model_development/104-finalize_testing_documentation.ipynb)\n", "\n", - "This notebook uses a binary classification model as an example, but the same principles shown here apply to other model types.\n", - "\n", - "\n", - "\n", - "## Overview of the notebook\n", - "\n", - "**1. Initializing the ValidMind Library**\n", - "\n", - "The ValidMind Library provides a rich collection of documentation tools and test suites, from documenting descriptions of datasets to validation and testing of models using a variety of open-source testing frameworks.\n", - "\n", - "**2. Start the model development process with raw data, run out-of-the box tests, and add evidence to model documentation**\n", - "\n", - "Learn how to access ValidMind's test repository of individual tests that you will use as building blocks to ensure a model is being built appropriately. The goal is to show how to run tests, investigate results, and add tests results or evidence to the documentation.\n", - "\n", - "For a full list of out-of-the-box tests, see [Test descriptions](https://docs.validmind.ai/developer/model-testing/test-descriptions.html) or try the interactive [Test sandbox](https://docs.validmind.ai/developer/model-testing/test-sandbox.html).\n", - "\n", - "**3. Implementing custom tests**\n", - "\n", - "Usually, model developers have their own custom tests and it is important to include this within the model documentation. We will show you how to include custom tests and then how they can be added to documentation as additional evidence.\n", - "\n", - "**4. Finalize testing and documentation**\n", - "\n", - "Learn how you can ensure that model documentation includes custom tests and how to make test configuration changes that apply to all tests in the model documentation template. 
At the end of this section you should have a fully documented model ready for review.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Contents\n", - "\n", - "- [Overview of the notebook](#toc1_)\n", - "- [About ValidMind](#toc2_)\n", - " - [Before you begin](#toc2_1_)\n", - " - [New to ValidMind?](#toc2_2_)\n", - " - [Key concepts](#toc2_3_)\n", - "- [1. Initializing the ValidMind Library](#toc4_)\n", - " - [Install the ValidMind Library](#toc4_1_)\n", - " - [Initialize the ValidMind Library](#toc4_2_)\n", - " - [Get your code snippet](#toc4_2_1_)\n", - " - [Verify & preview the documentation template](#toc4_3_)\n", - "- [2. Start the model development process with raw data, run out-of-the box tests, and add evidence to model documentation](#toc5_)\n", - " - [Initialize the ValidMind datasets](#toc5_1_)\n", - " - [Run some tabular data tests](#toc5_2_)\n", - " - [Utilize test output](#toc5_3_)\n", - " - [Documenting the results based on two datasets](#toc5_4_)\n", - " - [Run `run_documentation_tests()` using `vm_raw_dataset_preprocessed` as input](#toc5_4_1_)\n", - " - [Log the individual result of the high correlation test that used `vm_balanced_raw_dataset` (that had a highly correlated `Age` column) as input](#toc5_4_2_)\n", - " - [Add individual test results to model documentation](#toc5_5_)\n", - " - [Model Testing](#toc5_6_)\n", - " - [Initialize model evaluation objects and assigning predictions](#toc5_7_)\n", - " - [Run the model evaluation tests](#toc5_8_)\n", - "- [3. Implementing custom tests](#toc6_)\n", - " - [Create a confusion matrix plot](#toc6_1_)\n", - " - [Add parameters to custom tests](#toc6_2_)\n", - " - [Pass parameters to custom tests](#toc6_3_)\n", - " - [Log the confusion matrix results](#toc6_4_)\n", - " - [Using external test providers](#toc6_5_)\n", - " - [Create a folder of custom tests from existing inline tests](#toc6_5_1_)\n", - " - [Save an inline test to a file](#toc6_5_2_)\n", - " - [Define and register a `LocalTestProvider` that points to that folder](#toc6_5_3_)\n", - " - [Initializing a local test provider](#toc6_6_)\n", - " - [Run test provider tests](#toc6_6_1_)\n", - " - [Add the test results to your documentation](#toc6_6_2_)\n", - "- [4. Finalize testing and documentation](#toc7_)\n", - " - [Use `run_documentation_tests()` to ensure custom test results are included in your documentation](#toc7_1_)\n", - " - [Viewing and updating the configuration for the entire model documentation template](#toc7_2_)\n", - " - [Update the config](#toc7_2_1_)\n", - "- [Where to go from here](#toc8_)\n", - " - [Use cases](#toc8_1_)\n", - " - [More how-to guides and code samples](#toc8_2_)\n", - " - [Discover more learning resources](#toc8_3_)\n", - "- [Upgrade ValidMind](#toc9_)\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "## About ValidMind\n", - "\n", - "ValidMind is a suite of tools for managing model risk, including risk associated with AI and statistical models. You use the ValidMind Library to automate documentation and validation tests, and then use the ValidMind Platform to collaborate on model documentation. 
Together, these products simplify model risk management, facilitate compliance with regulations and institutional standards, and enhance collaboration between yourself and model validators.\n", - "\n", - "\n", - "\n", - "### Before you begin\n", - "\n", - "This notebook assumes you have basic familiarity with Python, including an understanding of how functions work. If you are new to Python, you can still run the notebook but we recommend further familiarizing yourself with the language.\n", - "\n", - "If you encounter errors due to missing modules in your Python environment, install the modules with `pip install`, and then re-run the notebook. For more help, refer to [Installing Python Modules](https://docs.python.org/3/installing/index.html).\n", - "\n", - "\n", - "\n", - "### New to ValidMind?\n", - "\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", - "\n", - "
For access to all features available in this notebook, create a free ValidMind account.\n", + "
Or, take our Developer Fundamentals course, which walks you through the basics of ValidMind paired with this notebook series.\n", + "

\n", - "Signing up is FREE — Register with ValidMind
\n", - "\n", - "\n", - "\n", - "### Key concepts\n", - "\n", - "**Model documentation**: A structured and detailed record pertaining to a model, encompassing key components such as its underlying assumptions, methodologies, data sources, inputs, performance metrics, evaluations, limitations, and intended uses. It serves to ensure transparency, adherence to regulatory requirements, and a clear understanding of potential risks associated with the model’s application.\n", - "\n", - "**Documentation template**: Functions as a test suite and lays out the structure of model documentation, segmented into various sections and sub-sections. Documentation templates define the structure of your model documentation, specifying the tests that should be run, and how the results should be displayed.\n", - "\n", - "**Tests**: A function contained in the ValidMind Library, designed to run a specific quantitative test on the dataset or model. Tests are the building blocks of ValidMind, used to evaluate and document models and datasets, and can be run individually or as part of a suite defined by your model documentation template.\n", - "\n", - "**Custom tests**: Custom tests are functions that you define to evaluate your model or dataset. These functions can be registered via the ValidMind Library to be used with the ValidMind Platform.\n", - "\n", - "**Inputs**: Objects to be evaluated and documented in the ValidMind Library. They can be any of the following:\n", - "\n", - "- **model**: A single model that has been initialized in ValidMind with [`vm.init_model()`](https://docs.validmind.ai/validmind/validmind.html#init_model).\n", - "- **dataset**: Single dataset that has been initialized in ValidMind with [`vm.init_dataset()`](https://docs.validmind.ai/validmind/validmind.html#init_dataset).\n", - "- **models**: A list of ValidMind models - usually this is used when you want to compare multiple models in your custom test.\n", - "- **datasets**: A list of ValidMind datasets - usually this is used when you want to compare multiple datasets in your custom test. See this [example](https://docs.validmind.ai/notebooks/how_to/run_tests_that_require_multiple_datasets.html) for more information.\n", - "\n", - "**Parameters**: Additional arguments that can be passed when running a ValidMind test, used to pass additional information to a test, customize its behavior, or provide additional context.\n", - "\n", - "**Outputs**: Custom tests can return elements like tables or plots. Tables may be a list of dictionaries (each representing a row) or a pandas DataFrame. Plots may be matplotlib or plotly figures.\n", - "\n", - "**Test suites**: Collections of tests designed to run together to automate and generate model documentation end-to-end for specific use-cases.\n", - "\n", - "Example: the [`classifier_full_suite`](https://docs.validmind.ai/validmind/validmind/test_suites/classifier.html#ClassifierFullSuite) test suite runs tests from the [`tabular_dataset`](https://docs.validmind.ai/validmind/validmind/test_suites/tabular_datasets.html) and [`classifier`](https://docs.validmind.ai/validmind/validmind/test_suites/classifier.html) test suites to fully document the data and model sections for binary classification model use-cases.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "## 1. 
Initializing the ValidMind Library\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Install the ValidMind Library\n", - "\n", - "Please note the following recommended Python versions to use:\n", - "\n", - "- Python 3.8 <= x <= 3.11\n", - "\n", - "To install the library:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "%pip install -q validmind" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Initialize the ValidMind Library\n", - "\n", - "ValidMind generates a unique _code snippet_ for each registered model to connect with your developer environment. You initialize the ValidMind Library with this code snippet, which ensures that your documentation and tests are uploaded to the correct model when you run the notebook.\n", - "\n", - "\n", - "\n", - "### Get your code snippet\n", - "\n", - "1. In a browser, [log in to ValidMind](https://docs.validmind.ai/guide/configuration/log-in-to-validmind.html).\n", - "\n", - "2. In the left sidebar, navigate to **Model Inventory** and click **+ Register Model**.\n", - "\n", - "3. Enter the model details and click **Continue**. ([Need more help?](https://docs.validmind.ai/guide/model-inventory/register-models-in-inventory.html))\n", - "\n", - " For example, to register a model for use with this notebook, select:\n", - "\n", - " - Documentation template: `Binary classification`\n", - " - Use case: `Marketing/Sales - Attrition/Churn Management`\n", - "\n", - " You can fill in other options according to your preference.\n", - "\n", - "4. Go to **Getting Started** and click **Copy snippet to clipboard**.\n", - "\n", - "Next, [load your model identifier credentials from an `.env` file](https://docs.validmind.ai/developer/model-documentation/store-credentials-in-env-file.html) or replace the placeholder with your own code snippet:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "# Load your model identifier credentials from an `.env` file\n", - "\n", - "%load_ext dotenv\n", - "%dotenv .env\n", - "\n", - "# Or replace with your code snippet\n", - "\n", - "import validmind as vm\n", - "\n", - "vm.init(\n", - " # api_host=\"...\",\n", - " # api_key=\"...\",\n", - " # api_secret=\"...\",\n", - " # model=\"...\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Verify & preview the documentation template\n", - "\n", - "Let's verify that you have connected to ValidMind and that the appropriate template is selected. A template predefines sections for your model documentation and provides a general outline to follow, making the documentation process much easier.\n", - "\n", - "You will upload documentation and test results for this template later on. For now, take a look at the structure that the template provides with the `vm.preview_template()` function from the ValidMind library and note the empty sections:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "vm.preview_template()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Before learning how to run tests, let's explore the list of all available tests in the ValidMind Library. 
You can see that the documentation template for this model has references to some of the test IDs listed below.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "vm.tests.list_tests()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "## 2. Start the model development process with raw data, run out-of-the box tests, and add evidence to model documentation\n", - "\n", - "In this section you learn how to explore the individual tests available in ValidMind and how to run them and change parameters as necessary. You will use a public dataset from Kaggle that models a bank customer churn prediction use case. The target column, `Exited` has a value of `1` when a customer has churned and `0` otherwise.\n", - "\n", - "You can find more information about this dataset [here](https://www.kaggle.com/datasets/shantanudhakadd/bank-customer-churn-prediction).\n", - "\n", - "The ValidMind Library provides a wrapper to automatically load the dataset as a Pandas DataFrame object.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "from validmind.datasets.classification import customer_churn as demo_dataset\n", - "\n", - "print(\n", - " f\"Loaded demo dataset with: \\n\\n\\t• Target column: '{demo_dataset.target_column}' \\n\\t• Class labels: {demo_dataset.class_labels}\"\n", - ")\n", - "\n", - "raw_df = demo_dataset.load_data()\n", - "raw_df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's do some data quality assessments by running a few individual tests related to data assessment. You will use the `vm.tests.list_tests()` function introduced above in combination with `vm.tests.list_tags()` and `vm.tests.list_tasks()` to find which prebuilt tests are relevant for data quality assessment.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "# Get the list of available tags\n", - "sorted(vm.tests.list_tags())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "# Get the list of available task types\n", - "sorted(vm.tests.list_tasks())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can pass `tags` and `tasks` as parameters to the `vm.tests.list_tests()` function to filter the tests based on the tags and task types. For example, to find tests related to tabular data quality for classification models, you can call `list_tests()` like this:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "vm.tests.list_tests(task=\"classification\", tags=[\"tabular_data\", \"data_quality\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Initialize the ValidMind datasets\n", - "\n", - "Now, assume we have identified some tests we want to run with regards to the data we are intending to use. The next step is to connect your data with a ValidMind `Dataset` object. This step is always necessary every time you want to connect a dataset to documentation and produce test results through ValidMind. 
You only need to do it one time per dataset.\n", - "\n", - "You can initialize a ValidMind dataset object using the [`init_dataset`](https://docs.validmind.ai/validmind/validmind.html#init_dataset) function from the ValidMind (`vm`) module.\n", - "\n", - "This function takes a number of arguments:\n", - "\n", - "- `dataset` — the raw dataset that you want to provide as input to tests\n", - "- `input_id` - a unique identifier that allows tracking what inputs are used when running each individual test\n", - "- `target_column` — a required argument if tests require access to true values. This is the name of the target column in the dataset\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "# vm_raw_dataset is now a VMDataset object that you can pass to any ValidMind test\n", - "vm_raw_dataset = vm.init_dataset(\n", - " dataset=raw_df,\n", - " input_id=\"raw_dataset\",\n", - " target_column=\"Exited\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Run some tabular data tests\n", - "\n", - "Individual tests can be easily run by calling the `run_test` function provided by the `validmind.tests` module. The function takes the following arguments:\n", - "\n", - "- `test_id`: The ID of the test to run. To find a particular test and get its ID, refer to the [explore_tests](../how_to/explore_tests.ipynb) notebook. Look above for example after running 'vm.test_suites.describe_suite' as column 'Test ID' will contain the id.\n", - "- `params`: A dictionary of parameters for the test. These will override any `default_params` set in the test definition. Refer to the [explore_tests](../how_to/explore_tests.ipynb) notebook to find the default parameters for a test. See below for examples.\n", - "\n", - "The inputs expected by a test can also be found in the test definition. Let's take `validmind.data_validation.DescriptiveStatistics` as an example. Note that the output of the `describe_test()` function below shows that this test expects a `dataset` as input:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "vm.tests.describe_test(\"validmind.data_validation.DescriptiveStatistics\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, let's run a few tests to assess the quality of the dataset.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "result = vm.tests.run_test(\n", - " test_id=\"validmind.data_validation.DescriptiveStatistics\",\n", - " inputs={\"dataset\": vm_raw_dataset},\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "result2 = vm.tests.run_test(\n", - " test_id=\"validmind.data_validation.ClassImbalance\",\n", - " inputs={\"dataset\": vm_raw_dataset},\n", - " params={\"min_percent_threshold\": 30},\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can see that the class imbalance test did not pass according to the value of `min_percent_threshold` we have set. Here is how you can re-run the test on some processed data to address this data quality issue. 
In this case we apply a very simple rebalancing technique to the dataset.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "raw_copy_df = raw_df.sample(frac=1) # Create a copy of the raw dataset\n", - "\n", - "# Create a balanced dataset with the same number of exited and not exited customers\n", - "exited_df = raw_copy_df.loc[raw_copy_df[\"Exited\"] == 1]\n", - "not_exited_df = raw_copy_df.loc[raw_copy_df[\"Exited\"] == 0].sample(n=exited_df.shape[0])\n", - "\n", - "balanced_raw_df = pd.concat([exited_df, not_exited_df])\n", - "balanced_raw_df = balanced_raw_df.sample(frac=1, random_state=42)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "With this new raw dataset, you can re-run the individual test to see if it passes the class imbalance test requirement. Remember to register new VM Dataset object since that is the type of input required by `run_test()`:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "# Register new data and now 'balanced_raw_dataset' is the new dataset object of interest\n", - "vm_balanced_raw_dataset = vm.init_dataset(\n", - " dataset=balanced_raw_df,\n", - " input_id=\"balanced_raw_dataset\",\n", - " target_column=\"Exited\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "result = vm.tests.run_test(\n", - " test_id=\"validmind.data_validation.ClassImbalance\",\n", - " inputs={\"dataset\": vm_balanced_raw_dataset},\n", - " params={\"min_percent_threshold\": 30},\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Utilize test output\n", - "\n", - "Here is an example for how you can utilize the output from a ValidMind test for futher use, for example, if you want to remove highly correlated features. The example below shows how you can get the list of features with the highest correlation coefficients and use them to reduce the final list of features for modeling.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "corr_result = vm.tests.run_test(\n", - " test_id=\"validmind.data_validation.HighPearsonCorrelation\",\n", - " params={\"max_threshold\": 0.3},\n", - " inputs={\"dataset\": vm_balanced_raw_dataset},\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's assume we want to remove highly correlated features from the dataset. `corr_result` is an object of type `TestResult`. We can inspect the result object to see what the test has produced." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "print(type(corr_result))\n", - "print(\"Result ID: \", corr_result.result_id)\n", - "print(\"Params: \", corr_result.params)\n", - "print(\"Passed: \", corr_result.passed)\n", - "print(\"Tables: \", corr_result.tables)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's check out the table in the result and extract a list of features that failed the test:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "features_df = corr_result.tables[0].data\n", - "features_df" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Remove the highly correlated features and create a new VM dataset object. Note the use of different `input_id`s. This allows tracking the inputs used when running each individual test.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "high_correlation_features = features_df[features_df[\"Pass/Fail\"] == \"Fail\"][\"Columns\"].tolist()\n", - "high_correlation_features" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Extract the feature names from the list of strings (e.g. '(Age, Exited)' -> 'Age')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "high_correlation_features = [feature.split(\",\")[0].strip(\"()\") for feature in high_correlation_features]\n", - "high_correlation_features" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "# Remove the highly correlated features from the dataset\n", - "balanced_raw_no_age_df = balanced_raw_df.drop(columns=high_correlation_features)\n", - "\n", - "# Re-initialize the dataset object\n", - "vm_raw_dataset_preprocessed = vm.init_dataset(\n", - " dataset=balanced_raw_no_age_df,\n", - " input_id=\"raw_dataset_preprocessed\",\n", - " target_column=\"Exited\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Re-running the test with the reduced feature set should pass the test. You can also plot the correlation matrix to visualize the new correlation between features:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "corr_result = vm.tests.run_test(\n", - " test_id=\"validmind.data_validation.HighPearsonCorrelation\",\n", - " params={\"max_threshold\": 0.3},\n", - " inputs={\"dataset\": vm_raw_dataset_preprocessed},\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "corr_result = vm.tests.run_test(\n", - " test_id=\"validmind.data_validation.PearsonCorrelationMatrix\",\n", - " inputs={\"dataset\": vm_raw_dataset_preprocessed},\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Documenting the results based on two datasets\n", - "\n", - "We have now done some analysis on two different datasets and we should able to document why certain things were done to the raw data with testing to support it. Every test result returned by the `run_test()` function has a `.log()` method that can be used to log the test results to ValidMind. 
When logging individual results to ValidMind you need to manually add those results in a specific section of the model documentation.\n", - "\n", - "When using `run_documentation_tests()`, it's possible to automatically populate a section with the results of all tests that were registered in the documentation template.\n", - "\n", - "To show how to add individual results to any documentation section, we're going to populate the entire `data_preparation` section of the documentation using the clean `vm_raw_dataset_preprocessed` dataset as input, and then we're going to document an additional result for the highly correlated dataset `vm_balanced_raw_dataset`. The following two steps will accomplish this:\n", - "\n", - "1. Run `run_documentation_tests()` using `vm_raw_dataset_preprocessed` as input. This populates the entire data preparation section for every test that is already part of the documentation template.\n", - "2. Log the individual result of the high correlation test that used `vm_balanced_raw_dataset` (that had a highly correlated `Age` column) as input\n", - "\n", - "After adding the result of step #2 to the documentation you will be able to explain the changes made to the raw data by editing the default description of the test result within the ValidMind Platform.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "#### Run `run_documentation_tests()` using `vm_raw_dataset_preprocessed` as input\n", - "\n", - "`run_documentation_tests()` allows you to run multiple tests at once and log the results to the documentation. The function takes the following arguments:\n", - "\n", - "- `inputs`: any inputs to be passed to the tests\n", - "- `config`: a dictionary `:` that allows configuring each test individually. Each test config has the following form:\n", - " - `params`: individual test parameters\n", - " - `inputs`: individual test inputs. When passed, this overrides any inputs passed from the `run_documentation_tests()` function\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "test_config = {\n", - " \"validmind.data_validation.ClassImbalance\": {\n", - " \"params\": {\"min_percent_threshold\": 30},\n", - " },\n", - " \"validmind.data_validation.HighPearsonCorrelation\": {\n", - " \"params\": {\"max_threshold\": 0.3},\n", - " },\n", - "}\n", - "\n", - "tests_suite = vm.run_documentation_tests(\n", - " inputs={\n", - " \"dataset\": vm_raw_dataset_preprocessed,\n", - " },\n", - " config=test_config,\n", - " section=[\"data_preparation\"],\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "#### Log the individual result of the high correlation test that used `vm_balanced_raw_dataset` (that had a highly correlated `Age` column) as input\n", - "\n", - "Here you can use a custom `result_id` to tag the individual result with a unique identifier. This `result_id` can be appended to `test_id` with a `:` separator. 
The `balanced_raw_dataset` result identifier will correspond to the `balanced_raw_dataset` input, the dataset that still has the `Age` column.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "result = vm.tests.run_test(\n", - " test_id=\"validmind.data_validation.HighPearsonCorrelation:balanced_raw_dataset\",\n", - " params={\"max_threshold\": 0.3},\n", - " inputs={\"dataset\": vm_balanced_raw_dataset},\n", - ")\n", - "result.log()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Add individual test results to model documentation\n", - "\n", - "You can now visit the documentation page for the model you connected to at the beginning of this notebook and add a new content block in the relevant section.\n", - "\n", - "To do this, go to the documentation page of your model and navigate to the `Data Preparation` -> `Correlations and Interactions` section. Then hover after the \"Pearson Correlation Matrix\" content block to reveal the `+` button as shown in the screenshot below.\n", - "\n", - "![screenshot showing insert button for test-driven blocks](../images/insert-test-driven-block-correlations.png)\n", - "\n", - "Click on the `+` button and select `Test-Driven Block`. This will open a dialog where you can select `Threshold Test` as the type of the test-driven content block, and then select `High Pearson Correlation Vm Raw Dataset Test`. This will show a preview of the result and it should match the results shown above.\n", - "\n", - "![screenshot showing the selected test result in the dialog](../images/selecting-high-pearson-correlation-test.png)\n", - "\n", - "Finally, click on the `Insert block` button to add the test result to the documentation. You'll now see two individual results for the high correlation test in the `Correlations and Interactions` section of the documentation. To finalize the documentation, you can edit the test result's description block to explain the changes made to the raw data and the reasons behind them as we can see in the screenshot below.\n", - "\n", - "![screenshot showing the high pearson correlation block](../images/high-pearson-correlation-block.png)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Model Testing\n", - "\n", - "We have focused so far on the data assessment and pre-processing that usually occurs prior to any models being built. Now we are going to assume we have built a model and we want to incorporate some model results in our documentation.\n", - "\n", - "Let's train a simple logistic regression model on the dataset and evaluate its performance. You will use the `LogisticRegression` class from the `sklearn.linear_model` and use ValidMind tests to evaluate the model's performance.\n", - "\n", - "Before training the model, we need to encode the categorical features in the dataset. You will use the `OneHotEncoder` class from the `sklearn.preprocessing` module to encode the categorical features. 
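In practice, the encoding cells below take a small shortcut and use `pd.get_dummies` rather than `OneHotEncoder` itself. If you prefer to stay with the scikit-learn encoder, a rough sketch (assuming scikit-learn 1.2 or later for the `sparse_output` argument, and the two categorical columns named in the next sentence) might look like this:

```python
# Hedged sketch: one-hot encode the categorical columns with scikit-learn's OneHotEncoder
# instead of pd.get_dummies. Assumes scikit-learn >= 1.2 (for `sparse_output`) and the
# `Geography` and `Gender` columns used elsewhere in this notebook.
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

encoder = OneHotEncoder(drop="first", sparse_output=False)
encoded = encoder.fit_transform(balanced_raw_no_age_df[["Geography", "Gender"]])

encoded_df = pd.DataFrame(
    encoded,
    columns=encoder.get_feature_names_out(["Geography", "Gender"]),
    index=balanced_raw_no_age_df.index,
)

# Swap the original categorical columns for their encoded counterparts
balanced_raw_no_age_df = pd.concat(
    [balanced_raw_no_age_df.drop(columns=["Geography", "Gender"]), encoded_df], axis=1
)
```

Either route produces the same kind of binary indicator columns; the rest of the notebook assumes the `pd.get_dummies` version.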
The categorical features in the dataset are `Geography` and `Gender`.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "balanced_raw_no_age_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "balanced_raw_no_age_df = pd.get_dummies(\n", - " balanced_raw_no_age_df, columns=[\"Geography\", \"Gender\"], drop_first=True\n", - ")\n", - "balanced_raw_no_age_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.model_selection import train_test_split\n", - "\n", - "# Split the input and target variables\n", - "X = balanced_raw_no_age_df.drop(\"Exited\", axis=1)\n", - "y = balanced_raw_no_age_df[\"Exited\"]\n", - "X_train, X_test, y_train, y_test = train_test_split(\n", - " X,\n", - " y,\n", - " test_size=0.2,\n", - " random_state=42,\n", - ")\n", - "\n", - "# Logistic Regression grid params\n", - "log_reg_params = {\n", - " \"penalty\": [\"l1\", \"l2\"],\n", - " \"C\": [0.001, 0.01, 0.1, 1, 10, 100, 1000],\n", - " \"solver\": [\"liblinear\"],\n", - "}\n", - "\n", - "# Grid search for Logistic Regression\n", - "from sklearn.model_selection import GridSearchCV\n", - "\n", - "grid_log_reg = GridSearchCV(LogisticRegression(), log_reg_params)\n", - "grid_log_reg.fit(X_train, y_train)\n", - "\n", - "# Logistic Regression best estimator\n", - "log_reg = grid_log_reg.best_estimator_" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Initialize model evaluation objects and assigning predictions\n", - "\n", - "The last step for evaluating the model's performance is to initialize the ValidMind `Dataset` and `Model` objects and assign model predictions to each dataset. You will use the `init_dataset`, `init_model` and `assign_predictions` functions to initialize these objects.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "train_df = X_train\n", - "train_df[\"Exited\"] = y_train\n", - "test_df = X_test\n", - "test_df[\"Exited\"] = y_test\n", - "\n", - "vm_train_ds = vm.init_dataset(\n", - " input_id=\"train_dataset_final\",\n", - " dataset=train_df,\n", - " target_column=\"Exited\",\n", - ")\n", - "\n", - "vm_test_ds = vm.init_dataset(\n", - " input_id=\"test_dataset_final\",\n", - " dataset=test_df,\n", - " target_column=\"Exited\",\n", - ")\n", - "\n", - "# Register the model\n", - "vm_model = vm.init_model(log_reg, input_id=\"log_reg_model_v1\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Once the model has been registered you can assign model predictions to the training and test datasets. The `assign_predictions()` method from the `Dataset` object can link existing predictions to any number of models. 
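For instance, if you have already scored the datasets yourself, you can pass those values in explicitly. The sketch below assumes `assign_predictions()` accepts a `prediction_values` keyword for this purpose; treat the argument name as an assumption and check the library reference if it differs:

```python
# Hedged sketch: link predictions computed outside ValidMind instead of letting the
# library compute them. The `prediction_values` keyword is an assumption about the API.
precomputed_train_preds = log_reg.predict(vm_train_ds.x)
precomputed_test_preds = log_reg.predict(vm_test_ds.x)

vm_train_ds.assign_predictions(model=vm_model, prediction_values=precomputed_train_preds)
vm_test_ds.assign_predictions(model=vm_model, prediction_values=precomputed_test_preds)
```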
If no prediction values are passed, the method will compute predictions automatically:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "vm_train_ds.assign_predictions(model=vm_model)\n", - "vm_test_ds.assign_predictions(model=vm_model)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Run the model evaluation tests\n", - "\n", - "In this part, we focus on running the tests within the model development section of the model documentation. Only tests associated with this section will be executed, and the corresponding results will be updated in the model documentation. In the example below, you will focus on only running tests for the `model development` section of the document.\n", - "\n", - "Note the additional config that is passed to `run_documentation_tests()`. This allows you to override inputs or params in certain tests. In our case, we want to explicitly use the `vm_train_ds` for the `validmind.model_validation.sklearn.ClassifierPerformance:in_sample` test, since it's supposed to run on the training dataset and not the test dataset.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "test_config = {\n", - " \"validmind.model_validation.sklearn.ClassifierPerformance:in_sample\": {\n", - " \"inputs\": {\n", - " \"dataset\": vm_train_ds,\n", - " \"model\": vm_model,\n", - " },\n", - " }\n", - "}\n", - "results = vm.run_documentation_tests(\n", - " section=[\"model_development\"],\n", - " inputs={\n", - " \"dataset\": vm_test_ds, # Any test that requires a single dataset will use vm_test_ds\n", - " \"model\": vm_model,\n", - " \"datasets\": (\n", - " vm_train_ds,\n", - " vm_test_ds,\n", - " ), # Any test that requires multiple datasets will use vm_train_ds and vm_test_ds\n", - " },\n", - " config=test_config,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "## 3. Implementing custom tests\n", - "\n", - "This section assumes that model developers already have a repository of custom made tests that they consider critical to include in the documentation. Here we provide details on how to easily integrate custom tests with ValidMind.\n", - "\n", - "For a more in-depth introduction to custom tests, refer to this [notebook](../code_samples/custom_tests/implement_custom_tests.ipynb).\n", - "\n", - "A custom test is any function that takes a set of inputs and parameters as arguments and returns one or more outputs. The function can be as simple or as complex as you need it to be. It can use external libraries, make API calls, or do anything else that you can do in Python. The only requirement is that the function signature and return values can be \"understood\" and handled by the ValidMind Library. As such, custom tests offer added flexibility by extending the default tests provided by ValidMind, enabling you to document any type of model or use case.\n", - "\n", - "In the following example, you will learn how to implement a custom `inline` test that calculates the confusion matrix for a binary classification model. 
You will see that the custom test function is just a regular Python function that can include and require any Python library as you see fit.\n", - "\n", - "**NOTE**: in the context of Jupyter notebooks, we will use the word `inline` to refer to functions (or code) defined in the same notebook where they are used (this one) and not in a separate file, as we will see later with test providers.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Create a confusion matrix plot\n", - "\n", - "To understand how to create a custom test from anything, let's first create a confusion matrix plot using the `confusion_matrix` function from the `sklearn.metrics` module.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "from sklearn import metrics\n", - "\n", - "# Get the predicted classes\n", - "y_pred = log_reg.predict(vm_test_ds.x)\n", - "\n", - "confusion_matrix = metrics.confusion_matrix(y_test, y_pred)\n", - "\n", - "cm_display = metrics.ConfusionMatrixDisplay(\n", - " confusion_matrix=confusion_matrix, display_labels=[False, True]\n", - ")\n", - "cm_display.plot()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We will now create a @vm.test wrapper that will allow you to create a reusable test. Note the following changes in the code below:\n", - "\n", - "- The function `confusion_matrix` takes two arguments `dataset` and `model`. This is a `VMDataset` and `VMModel` object respectively.\n", - " - `VMDataset` objects allow you to access the dataset's true (target) values by accessing the `.y` attribute.\n", - " - `VMDataset` objects allow you to access the predictions for a given model by accessing the `.y_pred()` method.\n", - "- The function docstring provides a description of what the test does. This will be displayed along with the result in this notebook as well as in the ValidMind Platform.\n", - "- The function body calculates the confusion matrix using the `sklearn.metrics.confusion_matrix` function as we just did above.\n", - "- The function then returns the `ConfusionMatrixDisplay.figure_` object - this is important as the ValidMind Library expects the output of the custom test to be a plot or a table.\n", - "- The `@vm.test` decorator is doing the work of creating a wrapper around the function that will allow it to be run by the ValidMind Library. 
It also registers the test so it can be found by the ID `my_custom_tests.ConfusionMatrix` (see the section below on how test IDs work in ValidMind and why this format is important)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "@vm.test(\"my_custom_tests.ConfusionMatrix\")\n", - "def confusion_matrix(dataset, model):\n", - " \"\"\"The confusion matrix is a table that is often used to describe the performance of a classification model on a set of data for which the true values are known.\n", - "\n", - " The confusion matrix is a 2x2 table that contains 4 values:\n", - "\n", - " - True Positive (TP): the number of correct positive predictions\n", - " - True Negative (TN): the number of correct negative predictions\n", - " - False Positive (FP): the number of incorrect positive predictions\n", - " - False Negative (FN): the number of incorrect negative predictions\n", - "\n", - " The confusion matrix can be used to assess the holistic performance of a classification model by showing the accuracy, precision, recall, and F1 score of the model on a single figure.\n", - " \"\"\"\n", - " y_true = dataset.y\n", - " y_pred = dataset.y_pred(model=model)\n", - "\n", - " confusion_matrix = metrics.confusion_matrix(y_true, y_pred)\n", - "\n", - " cm_display = metrics.ConfusionMatrixDisplay(\n", - " confusion_matrix=confusion_matrix, display_labels=[False, True]\n", - " )\n", - " cm_display.plot()\n", - "\n", - " plt.close() # close the plot to avoid displaying it\n", - "\n", - " return cm_display.figure_ # return the figure object itself" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can now run the newly created custom test on both the training and test datasets using the `run_test()` function:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "# Training dataset\n", - "result = vm.tests.run_test(\n", - " \"my_custom_tests.ConfusionMatrix:training_dataset\",\n", - " inputs={\"model\": vm_model, \"dataset\": vm_train_ds},\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "# Test dataset\n", - "result = vm.tests.run_test(\n", - " \"my_custom_tests.ConfusionMatrix:test_dataset\",\n", - " inputs={\"model\": vm_model, \"dataset\": vm_test_ds},\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Add parameters to custom tests\n", - "\n", - "Custom tests can take parameters just like any other function. 
Let's modify the `confusion_matrix` function to take an additional parameter `normalize` that will allow you to normalize the confusion matrix.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "@vm.test(\"my_custom_tests.ConfusionMatrix\")\n", - "def confusion_matrix(dataset, model, normalize=False):\n", - " \"\"\"The confusion matrix is a table that is often used to describe the performance of a classification model on a set of data for which the true values are known.\n", - "\n", - " The confusion matrix is a 2x2 table that contains 4 values:\n", - "\n", - " - True Positive (TP): the number of correct positive predictions\n", - " - True Negative (TN): the number of correct negative predictions\n", - " - False Positive (FP): the number of incorrect positive predictions\n", - " - False Negative (FN): the number of incorrect negative predictions\n", - "\n", - " The confusion matrix can be used to assess the holistic performance of a classification model by showing the accuracy, precision, recall, and F1 score of the model on a single figure.\n", - " \"\"\"\n", - " y_true = dataset.y\n", - " y_pred = dataset.y_pred(model=model)\n", - "\n", - " if normalize:\n", - " confusion_matrix = metrics.confusion_matrix(y_true, y_pred, normalize=\"all\")\n", - " else:\n", - " confusion_matrix = metrics.confusion_matrix(y_true, y_pred)\n", - "\n", - " cm_display = metrics.ConfusionMatrixDisplay(\n", - " confusion_matrix=confusion_matrix, display_labels=[False, True]\n", - " )\n", - " cm_display.plot()\n", - "\n", - " plt.close() # close the plot to avoid displaying it\n", - "\n", - " return cm_display.figure_ # return the figure object itself" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Pass parameters to custom tests\n", - "\n", - "You can pass parameters to custom tests by providing a dictionary of parameters to the `run_test()` function. The parameters will override any default parameters set in the custom test definition. Note that `dataset` and `model` are still passed as `inputs`. Since these are `VMDataset` or `VMModel` inputs, they have a special meaning. When declaring a `dataset`, `model`, `datasets` or `models` argument in a custom test function, the ValidMind Library will expect these get passed as `inputs` to `run_test()` (or `run_documentation_tests()` instead).\n", - "\n", - "Re-running the confusion matrix with `normalize=True` looks like this:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "# Test dataset with normalize=True\n", - "result = vm.tests.run_test(\n", - " \"my_custom_tests.ConfusionMatrix:test_dataset_normalized\",\n", - " inputs={\"model\": vm_model, \"dataset\": vm_test_ds},\n", - " params={\"normalize\": True},\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Log the confusion matrix results\n", - "\n", - "As you saw in the pearson correlation example, you can log any result to the ValidMind Platform with the `.log()` method of the result object. 
This will allow you to add the result to the documentation.\n", - "\n", - "You can now do the same for the confusion matrix results.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "result.log()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Using external test providers\n", - "\n", - "Creating inline custom tests with a function is a great way to customize your model documentation. However, sometimes you may want to reuse the same set of tests across multiple models and share them with developers in your organization. In this case, you can create a custom test provider that will allow you to load custom tests from a local folder or a git repository.\n", - "\n", - "In this section you will learn how to declare a local filesystem test provider that allows loading tests from a local folder following these high level steps:\n", - "\n", - "1. Create a folder of custom tests from existing, inline tests (tests that exists in your active Jupyter notebook)\n", - "2. Save an inline test to a file\n", - "3. Define and register a `LocalTestProvider` that points to that folder\n", - "4. Run test provider tests\n", - "5. Add the test results to your documentation\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "#### Create a folder of custom tests from existing inline tests\n", - "\n", - "Here you will create a new folder that will contain reusable, custom tests. The following code snippet will create a new `my_tests` directory in the current working directory if it doesn't exist.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "tests_folder = \"my_tests\"\n", - "\n", - "import os\n", - "\n", - "# create tests folder\n", - "os.makedirs(tests_folder, exist_ok=True)\n", - "\n", - "# remove existing tests\n", - "for f in os.listdir(tests_folder):\n", - " # remove files and pycache\n", - " if f.endswith(\".py\") or f == \"__pycache__\":\n", - " os.system(f\"rm -rf {tests_folder}/{f}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "After running the command above, you should see a new directory next to this notebook file:\n", - "\n", - "![screenshot showing my_tests directory](../images/my_tests_directory.png)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "#### Save an inline test to a file\n", - "\n", - "The `@vm.test` decorator that was used above to register these as one-off custom tests also adds a convenience method to the function object that allows you to simply call `.save()` to save it to a file. This will save the function to a Python file to a path you specify. In this case, you can pass the variable `tests_folder` to save it to the custom tests folder we created.\n", - "\n", - "Normally, this will get you started by creating the file and saving the function code with the correct name. But it won't automatically add any import or other functions/variables outside of the function that are needed for the test to run. 
The `save()` method allows you to pass an optional `imports` argument that will ensure the necessary imports are added to the file.\n", - "\n", - "For the `confusion_matrix` test, note the imports that are required for the function to run properly:\n", - "\n", - "```python\n", - "import matplotlib.pyplot as plt\n", - "from sklearn import metrics\n", - "```\n", - "\n", - "You can pass these imports to the `save()` method to ensure they are included in the file with the following command:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "confusion_matrix.save(\n", - " tests_folder,\n", - " imports=[\"import matplotlib.pyplot as plt\", \"from sklearn import metrics\"],\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "##### What happened?\n", - "\n", - "The `save()` method saved the `confusion_matrix` function to a file named `ConfusionMatrix.py` in the `my_tests` folder. Note that the new file provides some context on the origin of the test, which is useful for traceability.\n", - "\n", - "```\n", - "# Saved from __main__.confusion_matrix\n", - "# Original Test ID: my_custom_tests.ConfusionMatrix\n", - "# New Test ID: .ConfusionMatrix\n", - "```\n", - "\n", - "Additionally, the new test function has been stripped off its decorator, as it now resides in a file that will be loaded by the test provider:\n", - "\n", - "```python\n", - "def ConfusionMatrix(dataset, model, normalize=False):\n", - "```\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "#### Define and register a `LocalTestProvider` that points to that folder\n", - "\n", - "With the `my_tests` folder now having a sample custom test, you can now initialize a test provider that will tell the ValidMind Library where to find these tests. ValidMind offers out-of-the-box test providers for local tests (i.e. tests in a folder) or a Github provider for tests in a Github repository. You can also create your own test provider by creating a class that has a `load_test` method that takes a test ID and returns the test function matching that ID.\n", - "\n", - "The most important attribute for a test provider is its `namespace`. This is a string that will be used to prefix test IDs in model documentation. This allows you to have multiple test providers with tests that can even share the same ID, but are distinguished by their namespace.\n", - "\n", - "An extended introduction to test providers can be found in [this](../code_samples/custom_tests/integrate_external_test_providers.ipynb) notebook.\n", - "\n", - "\n", - "\n", - "### Initializing a local test provider\n", - "\n", - "For most use-cases, the local test provider should be sufficient. This test provider allows you load custom tests from a designated directory. Let's go ahead and see how we can do this with our custom tests.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "from validmind.tests import LocalTestProvider\n", - "\n", - "# initialize the test provider with the tests folder we created earlier\n", - "my_test_provider = LocalTestProvider(tests_folder)\n", - "\n", - "vm.tests.register_test_provider(\n", - " namespace=\"my_test_provider\",\n", - " test_provider=my_test_provider,\n", - ")\n", - "# `my_test_provider.load_test()` will be called for any test ID that starts with `my_test_provider`\n", - "# e.g. 
`my_test_provider.ConfusionMatrix` will look for a function named `ConfusionMatrix` in `my_tests/ConfusionMatrix.py` file" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "#### Run test provider tests\n", - "\n", - "Now that you have set up the test provider, you can run any test that's located in the tests folder by using the `run_test()` method as with any other test. For tests that reside in a test provider directory, the test ID will be the `namespace` specified when registering the provider, followed by the path to the test file relative to the tests folder. For example, the Confusion Matrix test we created earlier will have the test ID `my_test_provider.ConfusionMatrix`. You could organize the tests in subfolders, say `classification` and `regression`, and the test ID for the Confusion Matrix test would then be `my_test_provider.classification.ConfusionMatrix`.\n", - "\n", - "Let's go ahead and re-run the confusion matrix test by using the test ID `my_test_provider.ConfusionMatrix`. This should load the test from the test provider and run it as before.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "result = vm.tests.run_test(\n", - " \"my_test_provider.ConfusionMatrix\",\n", - " inputs={\"model\": vm_model, \"dataset\": vm_test_ds},\n", - " params={\"normalize\": True},\n", - ")\n", - "\n", - "result.log()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "#### Add the test results to your documentation\n", - "\n", - "You have already seen how to add individual results to the model documentation using the ValidMind Platform. Let's repeat the process and add the confusion matrix to the `Model Development` -> `Model Evaluation` section of the documentation. The \"add test driven block\" dialog should now show the new test result coming from the test provider:\n", - "\n", - "![screenshot showing confusion matrix result](../images/insert-test-driven-block-custom-confusion-matrix.png)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "## 4. Finalize testing and documentation\n", - "\n", - "In this section we cover how to finalize the testing and documentation of your model by focusing on:\n", - "\n", - "1. Using `run_documentation_tests()` to ensure custom test results are included in your documentation\n", - "2. Viewing and updating the configuration for the entire model documentation template\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Use `run_documentation_tests()` to ensure custom test results are included in your documentation\n", - "\n", - "After adding test driven blocks to your model documentation, changes should persist and become available every time you call `vm.preview_template()`. 
However, you need to reload the connection to the ValidMind Platform if you have added test driven blocks when the connection was already established.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "vm.reload()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, run `preview_template()` and verify that the new confusion matrix test you added is included in the proper section.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "vm.preview_template()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Since the test ID is now registered in the document you can now run tests for an entire section and all additional custom tests should be loaded without issues. Let's run all tests in the `model_evaluation` section of the documentation. Note that we have been running the sample custom confusion matrix with `normalize=True` to demonstrate the ability to provide custom parameters.\n", - "\n", - "In the `Run the model evaluation tests` section above you learned how to assign inputs to individual tests with `run_documentation_tests()`. Assigning parametesr is similar, you only need to provide assign a `params` dictionary to a given test ID, `my_test_provider.ConfusionMatrix` in this case.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "test_config = {\n", - " \"validmind.model_validation.sklearn.ClassifierPerformance:in_sample\": {\n", - " \"inputs\": {\n", - " \"dataset\": vm_train_ds,\n", - " \"model\": vm_model,\n", - " },\n", - " },\n", - " \"my_test_provider.ConfusionMatrix\": {\n", - " \"params\": {\"normalize\": True},\n", - " },\n", - "}\n", - "results = vm.run_documentation_tests(\n", - " section=[\"model_evaluation\"],\n", - " inputs={\n", - " \"dataset\": vm_test_ds, # Any test that requires a single dataset will use vm_test_ds\n", - " \"model\": vm_model,\n", - " \"datasets\": (\n", - " vm_train_ds,\n", - " vm_test_ds,\n", - " ), # Any test that requires multiple datasets will use vm_train_ds and vm_test_ds\n", - " },\n", - " config=test_config,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Viewing and updating the configuration for the entire model documentation template\n", - "\n", - "The ValidMind Library provides a utility function called `vm.get_test_suite().get_default_config()` that allows you to render the default configuration for the entire documentation template. This configuration will contain all the test IDs and their default parameters. You can then modify this configuration as needed and pass it to `run_documentation_tests()` to run all tests in the documentation template if needed. 
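As a quick illustration of that pattern, you could adjust a single test's parameters in the default configuration and pass the whole dictionary back. The exact shape of each config entry is assumed here to mirror the per-test configs used earlier in this notebook (a dict that can carry `params` and `inputs` keys):

```python
# Hedged sketch: fetch the default config, tweak one test's parameters, then run the
# entire documentation template with it. The per-entry structure is an assumption.
suite_config = vm.get_test_suite().get_default_config()

suite_config.setdefault("validmind.data_validation.ClassImbalance", {})["params"] = {
    "min_percent_threshold": 30
}

full_results = vm.run_documentation_tests(
    inputs={
        "dataset": vm_test_ds,
        "model": vm_model,
        "datasets": (vm_train_ds, vm_test_ds),
    },
    config=suite_config,
)
```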
You also have the option to continue running tests for one section at a time, `get_default_config()` still provides a useful reference for providing default parametes to every test.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "import json\n", - "\n", - "model_test_suite = vm.get_test_suite()\n", - "config = model_test_suite.get_default_config()\n", - "print(\"Suite Config: \\n\", json.dumps(config, indent=2))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "#### Update the config\n", - "\n", - "Note that the default config does not assign any inputs to a test, this is expected. You can assign inputs to individual tests as needed, depending on the datasets and models you want to pass to individual tests. The `config` dictionary, as a mapping of test IDs to test configurations, allows you to do this.\n", - "\n", - "For this particular documentation template (binary classification), the ValidMind Library provides a sample configuration that can be used to populate the entire model documentation using the following inputs as placeholders:\n", - "\n", - "- A `raw_dataset` raw dataset\n", - "- A `train_dataset` training dataset\n", - "- A `test_dataset` test dataset\n", - "- A trained `model` instance\n", - "\n", - "As part of updating the `config` you will need to ensure the correct `input_id`s are used in the final config passed to `run_documentation_tests()`.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "from validmind.datasets.classification import customer_churn\n", - "from validmind.utils import preview_test_config\n", - "\n", - "test_config = customer_churn.get_demo_test_config()\n", - "preview_test_config(test_config)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Using this sample configuration, let's finish populating model documentation by running all tests for the `model_development` section of the documentation. 
Recall that the training and test datasets in our exercise have the following `input_id` values:\n", - "\n", - "- `train_dataset_final` for the training dataset\n", - "- `test_dataset_final` for the test dataset\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "metadata": {} - }, - "outputs": [], - "source": [ - "config = {\n", - " \"validmind.model_validation.ModelMetadata\": {\n", - " \"inputs\": {\"model\": \"log_reg_model_v1\"},\n", - " },\n", - " \"validmind.data_validation.DatasetSplit\": {\n", - " \"inputs\": {\"datasets\": [\"train_dataset_final\", \"test_dataset_final\"]},\n", - " },\n", - " \"validmind.model_validation.sklearn.PopulationStabilityIndex\": {\n", - " \"inputs\": {\n", - " \"model\": \"log_reg_model_v1\",\n", - " \"datasets\": [\"train_dataset_final\", \"test_dataset_final\"],\n", - " },\n", - " \"params\": {\"num_bins\": 10, \"mode\": \"fixed\"},\n", - " },\n", - " \"validmind.model_validation.sklearn.ConfusionMatrix\": {\n", - " \"inputs\": {\"model\": \"log_reg_model_v1\", \"dataset\": \"test_dataset_final\"},\n", - " },\n", - " \"my_test_provider.ConfusionMatrix\": {\n", - " \"inputs\": {\"dataset\": \"test_dataset_final\", \"model\": \"log_reg_model_v1\"},\n", - " },\n", - " \"validmind.model_validation.sklearn.ClassifierPerformance:in_sample\": {\n", - " \"inputs\": {\"model\": \"log_reg_model_v1\", \"dataset\": \"train_dataset_final\"}\n", - " },\n", - " \"validmind.model_validation.sklearn.ClassifierPerformance:out_of_sample\": {\n", - " \"inputs\": {\"model\": \"log_reg_model_v1\", \"dataset\": \"test_dataset_final\"}\n", - " },\n", - " \"validmind.model_validation.sklearn.PrecisionRecallCurve\": {\n", - " \"inputs\": {\"model\": \"log_reg_model_v1\", \"dataset\": \"test_dataset_final\"},\n", - " },\n", - " \"validmind.model_validation.sklearn.ROCCurve\": {\n", - " \"inputs\": {\"model\": \"log_reg_model_v1\", \"dataset\": \"test_dataset_final\"},\n", - " },\n", - " \"validmind.model_validation.sklearn.TrainingTestDegradation\": {\n", - " \"inputs\": {\n", - " \"model\": \"log_reg_model_v1\",\n", - " \"datasets\": [\"train_dataset_final\", \"test_dataset_final\"],\n", - " },\n", - " \"params\": {\n", - " \"metrics\": [\"accuracy\", \"precision\", \"recall\", \"f1\"],\n", - " \"max_threshold\": 0.1,\n", - " },\n", - " },\n", - " \"validmind.model_validation.sklearn.MinimumAccuracy\": {\n", - " \"inputs\": {\"model\": \"log_reg_model_v1\", \"dataset\": \"test_dataset_final\"},\n", - " \"params\": {\"min_threshold\": 0.7},\n", - " },\n", - " \"validmind.model_validation.sklearn.MinimumF1Score\": {\n", - " \"inputs\": {\"model\": \"log_reg_model_v1\", \"dataset\": \"test_dataset_final\"},\n", - " \"params\": {\"min_threshold\": 0.5},\n", - " },\n", - " \"validmind.model_validation.sklearn.MinimumROCAUCScore\": {\n", - " \"inputs\": {\"model\": \"log_reg_model_v1\", \"dataset\": \"test_dataset_final\"},\n", - " \"params\": {\"min_threshold\": 0.5},\n", - " },\n", - " \"validmind.model_validation.sklearn.PermutationFeatureImportance\": {\n", - " \"inputs\": {\"model\": \"log_reg_model_v1\", \"dataset\": \"test_dataset_final\"},\n", - " },\n", - " \"validmind.model_validation.sklearn.SHAPGlobalImportance\": {\n", - " \"inputs\": {\"model\": \"log_reg_model_v1\", \"dataset\": \"test_dataset_final\"},\n", - " \"params\": {\"kernel_explainer_samples\": 10},\n", - " },\n", - " \"validmind.model_validation.sklearn.WeakspotsDiagnosis\": {\n", - " \"inputs\": {\n", - " \"model\": \"log_reg_model_v1\",\n", - " \"datasets\": 
[\"train_dataset_final\", \"test_dataset_final\"],\n", - " },\n", - " \"params\": {\n", - " \"thresholds\": {\"accuracy\": 0.75, \"precision\": 0.5, \"recall\": 0.5, \"f1\": 0.7}\n", - " },\n", - " },\n", - " \"validmind.model_validation.sklearn.OverfitDiagnosis\": {\n", - " \"inputs\": {\n", - " \"model\": \"log_reg_model_v1\",\n", - " \"datasets\": [\"train_dataset_final\", \"test_dataset_final\"],\n", - " },\n", - " \"params\": {\"cut_off_percentage\": 4},\n", - " },\n", - " \"validmind.model_validation.sklearn.RobustnessDiagnosis\": {\n", - " \"inputs\": {\n", - " \"model\": \"log_reg_model_v1\",\n", - " \"datasets\": [\"train_dataset_final\", \"test_dataset_final\"],\n", - " },\n", - " \"params\": {\n", - " \"scaling_factor_std_dev_list\": [0.0, 0.1, 0.2, 0.3, 0.4, 0.5],\n", - " \"accuracy_decay_threshold\": 4,\n", - " },\n", - " },\n", - "}\n", - "\n", - "\n", - "full_suite = vm.run_documentation_tests(\n", - " section=\"model_development\",\n", - " config=config,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "## Where to go from here\n", - "\n", - "In this notebook you have learned the end-to-end process to document a model with the ValidMind Library, running through some very common scenarios in a typical model development setting:\n", - "\n", - "- Running out-of-the-box tests\n", - "- Documenting your model by adding evidence to model documentation\n", - "- Extending the capabilities of the ValidMind Library by implementing custom tests\n", - "- Ensuring that the documentation is complete by running all tests in the documentation template\n", - "\n", - "As a next step, you can explore the following notebooks to get a deeper understanding on how the ValidMind Library allows you generate model documentation for any use case:\n", - "\n", - "\n", - "\n", - "### Use cases\n", - "\n", - "- [Application scorecard demo](../code_samples/credit_risk/application_scorecard_demo.ipynb)\n", - "- [Linear regression documentation demo](../code_samples/regression/quickstart_regression_full_suite.ipynb)\n", - "- [LLM model documentation demo](../code_samples/nlp_and_llm/foundation_models_integration_demo.ipynb)\n", - "\n", - "\n", - "\n", - "### More how-to guides and code samples\n", - "\n", - "- [Explore available tests in detail](../how_to/explore_tests.ipynb)\n", - "- [In-depth guide for implementing custom tests](../code_samples/custom_tests/implement_custom_tests.ipynb)\n", - "- [In-depth guide to external test providers](../code_samples/custom_tests/integrate_external_test_providers.ipynb)\n", - "- [Configuring dataset features](../how_to/configure_dataset_features.ipynb)\n", - "- [Introduction to unit and composite metrics](../how_to/run_unit_metrics.ipynb)\n", - "\n", - "\n", - "\n", - "### Discover more learning resources\n", - "\n", - "All notebook samples can be found in the following directories of the ValidMind Library GitHub repository:\n", - "\n", - "- [Code samples](https://github.com/validmind/validmind-library/tree/main/notebooks/code_samples)\n", - "- [How-to guides](https://github.com/validmind/validmind-library/tree/main/notebooks/how_to)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "## Upgrade ValidMind\n", - "\n", - "
After installing ValidMind, you’ll want to periodically make sure you are on the latest version to access any new features and other enhancements.
\n", - "\n", - "Retrieve the information for the currently installed version of ValidMind:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip show validmind" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If the version returned is lower than the version indicated in our [production open-source code](https://github.com/validmind/validmind-library/blob/prod/validmind/__version__.py), restart your notebook and run:\n", - "\n", - "```bash\n", - "%pip install --upgrade validmind\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You may need to restart your kernel after running the upgrade package for changes to be applied." + "Training is FREE — Register now!
" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "ValidMind Library", "language": "python", - "name": "python3" + "name": "validmind" }, "language_info": { "codemirror_mode": { @@ -1798,7 +35,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.10.13" } }, "nbformat": 4, diff --git a/notebooks/tutorials/model_development/101-set_up_validmind.ipynb b/notebooks/tutorials/model_development/101-set_up_validmind.ipynb index 9a5936350..1a316cbba 100644 --- a/notebooks/tutorials/model_development/101-set_up_validmind.ipynb +++ b/notebooks/tutorials/model_development/101-set_up_validmind.ipynb @@ -101,7 +101,7 @@ "\n", "### New to ValidMind?\n", "\n", - "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models, find code samples, or read our developer reference.\n", + "If you haven't already seen our documentation on the [ValidMind Library](https://docs.validmind.ai/developer/validmind-library.html), we recommend you begin by exploring the available resources in this section. There, you can learn more about documenting models and running tests, as well as find code samples and our Python Library API reference.\n", "\n", "
For access to all features available in this notebook, create a free ValidMind account.\n", "

\n", diff --git a/notebooks/tutorials/model_development/102-start_development_process.ipynb b/notebooks/tutorials/model_development/102-start_development_process.ipynb index 4cdfa0247..68c637d2c 100644 --- a/notebooks/tutorials/model_development/102-start_development_process.ipynb +++ b/notebooks/tutorials/model_development/102-start_development_process.ipynb @@ -70,6 +70,15 @@ "Refer to the first notebook in this series: 101 Set up ValidMind
\n" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Setting up" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -199,6 +208,15 @@ "vm.tests.list_tests(task=\"classification\", tags=[\"tabular_data\", \"data_quality\"])" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
Want to learn more about navigating ValidMind tests?\n", + "

\n", + "Refer to our notebook outlining the utilities available for viewing and understanding available ValidMind tests: Explore tests
" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -243,11 +261,7 @@ "You run individual tests by calling [the `run_test` function](https://docs.validmind.ai/validmind/validmind/tests.html#run_test) provided by the `validmind.tests` module. For the examples below, we'll pass in the following arguments:\n", "\n", "- **`test_id`** — The ID of the test to run, as seen in the `ID` column when you run `list_tests`. \n", - "- **`params`** — A dictionary of parameters for the test. These will override any `default_params` set in the test definition. \n", - "\n", - "
Want to learn more about ValidMind tests?\n", - "

\n", - "Refer to our notebook that includes code samples and usage of key functions: Explore tests
" + "- **`params`** — A dictionary of parameters for the test. These will override any `default_params` set in the test definition. " ] }, { @@ -660,7 +674,7 @@ "\n", "2. In the left sidebar that appears for your model, click **Documentation**.\n", "\n", - "3. Locate the Data Preparation section and click on **2.3 Correlations and Interactions** to expand that section.\n", + "3. Locate the Data Preparation section and click on **2.3. Correlations and Interactions** to expand that section.\n", "\n", "4. Hover under the Pearson Correlation Matrix content block until a horizontal dashed line with a **+** button appears, indicating that you can insert a new block.\n", "\n", @@ -679,7 +693,7 @@ "\n", "6. Finally, click **Insert 1 Test Result to Document** to add the test result to the documentation.\n", "\n", - " Confirm that the individual results for the high correlation test has been correctly inserted into section **2.3 Correlations and Interactions** of the documentation.\n", + " Confirm that the individual results for the high correlation test has been correctly inserted into section **2.3. Correlations and Interactions** of the documentation.\n", "\n", "7. Finalize the documentation by editing the test result's description block to explain the changes you made to the raw data and the reasons behind them as shown in the screenshot below:\n", "\n", diff --git a/notebooks/tutorials/model_development/103-integrate_custom_tests.ipynb b/notebooks/tutorials/model_development/103-integrate_custom_tests.ipynb index bcde58c62..31cd0758f 100644 --- a/notebooks/tutorials/model_development/103-integrate_custom_tests.ipynb +++ b/notebooks/tutorials/model_development/103-integrate_custom_tests.ipynb @@ -65,7 +65,7 @@ "\n", "## Prerequisites\n", "\n", - "In order to log test results or evidence to your model documentation with this notebook, you'll need to first have:\n", + "In order to integrate custom tests with your model documentation with this notebook, you'll need to first have:\n", "\n", "- [ ] Registered a model within the ValidMind Platform with a predefined documentation template\n", "- [ ] Installed the ValidMind Library in your local environment, allowing you to access all its features\n", @@ -905,7 +905,7 @@ "\n", "2. In the left sidebar that appears for your model, click **Documentation**.\n", "\n", - "3. Locate the Data Preparation section and click on **3.2 Model Evaluation** to expand that section.\n", + "3. Locate the Data Preparation section and click on **3.2. Model Evaluation** to expand that section.\n", "\n", "4. Hover under the Pearson Correlation Matrix content block until a horizontal dashed line with a **+** button appears, indicating that you can insert a new block.\n", "\n", @@ -922,7 +922,7 @@ "\n", "6. Finally, click **Insert 2 Test Results to Document** to add the test results to the documentation.\n", "\n", - " Confirm that the two individual results for the confusion matrix tests have been correctly inserted into section **3.2 Model Evaluation** of the documentation." + " Confirm that the two individual results for the confusion matrix tests have been correctly inserted into section **3.2. Model Evaluation** of the documentation." 
] }, { diff --git a/notebooks/tutorials/model_development/104-finalize_testing_documentation.ipynb b/notebooks/tutorials/model_development/104-finalize_testing_documentation.ipynb index fd17733fc..3ec70b841 100644 --- a/notebooks/tutorials/model_development/104-finalize_testing_documentation.ipynb +++ b/notebooks/tutorials/model_development/104-finalize_testing_documentation.ipynb @@ -148,7 +148,7 @@ "\n", "### Import sample dataset\n", "\n", - "Next, we'll import the same public [Bank Customer Churn Prediction](https://www.kaggle.com/datasets/shantanudhakadd/bank-customer-churn-prediction) dataset from Kaggle we used in the last notebook so that we have something to work with:" + "Next, we'll import the same public [Bank Customer Churn Prediction](https://www.kaggle.com/datasets/shantanudhakadd/bank-customer-churn-prediction) dataset from Kaggle we used in the last notebooks so that we have something to work with:" ] }, { @@ -929,6 +929,7 @@ "#### More how-to guides and code samples\n", "\n", "- [Explore available tests in detail](../../how_to/explore_tests.ipynb)\n", + "- [In-depth guide on running dataset based tests](../../how_to/run_tests/1_run_dataset_based_tests.ipynb)\n", "- [In-depth guide for implementing custom tests](../../code_samples/custom_tests/implement_custom_tests.ipynb)\n", "- [In-depth guide to external test providers](../../code_samples/custom_tests/integrate_external_test_providers.ipynb)\n", "- [Configuring dataset features](../../how_to/configure_dataset_features.ipynb)\n", diff --git a/pyproject.toml b/pyproject.toml index 3ea1b9f93..f0fe307de 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ description = "ValidMind Library" license = "Commercial License" name = "validmind" readme = "README.pypi.md" -version = "2.8.13" +version = "2.8.14" [tool.poetry.dependencies] aiohttp = {extras = ["speedups"], version = "*"} diff --git a/validmind/__version__.py b/validmind/__version__.py index 76aba1042..781278681 100644 --- a/validmind/__version__.py +++ b/validmind/__version__.py @@ -1 +1 @@ -__version__ = "2.8.13" +__version__ = "2.8.14"
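The release bump above keeps `pyproject.toml` and `validmind/__version__.py` in step. A small sanity check along the following lines, assuming `importlib.metadata` is available in the environment, can confirm that an upgraded installation reports the matching version; it is an illustrative check, not part of the library API.

```python
from importlib.metadata import version

import validmind

# The installed package metadata and the module-level constant should agree.
installed = version("validmind")  # e.g. "2.8.14" after upgrading
assert installed == validmind.__version__, (installed, validmind.__version__)
print(f"validmind {installed} is installed and consistent with __version__.py")
```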