Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

1,869 changes: 1,869 additions & 0 deletions notebooks/code_samples/credit_risk/application_scorecard_with_ml.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ description = "ValidMind Library"
license = "Commercial License"
name = "validmind"
readme = "README.pypi.md"
version = "2.7.3"
version = "2.7.4"

[tool.poetry.dependencies]
aiohttp = {extras = ["speedups"], version = "*"}
Expand Down
5 changes: 5 additions & 0 deletions tests/test_integration_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@
"validmind.data_validation.ProtectedClassesCombination",
"validmind.data_validation.ProtectedClassesDisparity",
"validmind.data_validation.ProtectedClassesThresholdOptimizer",
# The customer churn classification dataset contains a string column 'Germany' which is not supported by the MutualInformation test
"validmind.data_validation.MutualInformation",
# The required column 'score' is not present in the dataset with input_id test_dataset
"validmind.data_validation.ScoreBandDefaultRates",
"validmind.model_validation.sklearn.ScoreProbabilityAlignment",
]
SKIPPED_TESTS = []
SUCCESSFUL_TESTS = []
Expand Down
19 changes: 11 additions & 8 deletions tests/unit_tests/data_validation/test_TooManyZeroValues.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def setUp(self):
)

def test_too_many_zeros_default_threshold(self):
# Test with default threshold (3%)
# Test with default threshold (0.03, expressed in percent, i.e. 0.03%)
results, passed = TooManyZeroValues(self.vm_dataset)

# Check return types
Expand All @@ -47,22 +47,25 @@ def test_too_many_zeros_default_threshold(self):
# Check results structure
self.assertEqual(len(results), 2) # Should only check numeric columns
for result in results:
self.assertIn("Column", result)
self.assertIn("Variable", result)
self.assertIn("Row Count", result)
self.assertIn("Number of Zero Values", result)
self.assertIn("Percentage of Zero Values (%)", result)
self.assertIn("Pass/Fail", result)

# Verify specific results
few_zeros_result = next(r for r in results if r["Column"] == "few_zeros")
many_zeros_result = next(r for r in results if r["Column"] == "many_zeros")
few_zeros_result = next(r for r in results if r["Variable"] == "few_zeros")
many_zeros_result = next(r for r in results if r["Variable"] == "many_zeros")

self.assertEqual(few_zeros_result["Pass/Fail"], "Pass") # 1% should pass
self.assertEqual(many_zeros_result["Pass/Fail"], "Fail") # 5% should fail
self.assertFalse(passed) # Overall test should fail due to many_zeros
# 1% should fail as it's > 0.03%
self.assertEqual(few_zeros_result["Pass/Fail"], "Fail")
# 5% should fail as it's > 0.03%
self.assertEqual(many_zeros_result["Pass/Fail"], "Fail")
self.assertFalse(passed) # Overall test should fail as both columns fail

def test_custom_threshold(self):
# Test with higher threshold (6%)
results, passed = TooManyZeroValues(self.vm_dataset, max_percent_threshold=0.06)
results, passed = TooManyZeroValues(self.vm_dataset, max_percent_threshold=6)

# Both columns should pass with higher threshold
self.assertTrue(passed)
Expand Down
9 changes: 6 additions & 3 deletions tests/unit_tests/data_validation/test_UniqueRows.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,10 @@ def test_unique_rows_default_threshold(self):

def test_low_uniqueness(self):
# Test dataset with low uniqueness
results, passed = UniqueRows(self.vm_dataset_duplicates)
# Set threshold to 5% to make the test fail
results, passed = UniqueRows(
self.vm_dataset_duplicates, min_percent_threshold=5
)

# Check return types
self.assertIsInstance(results, list)
Expand All @@ -68,6 +71,6 @@ def test_low_uniqueness(self):
self.assertIn("Percentage of Unique Values (%)", result)
self.assertIn("Pass/Fail", result)

# Should fail with low uniqueness
# Should fail with uniqueness below 5%
self.assertFalse(passed)
self.assertTrue(any(row["Pass/Fail"] == "Fail" for row in results))
self.assertTrue(all(row["Pass/Fail"] == "Fail" for row in results))
2 changes: 1 addition & 1 deletion validmind/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "2.7.3"
__version__ = "2.7.4"
Loading
Loading