diff --git a/notebooks/how_to/explore_tests.ipynb b/notebooks/how_to/explore_tests.ipynb
index 672c98fc3..73b548fc7 100644
--- a/notebooks/how_to/explore_tests.ipynb
+++ b/notebooks/how_to/explore_tests.ipynb
@@ -81,1786 +81,2178 @@
 "data": {
  "text/html": [
[Raw HTML table markup of the regenerated output cell omitted; the recoverable cell contents of the test-catalog table follow.]
Column headers (old): ID | Name | Description | Required Inputs | Params | Tags | Tasks
Column headers (new): ID | Name | Description | Has Figure | Has Table | Required Inputs | Params | Tags | Tasks
Rows (ID | Name | Description | Required Inputs | Params | Tags | Tasks):
validmind.data_validation.ACFandPACFPlot | ACF and PACF Plot | Analyzes time series data using Autocorrelation Function (ACF) and Partial Autocorrelation Function (PACF) plots to... | ['dataset'] | {} | ['time_series_data', 'forecasting', 'statistical_test', 'visualization'] | ['regression']
validmind.data_validation.ADF | ADF | Assesses the stationarity of a time series dataset using the Augmented Dickey-Fuller (ADF) test.... | ['dataset'] | {} | ['time_series_data', 'statsmodels', 'forecasting', 'statistical_test', 'stationarity'] | ['regression']
validmind.data_validation.AutoAR | Auto AR | Automatically identifies the optimal Autoregressive (AR) order for a time series using BIC and AIC criteria.... | ['dataset'] | {'max_ar_order': {'type': 'int', 'default': 3}} | ['time_series_data', 'statsmodels', 'forecasting', 'statistical_test'] | ['regression']
validmind.data_validation.AutoMA | Auto MA | Automatically selects the optimal Moving Average (MA) order for each variable in a time series dataset based on... | ['dataset'] | {'max_ma_order': {'type': 'int', 'default': 3}} | ['time_series_data', 'statsmodels', 'forecasting', 'statistical_test'] | ['regression']
validmind.data_validation.AutoStationarity | Auto Stationarity | Automates Augmented Dickey-Fuller test to assess stationarity across multiple time series in a DataFrame.... | ['dataset'] | {'max_order': {'type': 'int', 'default': 5}, 'threshold': {'type': 'float', 'default': 0.05}} | ['time_series_data', 'statsmodels', 'forecasting', 'statistical_test'] | ['regression']
validmind.data_validation.BivariateScatterPlots | Bivariate Scatter Plots | Generates bivariate scatterplots to visually inspect relationships between pairs of numerical predictor variables... | ['dataset'] | {} | ['tabular_data', 'numerical_data', 'visualization'] | ['classification']
validmind.data_validation.BoxPierce | Box Pierce | Detects autocorrelation in time-series data through the Box-Pierce test to validate model performance.... | ['dataset'] | {} | ['time_series_data', 'forecasting', 'statistical_test', 'statsmodels'] | ['regression']
validmind.data_validation.ChiSquaredFeaturesTable | Chi Squared Features Table | Assesses the statistical association between categorical features and a target variable using the Chi-Squared test.... | ['dataset'] | {'p_threshold': {'type': '_empty', 'default': 0.05}} | ['tabular_data', 'categorical_data', 'statistical_test'] | ['classification']
validmind.data_validation.ClassImbalance | Class Imbalance | Evaluates and quantifies class distribution imbalance in a dataset used by a machine learning model.... | ['dataset'] | {'min_percent_threshold': {'type': 'int', 'default': 10}} | ['tabular_data', 'binary_classification', 'multiclass_classification', 'data_quality'] | ['classification']
validmind.data_validation.DatasetDescription | Dataset Description | Provides comprehensive analysis and statistical summaries of each column in a machine learning model's dataset.... | ['dataset'] | {} | ['tabular_data', 'time_series_data', 'text_data'] | ['classification', 'regression', 'text_classification', 'text_summarization']
validmind.data_validation.DatasetSplit | Dataset Split | Evaluates and visualizes the distribution proportions among training, testing, and validation datasets of an ML... | ['datasets'] | {} | ['tabular_data', 'time_series_data', 'text_data'] | ['classification', 'regression', 'text_classification', 'text_summarization']
validmind.data_validation.DescriptiveStatistics | Descriptive Statistics | Performs a detailed descriptive statistical analysis of both numerical and categorical data within a model's... | ['dataset'] | {} | ['tabular_data', 'time_series_data', 'data_quality'] | ['classification', 'regression']
validmind.data_validation.DickeyFullerGLS | Dickey Fuller GLS | Assesses stationarity in time series data using the Dickey-Fuller GLS test to determine the order of integration.... | ['dataset'] | {} | ['time_series_data', 'forecasting', 'unit_root_test'] | ['regression']
validmind.data_validation.Duplicates | Duplicates | Tests dataset for duplicate entries, ensuring model reliability via data quality verification.... | ['dataset'] | {'min_threshold': {'type': '_empty', 'default': 1}} | ['tabular_data', 'data_quality', 'text_data'] | ['classification', 'regression']
validmind.data_validation.EngleGrangerCoint | Engle Granger Coint | Assesses the degree of co-movement between pairs of time series data using the Engle-Granger cointegration test.... | ['dataset'] | {'threshold': {'type': 'float', 'default': 0.05}} | ['time_series_data', 'statistical_test', 'forecasting'] | ['regression']
validmind.data_validation.FeatureTargetCorrelationPlot | Feature Target Correlation Plot | Visualizes the correlation between input features and the model's target output in a color-coded horizontal bar... | ['dataset'] | {'fig_height': {'type': '_empty', 'default': 600}} | ['tabular_data', 'visualization', 'correlation'] | ['classification', 'regression']
validmind.data_validation.HighCardinality | High Cardinality | Assesses the number of unique values in categorical columns to detect high cardinality and potential overfitting.... | ['dataset'] | {'num_threshold': {'type': 'int', 'default': 100}, 'percent_threshold': {'type': 'float', 'default': 0.1}, 'threshold_type': {'type': 'str', 'default': 'percent'}} | ['tabular_data', 'data_quality', 'categorical_data'] | ['classification', 'regression']
validmind.data_validation.HighPearsonCorrelation | High Pearson Correlation | Identifies highly correlated feature pairs in a dataset suggesting feature redundancy or multicollinearity.... | ['dataset'] | {'max_threshold': {'type': 'float', 'default': 0.3}, 'top_n_correlations': {'type': 'int', 'default': 10}, 'feature_columns': {'type': 'list', 'default': None}} | ['tabular_data', 'data_quality', 'correlation'] | ['classification', 'regression']
validmind.data_validation.IQROutliersBarPlot | IQR Outliers Bar Plot | Visualizes outlier distribution across percentiles in numerical data using the Interquartile Range (IQR) method.... | ['dataset'] | {'threshold': {'type': 'float', 'default': 1.5}, 'fig_width': {'type': 'int', 'default': 800}} | ['tabular_data', 'visualization', 'numerical_data'] | ['classification', 'regression']
validmind.data_validation.IQROutliersTable | IQR Outliers Table | Determines and summarizes outliers in numerical features using the Interquartile Range method.... | ['dataset'] | {'threshold': {'type': 'float', 'default': 1.5}} | ['tabular_data', 'numerical_data'] | ['classification', 'regression']
validmind.data_validation.IsolationForestOutliers | Isolation Forest Outliers | Detects outliers in a dataset using the Isolation Forest algorithm and visualizes results through scatter plots.... | ['dataset'] | {'random_state': {'type': 'int', 'default': 0}, 'contamination': {'type': 'float', 'default': 0.1}, 'feature_columns': {'type': 'list', 'default': None}} | ['tabular_data', 'anomaly_detection'] | ['classification']
validmind.data_validation.JarqueBera | Jarque Bera | Assesses normality of dataset features in an ML model using the Jarque-Bera test.... | ['dataset'] | {} | ['tabular_data', 'data_distribution', 'statistical_test', 'statsmodels'] | ['classification', 'regression']
validmind.data_validation.KPSS | KPSS | Assesses the stationarity of time-series data in a machine learning model using the KPSS unit root test.... | ['dataset'] | {} | ['time_series_data', 'stationarity', 'unit_root_test', 'statsmodels'] | ['data_validation']
validmind.data_validation.LJungBox | Ljung Box | Assesses autocorrelations in dataset features by performing a Ljung-Box test on each feature.... | ['dataset'] | {} | ['time_series_data', 'forecasting', 'statistical_test', 'statsmodels'] | ['regression']
validmind.data_validation.LaggedCorrelationHeatmap | Lagged Correlation Heatmap | Assesses and visualizes correlation between target variable and lagged independent variables in a time-series... | ['dataset'] | {'num_lags': {'type': 'int', 'default': 10}} | ['time_series_data', 'visualization'] | ['regression']
validmind.data_validation.MissingValues | Missing Values | Evaluates dataset quality by ensuring missing value ratio across all features does not exceed a set threshold.... | ['dataset'] | {'min_threshold': {'type': 'int', 'default': 1}} | ['tabular_data', 'data_quality'] | ['classification', 'regression']
validmind.data_validation.MissingValuesBarPlot | Missing Values Bar Plot | Assesses the percentage and distribution of missing values in the dataset via a bar plot, with emphasis on... | ['dataset'] | {'threshold': {'type': 'int', 'default': 80}, 'fig_height': {'type': 'int', 'default': 600}} | ['tabular_data', 'data_quality', 'visualization'] | ['classification', 'regression']
validmind.data_validation.MutualInformation | Mutual Information | Calculates mutual information scores between features and target variable to evaluate feature relevance.... | ['dataset'] | {'min_threshold': {'type': 'float', 'default': 0.01}, 'task': {'type': 'str', 'default': 'classification'}} | ['feature_selection', 'data_analysis'] | ['classification', 'regression']
validmind.data_validation.PearsonCorrelationMatrix | Pearson Correlation Matrix | Evaluates linear dependency between numerical variables in a dataset via a Pearson Correlation coefficient heat map.... | ['dataset'] | {} | ['tabular_data', 'numerical_data', 'correlation'] | ['classification', 'regression']
validmind.data_validation.PhillipsPerronArch | Phillips Perron Arch | Assesses the stationarity of time series data in each feature of the ML model using the Phillips-Perron test.... | ['dataset'] | {} | ['time_series_data', 'forecasting', 'statistical_test', 'unit_root_test'] | ['regression']
validmind.data_validation.ProtectedClassesDescription | Protected Classes Description | Visualizes the distribution of protected classes in the dataset relative to the target variable... | ['dataset'] | {'protected_classes': {'type': '_empty', 'default': None}} | ['bias_and_fairness', 'descriptive_statistics'] | ['classification', 'regression']
validmind.data_validation.RollingStatsPlot | Rolling Stats Plot | Evaluates the stationarity of time series data by plotting its rolling mean and standard deviation over a specified... | ['dataset'] | {'window_size': {'type': 'int', 'default': 12}} | ['time_series_data', 'visualization', 'stationarity'] | ['regression']
validmind.data_validation.RunsTest | Runs Test | Executes Runs Test on ML model to detect non-random patterns in output data sequence.... | ['dataset'] | {} | ['tabular_data', 'statistical_test', 'statsmodels'] | ['classification', 'regression']
validmind.data_validation.ScatterPlot | Scatter Plot | Assesses visual relationships, patterns, and outliers among features in a dataset through scatter plot matrices.... | ['dataset'] | {} | ['tabular_data', 'visualization'] | ['classification', 'regression']
validmind.data_validation.ScoreBandDefaultRates | Score Band Default Rates | Analyzes default rates and population distribution across credit score bands.... | ['dataset', 'model'] | {'score_column': {'type': 'str', 'default': 'score'}, 'score_bands': {'type': 'list', 'default': None}} | ['visualization', 'credit_risk', 'scorecard'] | ['classification']
validmind.data_validation.SeasonalDecompose | Seasonal Decompose | Assesses patterns and seasonality in a time series dataset by decomposing its features into foundational components.... | ['dataset'] | {'seasonal_model': {'type': 'str', 'default': 'additive'}} | ['time_series_data', 'seasonality', 'statsmodels'] | ['regression']
validmind.data_validation.ShapiroWilk | Shapiro Wilk | Evaluates feature-wise normality of training data using the Shapiro-Wilk test.... | ['dataset'] | {} | ['tabular_data', 'data_distribution', 'statistical_test'] | ['classification', 'regression']
validmind.data_validation.Skewness | Skewness | Evaluates the skewness of numerical data in a dataset to check against a defined threshold, aiming to ensure data... | ['dataset'] | {'max_threshold': {'type': '_empty', 'default': 1}} | ['data_quality', 'tabular_data'] | ['classification', 'regression']
validmind.data_validation.SpreadPlot | Spread Plot | Assesses potential correlations between pairs of time series variables through visualization to enhance... | ['dataset'] | {} | ['time_series_data', 'visualization'] | ['regression']
validmind.data_validation.TabularCategoricalBarPlots | Tabular Categorical Bar Plots | Generates and visualizes bar plots for each category in categorical features to evaluate the dataset's composition.... | ['dataset'] | {} | ['tabular_data', 'visualization'] | ['classification', 'regression']
validmind.data_validation.TabularDateTimeHistograms | Tabular Date Time Histograms | Generates histograms to provide graphical insight into the distribution of time intervals in a model's datetime... | ['dataset'] | {} | ['time_series_data', 'visualization'] | ['classification', 'regression']
validmind.data_validation.TabularDescriptionTables | Tabular Description Tables | Summarizes key descriptive statistics for numerical, categorical, and datetime variables in a dataset.... | ['dataset'] | {} | ['tabular_data'] | ['classification', 'regression']
validmind.data_validation.TabularNumericalHistograms | Tabular Numerical Histograms | Generates histograms for each numerical feature in a dataset to provide visual insights into data distribution and... | ['dataset'] | {} | ['tabular_data', 'visualization'] | ['classification', 'regression']
validmind.data_validation.TargetRateBarPlots | Target Rate Bar Plots | Generates bar plots visualizing the default rates of categorical features for a classification machine learning... | ['dataset'] | {} | ['tabular_data', 'visualization', 'categorical_data'] | ['classification']
validmind.data_validation.TimeSeriesDescription | Time Series Description | Generates a detailed analysis for the provided time series dataset, summarizing key statistics to identify trends,... | ['dataset'] | {} | ['time_series_data', 'analysis'] | ['regression']
validmind.data_validation.TimeSeriesDescriptiveStatistics | Time Series Descriptive Statistics | Evaluates the descriptive statistics of a time series dataset to identify trends, patterns, and data quality issues.... | ['dataset'] | {} | ['time_series_data', 'analysis'] | ['regression']
validmind.data_validation.TimeSeriesFrequency | Time Series Frequency | Evaluates consistency of time series data frequency and generates a frequency plot.... | ['dataset'] | {} | ['time_series_data'] | ['regression']
validmind.data_validation.TimeSeriesHistogram | Time Series Histogram | Visualizes distribution of time-series data using histograms and Kernel Density Estimation (KDE) lines.... | ['dataset'] | {'nbins': {'type': '_empty', 'default': 30}} | ['data_validation', 'visualization', 'time_series_data'] | ['regression', 'time_series_forecasting']
validmind.data_validation.TimeSeriesLinePlot | Time Series Line Plot | Generates and analyses time-series data through line plots revealing trends, patterns, anomalies over time.... | ['dataset'] | {} | ['time_series_data', 'visualization'] | ['regression']
validmind.data_validation.TimeSeriesMissingValues | Time Series Missing Values | Validates time-series data quality by confirming the count of missing values is below a certain threshold.... | ['dataset'] | {'min_threshold': {'type': 'int', 'default': 1}} | ['time_series_data'] | ['regression']
validmind.data_validation.TimeSeriesOutliers | Time Series Outliers | Identifies and visualizes outliers in time-series data using the z-score method.... | ['dataset'] | {'zscore_threshold': {'type': 'int', 'default': 3}} | ['time_series_data'] | ['regression']
validmind.data_validation.TooManyZeroValues | Too Many Zero Values | Identifies numerical columns in a dataset that contain an excessive number of zero values, defined by a threshold... | ['dataset'] | {'max_percent_threshold': {'type': 'float', 'default': 0.03}} | ['tabular_data'] | ['regression', 'classification']
validmind.data_validation.UniqueRows | Unique Rows | Verifies the diversity of the dataset by ensuring that the count of unique rows exceeds a prescribed threshold.... | ['dataset'] | {'min_percent_threshold': {'type': 'float', 'default': 1}} | ['tabular_data'] | ['regression', 'classification']
validmind.data_validation.WOEBinPlots | WOE Bin Plots | Generates visualizations of Weight of Evidence (WoE) and Information Value (IV) for understanding predictive power... | ['dataset'] | {'breaks_adj': {'type': 'list', 'default': None}, 'fig_height': {'type': 'int', 'default': 600}, 'fig_width': {'type': 'int', 'default': 500}} | ['tabular_data', 'visualization', 'categorical_data'] | ['classification']
validmind.data_validation.WOEBinTable | WOE Bin Table | Assesses the Weight of Evidence (WoE) and Information Value (IV) of each feature to evaluate its predictive power... | ['dataset'] | {'breaks_adj': {'type': 'list', 'default': None}} | ['tabular_data', 'categorical_data'] | ['classification']
validmind.data_validation.ZivotAndrewsArch | Zivot Andrews Arch | Evaluates the order of integration and stationarity of time series data using the Zivot-Andrews unit root test.... | ['dataset'] | {} | ['time_series_data', 'stationarity', 'unit_root_test'] | ['regression']
validmind.data_validation.nlp.CommonWords | Common Words | Assesses the most frequent non-stopwords in a text column for identifying prevalent language patterns.... | ['dataset'] | {} | ['nlp', 'text_data', 'visualization', 'frequency_analysis'] | ['text_classification', 'text_summarization']
validmind.data_validation.nlp.Hashtags | Hashtags | Assesses hashtag frequency in a text column, highlighting usage trends and potential dataset bias or spam.... | ['dataset'] | {'top_hashtags': {'type': 'int', 'default': 25}} | ['nlp', 'text_data', 'visualization', 'frequency_analysis'] | ['text_classification', 'text_summarization']
validmind.data_validation.nlp.LanguageDetection | Language Detection | Assesses the diversity of languages in a textual dataset by detecting and visualizing the distribution of languages.... | ['dataset'] | {} | ['nlp', 'text_data', 'visualization'] | ['text_classification', 'text_summarization']
validmind.data_validation.nlp.Mentions | Mentions | Calculates and visualizes frequencies of '@' prefixed mentions in a text-based dataset for NLP model analysis.... | ['dataset'] | {'top_mentions': {'type': 'int', 'default': 25}} | ['nlp', 'text_data', 'visualization', 'frequency_analysis'] | ['text_classification', 'text_summarization']
validmind.data_validation.nlp.PolarityAndSubjectivity | Polarity And Subjectivity | Analyzes the polarity and subjectivity of text data within a given dataset to visualize the sentiment distribution.... | ['dataset'] | {'threshold_subjectivity': {'type': '_empty', 'default': 0.5}, 'threshold_polarity': {'type': '_empty', 'default': 0}} | ['nlp', 'text_data', 'data_validation'] | ['nlp']
validmind.data_validation.nlp.Punctuations | Punctuations | Analyzes and visualizes the frequency distribution of punctuation usage in a given text dataset.... | ['dataset'] | {'count_mode': {'type': '_empty', 'default': 'token'}} | ['nlp', 'text_data', 'visualization', 'frequency_analysis'] | ['text_classification', 'text_summarization', 'nlp']
validmind.data_validation.nlp.Sentiment | Sentiment | Analyzes the sentiment of text data within a dataset using the VADER sentiment analysis tool.... | ['dataset'] | {} | ['nlp', 'text_data', 'data_validation'] | ['nlp']
validmind.data_validation.nlp.StopWords | Stop Words | Evaluates and visualizes the frequency of English stop words in a text dataset against a defined threshold.... | ['dataset'] | {'min_percent_threshold': {'type': 'float', 'default': 0.5}, 'num_words': {'type': 'int', 'default': 25}} | ['nlp', 'text_data', 'frequency_analysis', 'visualization'] | ['text_classification', 'text_summarization']
validmind.data_validation.nlp.TextDescription | Text Description | Conducts comprehensive textual analysis on a dataset using NLTK to evaluate various parameters and generate... | ['dataset'] | {'unwanted_tokens': {'type': 'set', 'default': {"s'", "'s", ' ', 'mr', "''", 'dollar', 'dr', 'mrs', '``', 's', 'us', 'ms'}}, 'lang': {'type': 'str', 'default': 'english'}} | ['nlp', 'text_data', 'visualization'] | ['text_classification', 'text_summarization']
validmind.data_validation.nlp.Toxicity | Toxicity | Assesses the toxicity of text data within a dataset to visualize the distribution of toxicity scores.... | ['dataset'] | {} | ['nlp', 'text_data', 'data_validation'] | ['nlp']
validmind.model_validation.BertScore | Bert Score | Assesses the quality of machine-generated text using BERTScore metrics and visualizes results through histograms... | ['dataset', 'model'] | {'evaluation_model': {'type': '_empty', 'default': 'distilbert-base-uncased'}} | ['nlp', 'text_data', 'visualization'] | ['text_classification', 'text_summarization']
validmind.model_validation.BleuScore | Bleu Score | Evaluates the quality of machine-generated text using BLEU metrics and visualizes the results through histograms... | ['dataset', 'model'] | {} | ['nlp', 'text_data', 'visualization'] | ['text_classification', 'text_summarization']
validmind.model_validation.ClusterSizeDistribution | Cluster Size Distribution | Assesses the performance of clustering models by comparing the distribution of cluster sizes in model predictions... | ['dataset', 'model'] | {} | ['sklearn', 'model_performance'] | ['clustering']
validmind.model_validation.ContextualRecall | Contextual Recall | Evaluates a Natural Language Generation model's ability to generate contextually relevant and factually correct... | ['dataset', 'model'] | {} | ['nlp', 'text_data', 'visualization'] | ['text_classification', 'text_summarization']
validmind.model_validation.FeaturesAUC | Features AUC | Evaluates the discriminatory power of each individual feature within a binary classification model by calculating... | ['dataset'] | {'fontsize': {'type': 'int', 'default': 12}, 'figure_height': {'type': 'int', 'default': 500}} | ['feature_importance', 'AUC', 'visualization'] | ['classification']
validmind.model_validation.MeteorScore | Meteor Score | Assesses the quality of machine-generated translations by comparing them to human-produced references using the... | ['dataset', 'model'] | {} | ['nlp', 'text_data', 'visualization'] | ['text_classification', 'text_summarization']
validmind.model_validation.ModelMetadata | Model Metadata | Compare metadata of different models and generate a summary table with the results.... | ['model'] | {} | ['model_training', 'metadata'] | ['regression', 'time_series_forecasting']
validmind.model_validation.ModelPredictionResiduals | Model Prediction Residuals | Assesses normality and behavior of residuals in regression models through visualization and statistical tests.... | ['dataset', 'model'] | {'nbins': {'type': '_empty', 'default': 100}, 'p_value_threshold': {'type': '_empty', 'default': 0.05}, 'start_date': {'type': '_empty', 'default': None}, 'end_date': {'type': '_empty', 'default': None}} | ['regression'] | ['residual_analysis', 'visualization']
validmind.model_validation.RegardScore | Regard Score | Assesses the sentiment and potential biases in text generated by NLP models by computing and visualizing regard... | ['dataset', 'model'] | {} | ['nlp', 'text_data', 'visualization'] | ['text_classification', 'text_summarization']
validmind.model_validation.RegressionResidualsPlot | Regression Residuals Plot | Evaluates regression model performance using residual distribution and actual vs. predicted plots.... | ['model', 'dataset'] | {'bin_size': {'type': 'float', 'default': 0.1}} | ['model_performance', 'visualization'] | ['regression']
validmind.model_validation.RougeScore | Rouge Score | Assesses the quality of machine-generated text using ROUGE metrics and visualizes the results to provide... | ['dataset', 'model'] | {'metric': {'type': '_empty', 'default': 'rouge-1'}} | ['nlp', 'text_data', 'visualization'] | ['text_classification', 'text_summarization']
validmind.model_validation.TimeSeriesPredictionWithCI | Time Series Prediction With CI | Assesses predictive accuracy and uncertainty in time series models, highlighting breaches beyond confidence... | ['dataset', 'model'] | {'confidence': {'type': '_empty', 'default': 0.95}} | ['model_predictions', 'visualization'] | ['regression', 'time_series_forecasting']
validmind.model_validation.TimeSeriesPredictionsPlot | Time Series Predictions Plot | Plot actual vs predicted values for time series data and generate a visual comparison for the model.... | ['dataset', 'model'] | {} | ['model_predictions', 'visualization'] | ['regression', 'time_series_forecasting']
validmind.model_validation.TimeSeriesR2SquareBySegments | Time Series R2 Square By Segments | Evaluates the R-Squared values of regression models over specified time segments in time series data to assess... | ['dataset', 'model'] | {'segments': {'type': '_empty', 'default': None}} | ['model_performance', 'sklearn'] | ['regression', 'time_series_forecasting']
validmind.model_validation.TokenDisparity | Token Disparity | Evaluates the token disparity between reference and generated texts, visualizing the results through histograms and... | ['dataset', 'model'] | {} | ['nlp', 'text_data', 'visualization'] | ['text_classification', 'text_summarization']
validmind.model_validation.ToxicityScore | Toxicity Score | Assesses the toxicity levels of texts generated by NLP models to identify and mitigate harmful or offensive content.... | ['dataset', 'model'] | {} | ['nlp', 'text_data', 'visualization'] | ['text_classification', 'text_summarization']
validmind.model_validation.embeddings.ClusterDistribution | Cluster Distribution | Assesses the distribution of text embeddings across clusters produced by a model using KMeans clustering.... | ['model', 'dataset'] | {'num_clusters': {'type': 'int', 'default': 5}} | ['llm', 'text_data', 'embeddings', 'visualization'] | ['feature_extraction']
validmind.model_validation.embeddings.CosineSimilarityComparison | Cosine Similarity Comparison | Assesses the similarity between embeddings generated by different models using Cosine Similarity, providing both... | ['dataset', 'models'] | {} | ['visualization', 'dimensionality_reduction', 'embeddings'] | ['text_qa', 'text_generation', 'text_summarization']
validmind.model_validation.embeddings.CosineSimilarityDistributionCosine Similarity DistributionAssesses the similarity between predicted text embeddings from a model using a Cosine Similarity distribution...['dataset', 'model']{}['llm', 'text_data', 'embeddings', 'visualization']['feature_extraction']
validmind.model_validation.embeddings.CosineSimilarityHeatmapCosine Similarity HeatmapGenerates an interactive heatmap to visualize the cosine similarities among embeddings derived from a given model....['dataset', 'model']{'title': {'type': '_empty', 'default': 'Cosine Similarity Matrix'}, 'color': {'type': '_empty', 'default': 'Cosine Similarity'}, 'xaxis_title': {'type': '_empty', 'default': 'Index'}, 'yaxis_title': {'type': '_empty', 'default': 'Index'}, 'color_scale': {'type': '_empty', 'default': 'Blues'}}['visualization', 'dimensionality_reduction', 'embeddings']['text_qa', 'text_generation', 'text_summarization']
validmind.model_validation.embeddings.DescriptiveAnalyticsDescriptive AnalyticsEvaluates statistical properties of text embeddings in an ML model via mean, median, and standard deviation...['dataset', 'model']{}['llm', 'text_data', 'embeddings', 'visualization']['feature_extraction']
validmind.model_validation.embeddings.EmbeddingsVisualization2DEmbeddings Visualization2 DVisualizes 2D representation of text embeddings generated by a model using t-SNE technique....['model', 'dataset']{'cluster_column': {'type': None, 'default': None}, 'perplexity': {'type': 'int', 'default': 30}}['llm', 'text_data', 'embeddings', 'visualization']['feature_extraction']
validmind.model_validation.embeddings.EuclideanDistanceComparisonEuclidean Distance ComparisonAssesses and visualizes the dissimilarity between model embeddings using Euclidean distance, providing insights...['dataset', 'models']{}['visualization', 'dimensionality_reduction', 'embeddings']['text_qa', 'text_generation', 'text_summarization']
validmind.model_validation.embeddings.EuclideanDistanceHeatmapEuclidean Distance HeatmapGenerates an interactive heatmap to visualize the Euclidean distances among embeddings derived from a given model....['dataset', 'model']{'title': {'type': '_empty', 'default': 'Euclidean Distance Matrix'}, 'color': {'type': '_empty', 'default': 'Euclidean Distance'}, 'xaxis_title': {'type': '_empty', 'default': 'Index'}, 'yaxis_title': {'type': '_empty', 'default': 'Index'}, 'color_scale': {'type': '_empty', 'default': 'Blues'}}['visualization', 'dimensionality_reduction', 'embeddings']['text_qa', 'text_generation', 'text_summarization']
validmind.model_validation.embeddings.PCAComponentsPairwisePlotsPCA Components Pairwise PlotsGenerates scatter plots for pairwise combinations of principal component analysis (PCA) components of model...['dataset', 'model']{'n_components': {'type': '_empty', 'default': 3}}['visualization', 'dimensionality_reduction', 'embeddings']['text_qa', 'text_generation', 'text_summarization']
validmind.model_validation.embeddings.StabilityAnalysisKeywordStability Analysis KeywordEvaluates robustness of embedding models to keyword swaps in the test dataset....['dataset', 'model']{'keyword_dict': {'type': None, 'default': None}, 'mean_similarity_threshold': {'type': 'float', 'default': 0.7}}['llm', 'text_data', 'embeddings', 'visualization']['feature_extraction']
validmind.model_validation.embeddings.StabilityAnalysisRandomNoiseStability Analysis Random NoiseAssesses the robustness of text embeddings models to random noise introduced via text perturbations....['dataset', 'model']{'probability': {'type': 'float', 'default': 0.02}, 'mean_similarity_threshold': {'type': 'float', 'default': 0.7}}['llm', 'text_data', 'embeddings', 'visualization']['feature_extraction']
validmind.model_validation.embeddings.StabilityAnalysisSynonymsStability Analysis SynonymsEvaluates the stability of text embeddings models when words in test data are replaced by their synonyms randomly....['dataset', 'model']{'probability': {'type': 'float', 'default': 0.02}, 'mean_similarity_threshold': {'type': 'float', 'default': 0.7}}['llm', 'text_data', 'embeddings', 'visualization']['feature_extraction']
validmind.model_validation.embeddings.StabilityAnalysisTranslationStability Analysis TranslationEvaluates robustness of text embeddings models to noise introduced by translating the original text to another...['dataset', 'model']{'source_lang': {'type': 'str', 'default': 'en'}, 'target_lang': {'type': 'str', 'default': 'fr'}, 'mean_similarity_threshold': {'type': 'float', 'default': 0.7}}['llm', 'text_data', 'embeddings', 'visualization']['feature_extraction']
validmind.model_validation.embeddings.TSNEComponentsPairwisePlotsTSNE Components Pairwise PlotsCreates scatter plots for pairwise combinations of t-SNE components to visualize embeddings and highlight potential...['dataset', 'model']{'n_components': {'type': '_empty', 'default': 2}, 'perplexity': {'type': '_empty', 'default': 30}, 'title': {'type': '_empty', 'default': 't-SNE'}}['visualization', 'dimensionality_reduction', 'embeddings']['text_qa', 'text_generation', 'text_summarization']
validmind.model_validation.ragas.AnswerCorrectnessAnswer CorrectnessEvaluates the correctness of answers in a dataset with respect to the provided ground...['dataset']{'user_input_column': {'type': '_empty', 'default': 'user_input'}, 'response_column': {'type': '_empty', 'default': 'response'}, 'reference_column': {'type': '_empty', 'default': 'reference'}}['ragas', 'llm']['text_qa', 'text_generation', 'text_summarization']
validmind.model_validation.ragas.AspectCriticAspect CriticEvaluates generations against the following aspects: harmfulness, maliciousness,...['dataset']{'user_input_column': {'type': '_empty', 'default': 'user_input'}, 'response_column': {'type': '_empty', 'default': 'response'}, 'retrieved_contexts_column': {'type': '_empty', 'default': None}, 'aspects': {'type': 'list', 'default': ['coherence', 'conciseness', 'correctness', 'harmfulness', 'maliciousness']}, 'additional_aspects': {'type': 'list', 'default': None}}['ragas', 'llm', 'qualitative']['text_summarization', 'text_generation', 'text_qa']
validmind.model_validation.ragas.ContextEntityRecallContext Entity RecallEvaluates the context entity recall for dataset entries and visualizes the results....['dataset']{'retrieved_contexts_column': {'type': 'str', 'default': 'retrieved_contexts'}, 'reference_column': {'type': 'str', 'default': 'reference'}}['ragas', 'llm', 'retrieval_performance']['text_qa', 'text_generation', 'text_summarization']
validmind.model_validation.ragas.ContextPrecisionContext PrecisionContext Precision is a metric that evaluates whether all of the ground-truth...['dataset']{'user_input_column': {'type': 'str', 'default': 'user_input'}, 'retrieved_contexts_column': {'type': 'str', 'default': 'retrieved_contexts'}, 'reference_column': {'type': 'str', 'default': 'reference'}}['ragas', 'llm', 'retrieval_performance']['text_qa', 'text_generation', 'text_summarization', 'text_classification']
validmind.model_validation.ragas.ContextPrecisionWithoutReferenceContext Precision Without ReferenceContext Precision Without Reference is a metric used to evaluate the relevance of...['dataset']{'user_input_column': {'type': 'str', 'default': 'user_input'}, 'retrieved_contexts_column': {'type': 'str', 'default': 'retrieved_contexts'}, 'response_column': {'type': 'str', 'default': 'response'}}['ragas', 'llm', 'retrieval_performance']['text_qa', 'text_generation', 'text_summarization', 'text_classification']
validmind.model_validation.ragas.ContextRecallContext RecallContext recall measures the extent to which the retrieved context aligns with the...['dataset']{'user_input_column': {'type': 'str', 'default': 'user_input'}, 'retrieved_contexts_column': {'type': 'str', 'default': 'retrieved_contexts'}, 'reference_column': {'type': 'str', 'default': 'reference'}}['ragas', 'llm', 'retrieval_performance']['text_qa', 'text_generation', 'text_summarization', 'text_classification']
validmind.model_validation.ragas.FaithfulnessFaithfulnessEvaluates the faithfulness of the generated answers with respect to retrieved contexts....['dataset']{'user_input_column': {'type': '_empty', 'default': 'user_input'}, 'response_column': {'type': '_empty', 'default': 'response'}, 'retrieved_contexts_column': {'type': '_empty', 'default': 'retrieved_contexts'}}['ragas', 'llm', 'rag_performance']['text_qa', 'text_generation', 'text_summarization']
validmind.model_validation.ragas.NoiseSensitivityNoise SensitivityAssesses the sensitivity of a Large Language Model (LLM) to noise in retrieved context by measuring how often it...['dataset']{'response_column': {'type': '_empty', 'default': 'response'}, 'retrieved_contexts_column': {'type': '_empty', 'default': 'retrieved_contexts'}, 'reference_column': {'type': '_empty', 'default': 'reference'}, 'focus': {'type': '_empty', 'default': 'relevant'}, 'user_input_column': {'type': '_empty', 'default': 'user_input'}}['ragas', 'llm', 'rag_performance']['text_qa', 'text_generation', 'text_summarization']
validmind.model_validation.ragas.ResponseRelevancyResponse RelevancyAssesses how pertinent the generated answer is to the given prompt....['dataset']{'user_input_column': {'type': '_empty', 'default': 'user_input'}, 'retrieved_contexts_column': {'type': '_empty', 'default': None}, 'response_column': {'type': '_empty', 'default': 'response'}}['ragas', 'llm', 'rag_performance']['text_qa', 'text_generation', 'text_summarization']
validmind.model_validation.ragas.SemanticSimilaritySemantic SimilarityCalculates the semantic similarity between generated responses and ground truths...['dataset']{'response_column': {'type': '_empty', 'default': 'response'}, 'reference_column': {'type': '_empty', 'default': 'reference'}}['ragas', 'llm']['text_qa', 'text_generation', 'text_summarization']
validmind.model_validation.sklearn.AdjustedMutualInformationAdjusted Mutual InformationEvaluates clustering model performance by measuring mutual information between true and predicted labels, adjusting...['model', 'dataset']{}['sklearn', 'model_performance', 'clustering']['clustering']
validmind.model_validation.sklearn.AdjustedRandIndexAdjusted Rand IndexMeasures the similarity between two data clusters using the Adjusted Rand Index (ARI) metric in clustering machine...['model', 'dataset']{}['sklearn', 'model_performance', 'clustering']['clustering']
validmind.model_validation.sklearn.CalibrationCurveCalibration CurveEvaluates the calibration of probability estimates by comparing predicted probabilities against observed...['model', 'dataset']{'n_bins': {'type': 'int', 'default': 10}}['sklearn', 'model_performance', 'classification']['classification']
validmind.model_validation.sklearn.ClassifierPerformanceClassifier PerformanceEvaluates performance of binary or multiclass classification models using precision, recall, F1-Score, accuracy,...['dataset', 'model']{'average': {'type': 'str', 'default': 'macro'}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.ClassifierThresholdOptimizationClassifier Threshold OptimizationAnalyzes and visualizes different threshold optimization methods for binary classification models....['dataset', 'model']{'methods': {'type': None, 'default': None}, 'target_recall': {'type': None, 'default': None}}['model_validation', 'threshold_optimization', 'classification_metrics']['classification']
validmind.model_validation.sklearn.ClusterCosineSimilarityCluster Cosine SimilarityMeasures the intra-cluster similarity of a clustering model using cosine similarity....['model', 'dataset']{}['sklearn', 'model_performance', 'clustering']['clustering']
validmind.model_validation.sklearn.ClusterPerformanceMetricsCluster Performance MetricsEvaluates the performance of clustering machine learning models using multiple established metrics....['model', 'dataset']{}['sklearn', 'model_performance', 'clustering']['clustering']
validmind.model_validation.sklearn.CompletenessScoreCompleteness ScoreEvaluates a clustering model's capacity to categorize instances from a single class into the same cluster....['model', 'dataset']{}['sklearn', 'model_performance', 'clustering']['clustering']
validmind.model_validation.sklearn.ConfusionMatrixConfusion MatrixEvaluates and visually represents the classification ML model's predictive performance using a Confusion Matrix...['dataset', 'model']{'threshold': {'type': 'float', 'default': 0.5}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.FeatureImportanceFeature ImportanceCompute feature importance scores for a given model and generate a summary table...['dataset', 'model']{'num_features': {'type': 'int', 'default': 3}}['model_explainability', 'sklearn']['regression', 'time_series_forecasting']
validmind.model_validation.sklearn.FowlkesMallowsScoreFowlkes Mallows ScoreEvaluates the similarity between predicted and actual cluster assignments in a model using the Fowlkes-Mallows...['dataset', 'model']{}['sklearn', 'model_performance']['clustering']
validmind.model_validation.sklearn.HomogeneityScoreHomogeneity ScoreAssesses clustering homogeneity by comparing true and predicted labels, scoring from 0 (heterogeneous) to 1...['dataset', 'model']{}['sklearn', 'model_performance']['clustering']
validmind.model_validation.sklearn.HyperParametersTuningHyper Parameters TuningPerforms exhaustive grid search over specified parameter ranges to find optimal model configurations...['model', 'dataset']{'param_grid': {'type': 'dict', 'default': None}, 'scoring': {'type': None, 'default': None}, 'thresholds': {'type': None, 'default': None}, 'fit_params': {'type': 'dict', 'default': None}}['sklearn', 'model_performance']['clustering', 'classification']
validmind.model_validation.sklearn.KMeansClustersOptimizationK Means Clusters OptimizationOptimizes the number of clusters in K-means models using Elbow and Silhouette methods....['model', 'dataset']{'n_clusters': {'type': None, 'default': None}}['sklearn', 'model_performance', 'kmeans']['clustering']
validmind.model_validation.sklearn.MinimumAccuracyMinimum AccuracyChecks if the model's prediction accuracy meets or surpasses a specified threshold....['dataset', 'model']{'min_threshold': {'type': 'float', 'default': 0.7}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.MinimumF1ScoreMinimum F1 ScoreAssesses if the model's F1 score on the validation set meets a predefined minimum threshold, ensuring balanced...['dataset', 'model']{'min_threshold': {'type': 'float', 'default': 0.5}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.MinimumROCAUCScoreMinimum ROCAUC ScoreValidates model by checking if the ROC AUC score meets or surpasses a specified threshold....['dataset', 'model']{'min_threshold': {'type': 'float', 'default': 0.5}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.ModelParametersModel ParametersExtracts and displays model parameters in a structured format for transparency and reproducibility....['model']{'model_params': {'type': '_empty', 'default': None}}['model_training', 'metadata']['classification', 'regression']
validmind.model_validation.sklearn.ModelsPerformanceComparisonModels Performance ComparisonEvaluates and compares the performance of multiple Machine Learning models using various metrics like accuracy,...['dataset', 'models']{}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'model_comparison']['classification', 'text_classification']
validmind.model_validation.sklearn.OverfitDiagnosisOverfit DiagnosisAssesses potential overfitting in a model's predictions, identifying regions where performance between training and...['model', 'datasets']{'metric': {'type': 'str', 'default': None}, 'cut_off_threshold': {'type': 'float', 'default': 0.04}}['sklearn', 'binary_classification', 'multiclass_classification', 'linear_regression', 'model_diagnosis']['classification', 'regression']
validmind.model_validation.sklearn.PermutationFeatureImportancePermutation Feature ImportanceAssesses the significance of each feature in a model by evaluating the impact on model performance when feature...['model', 'dataset']{'fontsize': {'type': None, 'default': None}, 'figure_height': {'type': None, 'default': None}}['sklearn', 'binary_classification', 'multiclass_classification', 'feature_importance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.PopulationStabilityIndexPopulation Stability IndexAssesses the Population Stability Index (PSI) to quantify the stability of an ML model's predictions across...['datasets', 'model']{'num_bins': {'type': 'int', 'default': 10}, 'mode': {'type': 'str', 'default': 'fixed'}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.PrecisionRecallCurvePrecision Recall CurveEvaluates the precision-recall trade-off for binary classification models and visualizes the Precision-Recall curve....['model', 'dataset']{}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.ROCCurveROC CurveEvaluates binary classification model performance by generating and plotting the Receiver Operating Characteristic...['model', 'dataset']{}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.RegressionErrorsRegression ErrorsAssesses the performance and error distribution of a regression model using various error metrics....['model', 'dataset']{}['sklearn', 'model_performance']['regression', 'classification']
validmind.model_validation.sklearn.RegressionErrorsComparisonRegression Errors ComparisonAssesses multiple regression error metrics to compare model performance across different datasets, emphasizing...['datasets', 'models']{}['model_performance', 'sklearn']['regression', 'time_series_forecasting']
validmind.model_validation.sklearn.RegressionPerformanceRegression PerformanceEvaluates the performance of a regression model using five different metrics: MAE, MSE, RMSE, MAPE, and MBD....['model', 'dataset']{}['sklearn', 'model_performance']['regression']
validmind.model_validation.sklearn.RegressionR2SquareRegression R2 SquareAssesses the overall goodness-of-fit of a regression model by evaluating R-squared (R2) and Adjusted R-squared (Adj...['dataset', 'model']{}['sklearn', 'model_performance']['regression']
validmind.model_validation.sklearn.RegressionR2SquareComparisonRegression R2 Square ComparisonCompares R-Squared and Adjusted R-Squared values for different regression models across multiple datasets to assess...['datasets', 'models']{}['model_performance', 'sklearn']['regression', 'time_series_forecasting']
validmind.model_validation.sklearn.RobustnessDiagnosisRobustness DiagnosisAssesses the robustness of a machine learning model by evaluating performance decay under noisy conditions....['datasets', 'model']{'metric': {'type': 'str', 'default': None}, 'scaling_factor_std_dev_list': {'type': None, 'default': [0.1, 0.2, 0.3, 0.4, 0.5]}, 'performance_decay_threshold': {'type': 'float', 'default': 0.05}}['sklearn', 'model_diagnosis', 'visualization']['classification', 'regression']
validmind.model_validation.sklearn.SHAPGlobalImportanceSHAP Global ImportanceEvaluates and visualizes global feature importance using SHAP values for model explanation and risk identification....['model', 'dataset']{'kernel_explainer_samples': {'type': 'int', 'default': 10}, 'tree_or_linear_explainer_samples': {'type': 'int', 'default': 200}, 'class_of_interest': {'type': None, 'default': None}}['sklearn', 'binary_classification', 'multiclass_classification', 'feature_importance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.ScoreProbabilityAlignmentScore Probability AlignmentAnalyzes the alignment between credit scores and predicted probabilities....['model', 'dataset']{'score_column': {'type': 'str', 'default': 'score'}, 'n_bins': {'type': 'int', 'default': 10}}['visualization', 'credit_risk', 'calibration']['classification']
validmind.model_validation.sklearn.SilhouettePlotSilhouette PlotCalculates and visualizes Silhouette Score, assessing the degree of data point suitability to its cluster in ML...['model', 'dataset']{}['sklearn', 'model_performance']['clustering']
validmind.model_validation.sklearn.TrainingTestDegradationTraining Test DegradationTests if model performance degradation between training and test datasets exceeds a predefined threshold....['datasets', 'model']{'max_threshold': {'type': 'float', 'default': 0.1}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.VMeasureV MeasureEvaluates homogeneity and completeness of a clustering model using the V Measure Score....['dataset', 'model']{}['sklearn', 'model_performance']['clustering']
validmind.model_validation.sklearn.WeakspotsDiagnosisWeakspots DiagnosisIdentifies and visualizes weak spots in a machine learning model's performance across various sections of the...['datasets', 'model']{'features_columns': {'type': None, 'default': None}, 'metrics': {'type': None, 'default': None}, 'thresholds': {'type': None, 'default': None}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_diagnosis', 'visualization']['classification', 'text_classification']
validmind.model_validation.statsmodels.AutoARIMAAuto ARIMAEvaluates ARIMA models for time-series forecasting, ranking them using Bayesian and Akaike Information Criteria....['model', 'dataset']{}['time_series_data', 'forecasting', 'model_selection', 'statsmodels']['regression']
validmind.model_validation.statsmodels.CumulativePredictionProbabilitiesCumulative Prediction ProbabilitiesVisualizes cumulative probabilities of positive and negative classes for both training and testing in classification models....['dataset', 'model']{'title': {'type': '_empty', 'default': 'Cumulative Probabilities'}}['visualization', 'credit_risk']['classification']
validmind.model_validation.statsmodels.DurbinWatsonTestDurbin Watson TestAssesses autocorrelation in time series data features using the Durbin-Watson statistic....['dataset', 'model']{'threshold': {'type': '_empty', 'default': [1.5, 2.5]}}['time_series_data', 'forecasting', 'statistical_test', 'statsmodels']['regression']
validmind.model_validation.statsmodels.GINITableGINI TableEvaluates classification model performance using AUC, GINI, and KS metrics for training and test datasets....['dataset', 'model']{}['model_performance']['classification']
validmind.model_validation.statsmodels.KolmogorovSmirnovKolmogorov SmirnovAssesses whether each feature in the dataset aligns with a normal distribution using the Kolmogorov-Smirnov test....['model', 'dataset']{'dist': {'type': 'str', 'default': 'norm'}}['tabular_data', 'data_distribution', 'statistical_test', 'statsmodels']['classification', 'regression']
validmind.model_validation.statsmodels.LillieforsLillieforsAssesses the normality of feature distributions in an ML model's training dataset using the Lilliefors test....['dataset']{}['tabular_data', 'data_distribution', 'statistical_test', 'statsmodels']['classification', 'regression']
validmind.model_validation.statsmodels.PredictionProbabilitiesHistogramPrediction Probabilities HistogramAssesses the predictive probability distribution for binary classification to evaluate model performance and...['dataset', 'model']{'title': {'type': '_empty', 'default': 'Histogram of Predictive Probabilities'}}['visualization', 'credit_risk']['classification']
validmind.model_validation.statsmodels.RegressionCoeffsRegression CoeffsAssesses the significance and uncertainty of predictor variables in a regression model through visualization of...['model']{}['tabular_data', 'visualization', 'model_training']['regression']
validmind.model_validation.statsmodels.RegressionFeatureSignificanceRegression Feature SignificanceAssesses and visualizes the statistical significance of features in a regression model....['model']{'fontsize': {'type': 'int', 'default': 10}, 'p_threshold': {'type': 'float', 'default': 0.05}}['statistical_test', 'model_interpretation', 'visualization', 'feature_importance']['regression']
validmind.model_validation.statsmodels.RegressionModelForecastPlotRegression Model Forecast PlotGenerates plots to visually compare the forecasted outcomes of a regression model against actual observed values over...['model', 'dataset']{'start_date': {'type': None, 'default': None}, 'end_date': {'type': None, 'default': None}}['time_series_data', 'forecasting', 'visualization']['regression']
validmind.model_validation.statsmodels.RegressionModelForecastPlotLevelsRegression Model Forecast Plot LevelsAssesses the alignment between forecasted and observed values in regression models through visual plots...['model', 'dataset']{}['time_series_data', 'forecasting', 'visualization']['regression']
validmind.model_validation.statsmodels.RegressionModelSensitivityPlotRegression Model Sensitivity PlotAssesses the sensitivity of a regression model to changes in independent variables by applying shocks and...['dataset', 'model']{'shocks': {'type': None, 'default': [0.1]}, 'transformation': {'type': None, 'default': None}}['senstivity_analysis', 'visualization']['regression']
validmind.model_validation.statsmodels.RegressionModelSummaryRegression Model SummaryEvaluates regression model performance using metrics including R-Squared, Adjusted R-Squared, MSE, and RMSE....['dataset', 'model']{}['model_performance', 'regression']['regression']
validmind.model_validation.statsmodels.RegressionPermutationFeatureImportanceRegression Permutation Feature ImportanceAssesses the significance of each feature in a model by evaluating the impact on model performance when feature...['dataset', 'model']{'fontsize': {'type': 'int', 'default': 12}, 'figure_height': {'type': 'int', 'default': 500}}['statsmodels', 'feature_importance', 'visualization']['regression']
validmind.model_validation.statsmodels.ScorecardHistogramScorecard HistogramThe Scorecard Histogram test evaluates the distribution of credit scores between default and non-default instances,...['dataset']{'title': {'type': '_empty', 'default': 'Histogram of Scores'}, 'score_column': {'type': '_empty', 'default': 'score'}}['visualization', 'credit_risk', 'logistic_regression']['classification']
validmind.ongoing_monitoring.CalibrationCurveDriftCalibration Curve DriftEvaluates changes in probability calibration between reference and monitoring datasets....['datasets', 'model']{'n_bins': {'type': 'int', 'default': 10}, 'drift_pct_threshold': {'type': 'float', 'default': 20}}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.ongoing_monitoring.ClassDiscriminationDriftClass Discrimination DriftCompares classification discrimination metrics between reference and monitoring datasets....['datasets', 'model']{'drift_pct_threshold': {'type': '_empty', 'default': 20}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.ongoing_monitoring.ClassImbalanceDriftClass Imbalance DriftEvaluates drift in class distribution between reference and monitoring datasets....['datasets']{'drift_pct_threshold': {'type': 'float', 'default': 5.0}, 'title': {'type': 'str', 'default': 'Class Distribution Drift'}}['tabular_data', 'binary_classification', 'multiclass_classification']['classification']
validmind.ongoing_monitoring.ClassificationAccuracyDriftClassification Accuracy DriftCompares classification accuracy metrics between reference and monitoring datasets....['datasets', 'model']{'drift_pct_threshold': {'type': '_empty', 'default': 20}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.ongoing_monitoring.ConfusionMatrixDriftConfusion Matrix DriftCompares confusion matrix metrics between reference and monitoring datasets....['datasets', 'model']{'drift_pct_threshold': {'type': '_empty', 'default': 20}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.ongoing_monitoring.CumulativePredictionProbabilitiesDriftCumulative Prediction Probabilities DriftCompares cumulative prediction probability distributions between reference and monitoring datasets....['datasets', 'model']{}['visualization', 'credit_risk']['classification']
validmind.ongoing_monitoring.FeatureDriftFeature DriftEvaluates changes in feature distribution over time to identify potential model drift....['datasets']{'bins': {'type': '_empty', 'default': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]}, 'feature_columns': {'type': '_empty', 'default': None}, 'psi_threshold': {'type': '_empty', 'default': 0.2}}['visualization']['monitoring']
validmind.ongoing_monitoring.PredictionAcrossEachFeaturePrediction Across Each FeatureAssesses differences in model predictions across individual features between reference and monitoring datasets...['datasets', 'model']{}['visualization']['monitoring']
validmind.ongoing_monitoring.PredictionCorrelationPrediction CorrelationAssesses correlation changes between model predictions from reference and monitoring datasets to detect potential...['datasets', 'model']{'drift_pct_threshold': {'type': '_empty', 'default': 20}}['visualization']['monitoring']
validmind.ongoing_monitoring.PredictionProbabilitiesHistogramDriftPrediction Probabilities Histogram DriftCompares prediction probability distributions between reference and monitoring datasets....['datasets', 'model']{'title': {'type': '_empty', 'default': 'Prediction Probabilities Histogram Drift'}, 'drift_pct_threshold': {'type': 'float', 'default': 20.0}}['visualization', 'credit_risk']['classification']
validmind.ongoing_monitoring.PredictionQuantilesAcrossFeaturesPrediction Quantiles Across FeaturesAssesses differences in model prediction distributions across individual features between reference...['datasets', 'model']{}['visualization']['monitoring']
validmind.ongoing_monitoring.ROCCurveDriftROC Curve DriftCompares ROC curves between reference and monitoring datasets....['datasets', 'model']{}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.ongoing_monitoring.ScoreBandsDriftScore Bands DriftAnalyzes drift in population distribution and default rates across score bands....['datasets', 'model']{'score_column': {'type': 'str', 'default': 'score'}, 'score_bands': {'type': 'list', 'default': None}, 'drift_threshold': {'type': 'float', 'default': 20.0}}['visualization', 'credit_risk', 'scorecard']['classification']
validmind.ongoing_monitoring.ScorecardHistogramDriftScorecard Histogram DriftCompares score distributions between reference and monitoring datasets for each class....['datasets']{'score_column': {'type': 'str', 'default': 'score'}, 'title': {'type': 'str', 'default': 'Scorecard Histogram Drift'}, 'drift_pct_threshold': {'type': 'float', 'default': 20.0}}['visualization', 'credit_risk', 'logistic_regression']['classification']
validmind.ongoing_monitoring.TargetPredictionDistributionPlotTarget Prediction Distribution PlotAssesses differences in prediction distributions between a reference dataset and a monitoring dataset to identify...['datasets', 'model']{'drift_pct_threshold': {'type': '_empty', 'default': 20}}['visualization']['monitoring']
validmind.prompt_validation.BiasBiasAssesses potential bias in a Large Language Model by analyzing the distribution and order of exemplars in the...['model']{'min_threshold': {'type': '_empty', 'default': 7}}['llm', 'few_shot']['text_classification', 'text_summarization']
validmind.prompt_validation.ClarityClarityEvaluates and scores the clarity of prompts in a Large Language Model based on specified guidelines....['model']{'min_threshold': {'type': '_empty', 'default': 7}}['llm', 'zero_shot', 'few_shot']['text_classification', 'text_summarization']
validmind.prompt_validation.ConcisenessConcisenessAnalyzes and grades the conciseness of prompts provided to a Large Language Model....['model']{'min_threshold': {'type': '_empty', 'default': 7}}['llm', 'zero_shot', 'few_shot']['text_classification', 'text_summarization']
validmind.prompt_validation.DelimitationDelimitationEvaluates the proper use of delimiters in prompts provided to Large Language Models....['model']{'min_threshold': {'type': '_empty', 'default': 7}}['llm', 'zero_shot', 'few_shot']['text_classification', 'text_summarization']
validmind.prompt_validation.NegativeInstructionNegative InstructionEvaluates and grades the use of affirmative, proactive language over negative instructions in LLM prompts....['model']{'min_threshold': {'type': '_empty', 'default': 7}}['llm', 'zero_shot', 'few_shot']['text_classification', 'text_summarization']
validmind.prompt_validation.RobustnessRobustnessAssesses the robustness of prompts provided to a Large Language Model under varying conditions and contexts. This test...['model', 'dataset']{'num_tests': {'type': '_empty', 'default': 10}}['llm', 'zero_shot', 'few_shot']['text_classification', 'text_summarization']
validmind.prompt_validation.SpecificitySpecificityEvaluates and scores the specificity of prompts provided to a Large Language Model (LLM), based on clarity, detail,...['model']{'min_threshold': {'type': '_empty', 'default': 7}}['llm', 'zero_shot', 'few_shot']['text_classification', 'text_summarization']
validmind.unit_metrics.classification.AccuracyAccuracyCalculates the accuracy of a model['dataset', 'model']{}['classification']['classification']
validmind.unit_metrics.classification.F1F1Calculates the F1 score for a classification model.['model', 'dataset']{}['classification']['classification']
validmind.unit_metrics.classification.PrecisionPrecisionCalculates the precision for a classification model.['model', 'dataset']{}['classification']['classification']
validmind.unit_metrics.classification.ROC_AUCROC AUCCalculates the ROC AUC for a classification model.['model', 'dataset']{}['classification']['classification']
validmind.unit_metrics.classification.RecallRecallCalculates the recall for a classification model.['model', 'dataset']{}['classification']['classification']
validmind.unit_metrics.regression.AdjustedRSquaredScoreAdjusted R Squared ScoreCalculates the adjusted R-squared score for a regression model.['model', 'dataset']{}['regression']['regression']
validmind.unit_metrics.regression.GiniCoefficientGini CoefficientCalculates the Gini coefficient for a regression model.['dataset', 'model']{}['regression']['regression']
validmind.unit_metrics.regression.HuberLossHuber LossCalculates the Huber loss for a regression model.['model', 'dataset']{}['regression']['regression']
validmind.unit_metrics.regression.KolmogorovSmirnovStatisticKolmogorov Smirnov StatisticCalculates the Kolmogorov-Smirnov statistic for a regression model.['dataset', 'model']{}['regression']['regression']
validmind.unit_metrics.regression.MeanAbsoluteErrorMean Absolute ErrorCalculates the mean absolute error for a regression model.['model', 'dataset']{}['regression']['regression']
validmind.unit_metrics.regression.MeanAbsolutePercentageErrorMean Absolute Percentage ErrorCalculates the mean absolute percentage error for a regression model.['model', 'dataset']{}['regression']['regression']
validmind.unit_metrics.regression.MeanBiasDeviationMean Bias DeviationCalculates the mean bias deviation for a regression model.['model', 'dataset']{}['regression']['regression']
validmind.unit_metrics.regression.MeanSquaredErrorMean Squared ErrorCalculates the mean squared error for a regression model.['model', 'dataset']{}['regression']['regression']
validmind.unit_metrics.regression.QuantileLossQuantile LossCalculates the quantile loss for a regression model.['model', 'dataset']{'quantile': {'type': '_empty', 'default': 0.5}}['regression']['regression']
validmind.unit_metrics.regression.RSquaredScoreR Squared ScoreCalculates the R-squared score for a regression model.['model', 'dataset']{}['regression']['regression']
| validmind.unit_metrics.regression.RootMeanSquaredError | Root Mean Squared Error | Calculates the root mean squared error for a regression model. | ['model', 'dataset'] | {} | ['regression'] | ['regression'] |
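The rows above close out the full catalog listing with the unit metrics under `validmind.unit_metrics`. The narrower listing that follows repeats the `validmind.data_validation` tests and adds two True/False columns to the same fields. Before it, here is a minimal sketch of how a single catalog entry can be inspected and run, assuming the standard `validmind.tests` helpers (`describe_test`, `run_test`) and a previously initialized ValidMind dataset; `vm_dataset` is a hypothetical placeholder for the object returned by `vm.init_dataset(...)`.

```python
import validmind as vm

# Pull up the full docstring, required inputs, and parameters
# for a single test ID taken from the catalog above.
vm.tests.describe_test("validmind.data_validation.ClassImbalance")

# Run that test against an already-initialized ValidMind dataset.
# `vm_dataset` is a hypothetical placeholder for the output of vm.init_dataset(...);
# the params mirror the defaults shown in the catalog row.
result = vm.tests.run_test(
    "validmind.data_validation.ClassImbalance",
    inputs={"dataset": vm_dataset},
    params={"min_percent_threshold": 10},
)
result.log()  # optionally send the result to the ValidMind Platform
```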
| validmind.data_validation.ACFandPACFPlot | AC Fand PACF Plot | Analyzes time series data using Autocorrelation Function (ACF) and Partial Autocorrelation Function (PACF) plots to... | True | False | ['dataset'] | {} | ['time_series_data', 'forecasting', 'statistical_test', 'visualization'] | ['regression'] |
| validmind.data_validation.ADF | ADF | Assesses the stationarity of a time series dataset using the Augmented Dickey-Fuller (ADF) test.... | False | True | ['dataset'] | {} | ['time_series_data', 'statsmodels', 'forecasting', 'statistical_test', 'stationarity'] | ['regression'] |
| validmind.data_validation.AutoAR | Auto AR | Automatically identifies the optimal Autoregressive (AR) order for a time series using BIC and AIC criteria.... | False | True | ['dataset'] | {'max_ar_order': {'type': 'int', 'default': 3}} | ['time_series_data', 'statsmodels', 'forecasting', 'statistical_test'] | ['regression'] |
| validmind.data_validation.AutoMA | Auto MA | Automatically selects the optimal Moving Average (MA) order for each variable in a time series dataset based on... | False | True | ['dataset'] | {'max_ma_order': {'type': 'int', 'default': 3}} | ['time_series_data', 'statsmodels', 'forecasting', 'statistical_test'] | ['regression'] |
| validmind.data_validation.AutoStationarity | Auto Stationarity | Automates Augmented Dickey-Fuller test to assess stationarity across multiple time series in a DataFrame.... | False | True | ['dataset'] | {'max_order': {'type': 'int', 'default': 5}, 'threshold': {'type': 'float', 'default': 0.05}} | ['time_series_data', 'statsmodels', 'forecasting', 'statistical_test'] | ['regression'] |
| validmind.data_validation.BivariateScatterPlots | Bivariate Scatter Plots | Generates bivariate scatterplots to visually inspect relationships between pairs of numerical predictor variables... | True | False | ['dataset'] | {} | ['tabular_data', 'numerical_data', 'visualization'] | ['classification'] |
| validmind.data_validation.BoxPierce | Box Pierce | Detects autocorrelation in time-series data through the Box-Pierce test to validate model performance.... | False | True | ['dataset'] | {} | ['time_series_data', 'forecasting', 'statistical_test', 'statsmodels'] | ['regression'] |
| validmind.data_validation.ChiSquaredFeaturesTable | Chi Squared Features Table | Assesses the statistical association between categorical features and a target variable using the Chi-Squared test.... | False | True | ['dataset'] | {'p_threshold': {'type': '_empty', 'default': 0.05}} | ['tabular_data', 'categorical_data', 'statistical_test'] | ['classification'] |
| validmind.data_validation.ClassImbalance | Class Imbalance | Evaluates and quantifies class distribution imbalance in a dataset used by a machine learning model.... | True | True | ['dataset'] | {'min_percent_threshold': {'type': 'int', 'default': 10}} | ['tabular_data', 'binary_classification', 'multiclass_classification', 'data_quality'] | ['classification'] |
| validmind.data_validation.DatasetDescription | Dataset Description | Provides comprehensive analysis and statistical summaries of each column in a machine learning model's dataset.... | False | True | ['dataset'] | {} | ['tabular_data', 'time_series_data', 'text_data'] | ['classification', 'regression', 'text_classification', 'text_summarization'] |
| validmind.data_validation.DatasetSplit | Dataset Split | Evaluates and visualizes the distribution proportions among training, testing, and validation datasets of an ML... | False | True | ['datasets'] | {} | ['tabular_data', 'time_series_data', 'text_data'] | ['classification', 'regression', 'text_classification', 'text_summarization'] |
| validmind.data_validation.DescriptiveStatistics | Descriptive Statistics | Performs a detailed descriptive statistical analysis of both numerical and categorical data within a model's... | False | True | ['dataset'] | {} | ['tabular_data', 'time_series_data', 'data_quality'] | ['classification', 'regression'] |
| validmind.data_validation.DickeyFullerGLS | Dickey Fuller GLS | Assesses stationarity in time series data using the Dickey-Fuller GLS test to determine the order of integration.... | False | True | ['dataset'] | {} | ['time_series_data', 'forecasting', 'unit_root_test'] | ['regression'] |
| validmind.data_validation.Duplicates | Duplicates | Tests dataset for duplicate entries, ensuring model reliability via data quality verification.... | False | True | ['dataset'] | {'min_threshold': {'type': '_empty', 'default': 1}} | ['tabular_data', 'data_quality', 'text_data'] | ['classification', 'regression'] |
| validmind.data_validation.EngleGrangerCoint | Engle Granger Coint | Assesses the degree of co-movement between pairs of time series data using the Engle-Granger cointegration test.... | False | True | ['dataset'] | {'threshold': {'type': 'float', 'default': 0.05}} | ['time_series_data', 'statistical_test', 'forecasting'] | ['regression'] |
| validmind.data_validation.FeatureTargetCorrelationPlot | Feature Target Correlation Plot | Visualizes the correlation between input features and the model's target output in a color-coded horizontal bar... | True | False | ['dataset'] | {'fig_height': {'type': '_empty', 'default': 600}} | ['tabular_data', 'visualization', 'correlation'] | ['classification', 'regression'] |
| validmind.data_validation.HighCardinality | High Cardinality | Assesses the number of unique values in categorical columns to detect high cardinality and potential overfitting.... | False | True | ['dataset'] | {'num_threshold': {'type': 'int', 'default': 100}, 'percent_threshold': {'type': 'float', 'default': 0.1}, 'threshold_type': {'type': 'str', 'default': 'percent'}} | ['tabular_data', 'data_quality', 'categorical_data'] | ['classification', 'regression'] |
| validmind.data_validation.HighPearsonCorrelation | High Pearson Correlation | Identifies highly correlated feature pairs in a dataset suggesting feature redundancy or multicollinearity.... | False | True | ['dataset'] | {'max_threshold': {'type': 'float', 'default': 0.3}, 'top_n_correlations': {'type': 'int', 'default': 10}, 'feature_columns': {'type': 'list', 'default': None}} | ['tabular_data', 'data_quality', 'correlation'] | ['classification', 'regression'] |
| validmind.data_validation.IQROutliersBarPlot | IQR Outliers Bar Plot | Visualizes outlier distribution across percentiles in numerical data using the Interquartile Range (IQR) method.... | True | False | ['dataset'] | {'threshold': {'type': 'float', 'default': 1.5}, 'fig_width': {'type': 'int', 'default': 800}} | ['tabular_data', 'visualization', 'numerical_data'] | ['classification', 'regression'] |
| validmind.data_validation.IQROutliersTable | IQR Outliers Table | Determines and summarizes outliers in numerical features using the Interquartile Range method.... | False | True | ['dataset'] | {'threshold': {'type': 'float', 'default': 1.5}} | ['tabular_data', 'numerical_data'] | ['classification', 'regression'] |
| validmind.data_validation.IsolationForestOutliers | Isolation Forest Outliers | Detects outliers in a dataset using the Isolation Forest algorithm and visualizes results through scatter plots.... | True | False | ['dataset'] | {'random_state': {'type': 'int', 'default': 0}, 'contamination': {'type': 'float', 'default': 0.1}, 'feature_columns': {'type': 'list', 'default': None}} | ['tabular_data', 'anomaly_detection'] | ['classification'] |
| validmind.data_validation.JarqueBera | Jarque Bera | Assesses normality of dataset features in an ML model using the Jarque-Bera test.... | False | True | ['dataset'] | {} | ['tabular_data', 'data_distribution', 'statistical_test', 'statsmodels'] | ['classification', 'regression'] |
| validmind.data_validation.KPSS | KPSS | Assesses the stationarity of time-series data in a machine learning model using the KPSS unit root test.... | False | True | ['dataset'] | {} | ['time_series_data', 'stationarity', 'unit_root_test', 'statsmodels'] | ['data_validation'] |
| validmind.data_validation.LJungBox | L Jung Box | Assesses autocorrelations in dataset features by performing a Ljung-Box test on each feature.... | False | True | ['dataset'] | {} | ['time_series_data', 'forecasting', 'statistical_test', 'statsmodels'] | ['regression'] |
| validmind.data_validation.LaggedCorrelationHeatmap | Lagged Correlation Heatmap | Assesses and visualizes correlation between target variable and lagged independent variables in a time-series... | True | False | ['dataset'] | {'num_lags': {'type': 'int', 'default': 10}} | ['time_series_data', 'visualization'] | ['regression'] |
| validmind.data_validation.MissingValues | Missing Values | Evaluates dataset quality by ensuring missing value ratio across all features does not exceed a set threshold.... | False | True | ['dataset'] | {'min_threshold': {'type': 'int', 'default': 1}} | ['tabular_data', 'data_quality'] | ['classification', 'regression'] |
| validmind.data_validation.MissingValuesBarPlot | Missing Values Bar Plot | Assesses the percentage and distribution of missing values in the dataset via a bar plot, with emphasis on... | True | False | ['dataset'] | {'threshold': {'type': 'int', 'default': 80}, 'fig_height': {'type': 'int', 'default': 600}} | ['tabular_data', 'data_quality', 'visualization'] | ['classification', 'regression'] |
| validmind.data_validation.MutualInformation | Mutual Information | Calculates mutual information scores between features and target variable to evaluate feature relevance.... | True | False | ['dataset'] | {'min_threshold': {'type': 'float', 'default': 0.01}, 'task': {'type': 'str', 'default': 'classification'}} | ['feature_selection', 'data_analysis'] | ['classification', 'regression'] |
| validmind.data_validation.PearsonCorrelationMatrix | Pearson Correlation Matrix | Evaluates linear dependency between numerical variables in a dataset via a Pearson Correlation coefficient heat map.... | True | False | ['dataset'] | {} | ['tabular_data', 'numerical_data', 'correlation'] | ['classification', 'regression'] |
| validmind.data_validation.PhillipsPerronArch | Phillips Perron Arch | Assesses the stationarity of time series data in each feature of the ML model using the Phillips-Perron test.... | False | True | ['dataset'] | {} | ['time_series_data', 'forecasting', 'statistical_test', 'unit_root_test'] | ['regression'] |
| validmind.data_validation.ProtectedClassesDescription | Protected Classes Description | Visualizes the distribution of protected classes in the dataset relative to the target variable... | True | True | ['dataset'] | {'protected_classes': {'type': '_empty', 'default': None}} | ['bias_and_fairness', 'descriptive_statistics'] | ['classification', 'regression'] |
| validmind.data_validation.RollingStatsPlot | Rolling Stats Plot | Evaluates the stationarity of time series data by plotting its rolling mean and standard deviation over a specified... | True | False | ['dataset'] | {'window_size': {'type': 'int', 'default': 12}} | ['time_series_data', 'visualization', 'stationarity'] | ['regression'] |
| validmind.data_validation.RunsTest | Runs Test | Executes Runs Test on ML model to detect non-random patterns in output data sequence.... | False | True | ['dataset'] | {} | ['tabular_data', 'statistical_test', 'statsmodels'] | ['classification', 'regression'] |
| validmind.data_validation.ScatterPlot | Scatter Plot | Assesses visual relationships, patterns, and outliers among features in a dataset through scatter plot matrices.... | True | False | ['dataset'] | {} | ['tabular_data', 'visualization'] | ['classification', 'regression'] |
| validmind.data_validation.ScoreBandDefaultRates | Score Band Default Rates | Analyzes default rates and population distribution across credit score bands.... | False | True | ['dataset', 'model'] | {'score_column': {'type': 'str', 'default': 'score'}, 'score_bands': {'type': 'list', 'default': None}} | ['visualization', 'credit_risk', 'scorecard'] | ['classification'] |
| validmind.data_validation.SeasonalDecompose | Seasonal Decompose | Assesses patterns and seasonality in a time series dataset by decomposing its features into foundational components.... | True | False | ['dataset'] | {'seasonal_model': {'type': 'str', 'default': 'additive'}} | ['time_series_data', 'seasonality', 'statsmodels'] | ['regression'] |
| validmind.data_validation.ShapiroWilk | Shapiro Wilk | Evaluates feature-wise normality of training data using the Shapiro-Wilk test.... | False | True | ['dataset'] | {} | ['tabular_data', 'data_distribution', 'statistical_test'] | ['classification', 'regression'] |
| validmind.data_validation.Skewness | Skewness | Evaluates the skewness of numerical data in a dataset to check against a defined threshold, aiming to ensure data... | False | True | ['dataset'] | {'max_threshold': {'type': '_empty', 'default': 1}} | ['data_quality', 'tabular_data'] | ['classification', 'regression'] |
| validmind.data_validation.SpreadPlot | Spread Plot | Assesses potential correlations between pairs of time series variables through visualization to enhance... | True | False | ['dataset'] | {} | ['time_series_data', 'visualization'] | ['regression'] |
| validmind.data_validation.TabularCategoricalBarPlots | Tabular Categorical Bar Plots | Generates and visualizes bar plots for each category in categorical features to evaluate the dataset's composition.... | True | False | ['dataset'] | {} | ['tabular_data', 'visualization'] | ['classification', 'regression'] |
| validmind.data_validation.TabularDateTimeHistograms | Tabular Date Time Histograms | Generates histograms to provide graphical insight into the distribution of time intervals in a model's datetime... | True | False | ['dataset'] | {} | ['time_series_data', 'visualization'] | ['classification', 'regression'] |
| validmind.data_validation.TabularDescriptionTables | Tabular Description Tables | Summarizes key descriptive statistics for numerical, categorical, and datetime variables in a dataset.... | False | True | ['dataset'] | {} | ['tabular_data'] | ['classification', 'regression'] |
| validmind.data_validation.TabularNumericalHistograms | Tabular Numerical Histograms | Generates histograms for each numerical feature in a dataset to provide visual insights into data distribution and... | True | False | ['dataset'] | {} | ['tabular_data', 'visualization'] | ['classification', 'regression'] |
| validmind.data_validation.TargetRateBarPlots | Target Rate Bar Plots | Generates bar plots visualizing the default rates of categorical features for a classification machine learning... | True | False | ['dataset'] | {} | ['tabular_data', 'visualization', 'categorical_data'] | ['classification'] |
| validmind.data_validation.TimeSeriesDescription | Time Series Description | Generates a detailed analysis for the provided time series dataset, summarizing key statistics to identify trends,... | False | True | ['dataset'] | {} | ['time_series_data', 'analysis'] | ['regression'] |
| validmind.data_validation.TimeSeriesDescriptiveStatistics | Time Series Descriptive Statistics | Evaluates the descriptive statistics of a time series dataset to identify trends, patterns, and data quality issues.... | False | True | ['dataset'] | {} | ['time_series_data', 'analysis'] | ['regression'] |
| validmind.data_validation.TimeSeriesFrequency | Time Series Frequency | Evaluates consistency of time series data frequency and generates a frequency plot.... | True | True | ['dataset'] | {} | ['time_series_data'] | ['regression'] |
| validmind.data_validation.TimeSeriesHistogram | Time Series Histogram | Visualizes distribution of time-series data using histograms and Kernel Density Estimation (KDE) lines.... | True | False | ['dataset'] | {'nbins': {'type': '_empty', 'default': 30}} | ['data_validation', 'visualization', 'time_series_data'] | ['regression', 'time_series_forecasting'] |
| validmind.data_validation.TimeSeriesLinePlot | Time Series Line Plot | Generates and analyses time-series data through line plots revealing trends, patterns, anomalies over time.... | True | False | ['dataset'] | {} | ['time_series_data', 'visualization'] | ['regression'] |
| validmind.data_validation.TimeSeriesMissingValues | Time Series Missing Values | Validates time-series data quality by confirming the count of missing values is below a certain threshold.... | True | True | ['dataset'] | {'min_threshold': {'type': 'int', 'default': 1}} | ['time_series_data'] | ['regression'] |
| validmind.data_validation.TimeSeriesOutliers | Time Series Outliers | Identifies and visualizes outliers in time-series data using the z-score method.... | False | True | ['dataset'] | {'zscore_threshold': {'type': 'int', 'default': 3}} | ['time_series_data'] | ['regression'] |
| validmind.data_validation.TooManyZeroValues | Too Many Zero Values | Identifies numerical columns in a dataset that contain an excessive number of zero values, defined by a threshold... | False | True | ['dataset'] | {'max_percent_threshold': {'type': 'float', 'default': 0.03}} | ['tabular_data'] | ['regression', 'classification'] |
| validmind.data_validation.UniqueRows | Unique Rows | Verifies the diversity of the dataset by ensuring that the count of unique rows exceeds a prescribed threshold.... | False | True | ['dataset'] | {'min_percent_threshold': {'type': 'float', 'default': 1}} | ['tabular_data'] | ['regression', 'classification'] |
| validmind.data_validation.WOEBinPlots | WOE Bin Plots | Generates visualizations of Weight of Evidence (WoE) and Information Value (IV) for understanding predictive power... | True | False | ['dataset'] | {'breaks_adj': {'type': 'list', 'default': None}, 'fig_height': {'type': 'int', 'default': 600}, 'fig_width': {'type': 'int', 'default': 500}} | ['tabular_data', 'visualization', 'categorical_data'] | ['classification'] |
| validmind.data_validation.WOEBinTable | WOE Bin Table | Assesses the Weight of Evidence (WoE) and Information Value (IV) of each feature to evaluate its predictive power... | False | True | ['dataset'] | {'breaks_adj': {'type': 'list', 'default': None}} | ['tabular_data', 'categorical_data'] | ['classification'] |
| validmind.data_validation.ZivotAndrewsArch | Zivot Andrews Arch | Evaluates the order of integration and stationarity of time series data using the Zivot-Andrews unit root test.... | False | True | ['dataset'] | {} | ['time_series_data', 'stationarity', 'unit_root_test'] | ['regression'] |
| validmind.data_validation.nlp.CommonWords | Common Words | Assesses the most frequent non-stopwords in a text column for identifying prevalent language patterns.... | True | False | ['dataset'] | {} | ['nlp', 'text_data', 'visualization', 'frequency_analysis'] | ['text_classification', 'text_summarization'] |
| validmind.data_validation.nlp.Hashtags | Hashtags | Assesses hashtag frequency in a text column, highlighting usage trends and potential dataset bias or spam.... | True | False | ['dataset'] | {'top_hashtags': {'type': 'int', 'default': 25}} | ['nlp', 'text_data', 'visualization', 'frequency_analysis'] | ['text_classification', 'text_summarization'] |
| validmind.data_validation.nlp.LanguageDetection | Language Detection | Assesses the diversity of languages in a textual dataset by detecting and visualizing the distribution of languages.... | True | False | ['dataset'] | {} | ['nlp', 'text_data', 'visualization'] | ['text_classification', 'text_summarization'] |
| validmind.data_validation.nlp.Mentions | Mentions | Calculates and visualizes frequencies of '@' prefixed mentions in a text-based dataset for NLP model analysis.... | True | False | ['dataset'] | {'top_mentions': {'type': 'int', 'default': 25}} | ['nlp', 'text_data', 'visualization', 'frequency_analysis'] | ['text_classification', 'text_summarization'] |
| validmind.data_validation.nlp.PolarityAndSubjectivity | Polarity And Subjectivity | Analyzes the polarity and subjectivity of text data within a given dataset to visualize the sentiment distribution.... | True | True | ['dataset'] | {'threshold_subjectivity': {'type': '_empty', 'default': 0.5}, 'threshold_polarity': {'type': '_empty', 'default': 0}} | ['nlp', 'text_data', 'data_validation'] | ['nlp'] |
| validmind.data_validation.nlp.Punctuations | Punctuations | Analyzes and visualizes the frequency distribution of punctuation usage in a given text dataset.... | True | False | ['dataset'] | {'count_mode': {'type': '_empty', 'default': 'token'}} | ['nlp', 'text_data', 'visualization', 'frequency_analysis'] | ['text_classification', 'text_summarization', 'nlp'] |
| validmind.data_validation.nlp.Sentiment | Sentiment | Analyzes the sentiment of text data within a dataset using the VADER sentiment analysis tool.... | True | False | ['dataset'] | {} | ['nlp', 'text_data', 'data_validation'] | ['nlp'] |
| validmind.data_validation.nlp.StopWords | Stop Words | Evaluates and visualizes the frequency of English stop words in a text dataset against a defined threshold.... | True | True | ['dataset'] | {'min_percent_threshold': {'type': 'float', 'default': 0.5}, 'num_words': {'type': 'int', 'default': 25}} | ['nlp', 'text_data', 'frequency_analysis', 'visualization'] | ['text_classification', 'text_summarization'] |
| validmind.data_validation.nlp.TextDescription | Text Description | Conducts comprehensive textual analysis on a dataset using NLTK to evaluate various parameters and generate... | True | False | ['dataset'] | {'unwanted_tokens': {'type': 'set', 'default': {'s', 'mrs', 'us', "''", ' ', 'ms', 'dr', 'dollar', '``', 'mr', "'s", "s'"}}, 'lang': {'type': 'str', 'default': 'english'}} | ['nlp', 'text_data', 'visualization'] | ['text_classification', 'text_summarization'] |
validmind.data_validation.nlp.ToxicityToxicityAssesses the toxicity of text data within a dataset to visualize the distribution of toxicity scores....TrueFalse['dataset']{}['nlp', 'text_data', 'data_validation']['nlp']
validmind.model_validation.BertScoreBert ScoreAssesses the quality of machine-generated text using BERTScore metrics and visualizes results through histograms...TrueTrue['dataset', 'model']{'evaluation_model': {'type': '_empty', 'default': 'distilbert-base-uncased'}}['nlp', 'text_data', 'visualization']['text_classification', 'text_summarization']
validmind.model_validation.BleuScoreBleu ScoreEvaluates the quality of machine-generated text using BLEU metrics and visualizes the results through histograms...TrueTrue['dataset', 'model']{}['nlp', 'text_data', 'visualization']['text_classification', 'text_summarization']
validmind.model_validation.ClusterSizeDistributionCluster Size DistributionAssesses the performance of clustering models by comparing the distribution of cluster sizes in model predictions...TrueFalse['dataset', 'model']{}['sklearn', 'model_performance']['clustering']
validmind.model_validation.ContextualRecallContextual RecallEvaluates a Natural Language Generation model's ability to generate contextually relevant and factually correct...TrueTrue['dataset', 'model']{}['nlp', 'text_data', 'visualization']['text_classification', 'text_summarization']
validmind.model_validation.FeaturesAUCFeatures AUCEvaluates the discriminatory power of each individual feature within a binary classification model by calculating...TrueFalse['dataset']{'fontsize': {'type': 'int', 'default': 12}, 'figure_height': {'type': 'int', 'default': 500}}['feature_importance', 'AUC', 'visualization']['classification']
validmind.model_validation.MeteorScoreMeteor ScoreAssesses the quality of machine-generated translations by comparing them to human-produced references using the...TrueTrue['dataset', 'model']{}['nlp', 'text_data', 'visualization']['text_classification', 'text_summarization']
validmind.model_validation.ModelMetadataModel MetadataCompare metadata of different models and generate a summary table with the results....FalseTrue['model']{}['model_training', 'metadata']['regression', 'time_series_forecasting']
validmind.model_validation.ModelPredictionResidualsModel Prediction ResidualsAssesses normality and behavior of residuals in regression models through visualization and statistical tests....TrueTrue['dataset', 'model']{'nbins': {'type': 'int', 'default': 100}, 'p_value_threshold': {'type': 'float', 'default': 0.05}, 'start_date': {'type': None, 'default': None}, 'end_date': {'type': None, 'default': None}}['regression']['residual_analysis', 'visualization']
validmind.model_validation.RegardScoreRegard ScoreAssesses the sentiment and potential biases in text generated by NLP models by computing and visualizing regard...TrueTrue['dataset', 'model']{}['nlp', 'text_data', 'visualization']['text_classification', 'text_summarization']
validmind.model_validation.RegressionResidualsPlotRegression Residuals PlotEvaluates regression model performance using residual distribution and actual vs. predicted plots....TrueFalse['model', 'dataset']{'bin_size': {'type': 'float', 'default': 0.1}}['model_performance', 'visualization']['regression']
validmind.model_validation.RougeScoreRouge ScoreAssesses the quality of machine-generated text using ROUGE metrics and visualizes the results to provide...TrueTrue['dataset', 'model']{'metric': {'type': 'str', 'default': 'rouge-1'}}['nlp', 'text_data', 'visualization']['text_classification', 'text_summarization']
validmind.model_validation.TimeSeriesPredictionWithCITime Series Prediction With CIAssesses predictive accuracy and uncertainty in time series models, highlighting breaches beyond confidence...TrueTrue['dataset', 'model']{'confidence': {'type': 'float', 'default': 0.95}}['model_predictions', 'visualization']['regression', 'time_series_forecasting']
validmind.model_validation.TimeSeriesPredictionsPlotTime Series Predictions PlotPlot actual vs predicted values for time series data and generate a visual comparison for the model....TrueFalse['dataset', 'model']{}['model_predictions', 'visualization']['regression', 'time_series_forecasting']
validmind.model_validation.TimeSeriesR2SquareBySegmentsTime Series R2 Square By SegmentsEvaluates the R-Squared values of regression models over specified time segments in time series data to assess...TrueTrue['dataset', 'model']{'segments': {'type': None, 'default': None}}['model_performance', 'sklearn']['regression', 'time_series_forecasting']
validmind.model_validation.TokenDisparityToken DisparityEvaluates the token disparity between reference and generated texts, visualizing the results through histograms and...TrueTrue['dataset', 'model']{}['nlp', 'text_data', 'visualization']['text_classification', 'text_summarization']
validmind.model_validation.ToxicityScoreToxicity ScoreAssesses the toxicity levels of texts generated by NLP models to identify and mitigate harmful or offensive content....TrueTrue['dataset', 'model']{}['nlp', 'text_data', 'visualization']['text_classification', 'text_summarization']
validmind.model_validation.embeddings.ClusterDistributionCluster DistributionAssesses the distribution of text embeddings across clusters produced by a model using KMeans clustering....TrueFalse['model', 'dataset']{'num_clusters': {'type': 'int', 'default': 5}}['llm', 'text_data', 'embeddings', 'visualization']['feature_extraction']
validmind.model_validation.embeddings.CosineSimilarityComparisonCosine Similarity ComparisonAssesses the similarity between embeddings generated by different models using Cosine Similarity, providing both...TrueTrue['dataset', 'models']{}['visualization', 'dimensionality_reduction', 'embeddings']['text_qa', 'text_generation', 'text_summarization']
validmind.model_validation.embeddings.CosineSimilarityDistributionCosine Similarity DistributionAssesses the similarity between predicted text embeddings from a model using a Cosine Similarity distribution...TrueFalse['dataset', 'model']{}['llm', 'text_data', 'embeddings', 'visualization']['feature_extraction']
validmind.model_validation.embeddings.CosineSimilarityHeatmapCosine Similarity HeatmapGenerates an interactive heatmap to visualize the cosine similarities among embeddings derived from a given model....TrueFalse['dataset', 'model']{'title': {'type': '_empty', 'default': 'Cosine Similarity Matrix'}, 'color': {'type': '_empty', 'default': 'Cosine Similarity'}, 'xaxis_title': {'type': '_empty', 'default': 'Index'}, 'yaxis_title': {'type': '_empty', 'default': 'Index'}, 'color_scale': {'type': '_empty', 'default': 'Blues'}}['visualization', 'dimensionality_reduction', 'embeddings']['text_qa', 'text_generation', 'text_summarization']
validmind.model_validation.embeddings.DescriptiveAnalyticsDescriptive AnalyticsEvaluates statistical properties of text embeddings in an ML model via mean, median, and standard deviation...TrueFalse['dataset', 'model']{}['llm', 'text_data', 'embeddings', 'visualization']['feature_extraction']
validmind.model_validation.embeddings.EmbeddingsVisualization2DEmbeddings Visualization2 DVisualizes 2D representation of text embeddings generated by a model using t-SNE technique....TrueFalse['dataset', 'model']{'cluster_column': {'type': None, 'default': None}, 'perplexity': {'type': 'int', 'default': 30}}['llm', 'text_data', 'embeddings', 'visualization']['feature_extraction']
validmind.model_validation.embeddings.EuclideanDistanceComparisonEuclidean Distance ComparisonAssesses and visualizes the dissimilarity between model embeddings using Euclidean distance, providing insights...TrueTrue['dataset', 'models']{}['visualization', 'dimensionality_reduction', 'embeddings']['text_qa', 'text_generation', 'text_summarization']
validmind.model_validation.embeddings.EuclideanDistanceHeatmapEuclidean Distance HeatmapGenerates an interactive heatmap to visualize the Euclidean distances among embeddings derived from a given model....TrueFalse['dataset', 'model']{'title': {'type': '_empty', 'default': 'Euclidean Distance Matrix'}, 'color': {'type': '_empty', 'default': 'Euclidean Distance'}, 'xaxis_title': {'type': '_empty', 'default': 'Index'}, 'yaxis_title': {'type': '_empty', 'default': 'Index'}, 'color_scale': {'type': '_empty', 'default': 'Blues'}}['visualization', 'dimensionality_reduction', 'embeddings']['text_qa', 'text_generation', 'text_summarization']
validmind.model_validation.embeddings.PCAComponentsPairwisePlotsPCA Components Pairwise PlotsGenerates scatter plots for pairwise combinations of principal component analysis (PCA) components of model...TrueFalse['dataset', 'model']{'n_components': {'type': 'int', 'default': 3}}['visualization', 'dimensionality_reduction', 'embeddings']['text_qa', 'text_generation', 'text_summarization']
validmind.model_validation.embeddings.StabilityAnalysisKeywordStability Analysis KeywordEvaluates robustness of embedding models to keyword swaps in the test dataset....TrueTrue['dataset', 'model']{'keyword_dict': {'type': None, 'default': None}, 'mean_similarity_threshold': {'type': 'float', 'default': 0.7}}['llm', 'text_data', 'embeddings', 'visualization']['feature_extraction']
validmind.model_validation.embeddings.StabilityAnalysisRandomNoiseStability Analysis Random NoiseAssesses the robustness of text embeddings models to random noise introduced via text perturbations....TrueTrue['dataset', 'model']{'probability': {'type': 'float', 'default': 0.02}, 'mean_similarity_threshold': {'type': 'float', 'default': 0.7}}['llm', 'text_data', 'embeddings', 'visualization']['feature_extraction']
validmind.model_validation.embeddings.StabilityAnalysisSynonymsStability Analysis SynonymsEvaluates the stability of text embeddings models when words in test data are replaced by their synonyms randomly....TrueTrue['dataset', 'model']{'probability': {'type': 'float', 'default': 0.02}, 'mean_similarity_threshold': {'type': 'float', 'default': 0.7}}['llm', 'text_data', 'embeddings', 'visualization']['feature_extraction']
validmind.model_validation.embeddings.StabilityAnalysisTranslationStability Analysis TranslationEvaluates robustness of text embeddings models to noise introduced by translating the original text to another...TrueTrue['dataset', 'model']{'source_lang': {'type': 'str', 'default': 'en'}, 'target_lang': {'type': 'str', 'default': 'fr'}, 'mean_similarity_threshold': {'type': 'float', 'default': 0.7}}['llm', 'text_data', 'embeddings', 'visualization']['feature_extraction']
validmind.model_validation.embeddings.TSNEComponentsPairwisePlotsTSNE Components Pairwise PlotsCreates scatter plots for pairwise combinations of t-SNE components to visualize embeddings and highlight potential...TrueFalse['dataset', 'model']{'n_components': {'type': 'int', 'default': 2}, 'perplexity': {'type': 'int', 'default': 30}, 'title': {'type': 'str', 'default': 't-SNE'}}['visualization', 'dimensionality_reduction', 'embeddings']['text_qa', 'text_generation', 'text_summarization']
validmind.model_validation.ragas.AnswerCorrectnessAnswer CorrectnessEvaluates the correctness of answers in a dataset with respect to the provided ground...TrueTrue['dataset']{'user_input_column': {'type': 'str', 'default': 'user_input'}, 'response_column': {'type': 'str', 'default': 'response'}, 'reference_column': {'type': 'str', 'default': 'reference'}, 'judge_llm': {'type': '_empty', 'default': None}, 'judge_embeddings': {'type': '_empty', 'default': None}}['ragas', 'llm']['text_qa', 'text_generation', 'text_summarization']
validmind.model_validation.ragas.AspectCriticAspect CriticEvaluates generations against the following aspects: harmfulness, maliciousness,...TrueTrue['dataset']{'user_input_column': {'type': 'str', 'default': 'user_input'}, 'response_column': {'type': 'str', 'default': 'response'}, 'retrieved_contexts_column': {'type': None, 'default': None}, 'aspects': {'type': None, 'default': ['coherence', 'conciseness', 'correctness', 'harmfulness', 'maliciousness']}, 'additional_aspects': {'type': None, 'default': None}, 'judge_llm': {'type': '_empty', 'default': None}, 'judge_embeddings': {'type': '_empty', 'default': None}}['ragas', 'llm', 'qualitative']['text_summarization', 'text_generation', 'text_qa']
validmind.model_validation.ragas.ContextEntityRecallContext Entity RecallEvaluates the context entity recall for dataset entries and visualizes the results....TrueTrue['dataset']{'retrieved_contexts_column': {'type': 'str', 'default': 'retrieved_contexts'}, 'reference_column': {'type': 'str', 'default': 'reference'}, 'judge_llm': {'type': '_empty', 'default': None}, 'judge_embeddings': {'type': '_empty', 'default': None}}['ragas', 'llm', 'retrieval_performance']['text_qa', 'text_generation', 'text_summarization']
validmind.model_validation.ragas.ContextPrecisionContext PrecisionContext Precision is a metric that evaluates whether all of the ground-truth...TrueTrue['dataset']{'user_input_column': {'type': 'str', 'default': 'user_input'}, 'retrieved_contexts_column': {'type': 'str', 'default': 'retrieved_contexts'}, 'reference_column': {'type': 'str', 'default': 'reference'}, 'judge_llm': {'type': '_empty', 'default': None}, 'judge_embeddings': {'type': '_empty', 'default': None}}['ragas', 'llm', 'retrieval_performance']['text_qa', 'text_generation', 'text_summarization', 'text_classification']
validmind.model_validation.ragas.ContextPrecisionWithoutReferenceContext Precision Without ReferenceContext Precision Without Reference is a metric used to evaluate the relevance of...TrueTrue['dataset']{'user_input_column': {'type': 'str', 'default': 'user_input'}, 'retrieved_contexts_column': {'type': 'str', 'default': 'retrieved_contexts'}, 'response_column': {'type': 'str', 'default': 'response'}, 'judge_llm': {'type': '_empty', 'default': None}, 'judge_embeddings': {'type': '_empty', 'default': None}}['ragas', 'llm', 'retrieval_performance']['text_qa', 'text_generation', 'text_summarization', 'text_classification']
validmind.model_validation.ragas.ContextRecallContext RecallContext recall measures the extent to which the retrieved context aligns with the...TrueTrue['dataset']{'user_input_column': {'type': 'str', 'default': 'user_input'}, 'retrieved_contexts_column': {'type': 'str', 'default': 'retrieved_contexts'}, 'reference_column': {'type': 'str', 'default': 'reference'}, 'judge_llm': {'type': '_empty', 'default': None}, 'judge_embeddings': {'type': '_empty', 'default': None}}['ragas', 'llm', 'retrieval_performance']['text_qa', 'text_generation', 'text_summarization', 'text_classification']
validmind.model_validation.ragas.FaithfulnessFaithfulnessEvaluates the faithfulness of the generated answers with respect to retrieved contexts....TrueTrue['dataset']{'user_input_column': {'type': 'str', 'default': 'user_input'}, 'response_column': {'type': 'str', 'default': 'response'}, 'retrieved_contexts_column': {'type': 'str', 'default': 'retrieved_contexts'}, 'judge_llm': {'type': '_empty', 'default': None}, 'judge_embeddings': {'type': '_empty', 'default': None}}['ragas', 'llm', 'rag_performance']['text_qa', 'text_generation', 'text_summarization']
validmind.model_validation.ragas.NoiseSensitivityNoise SensitivityAssesses the sensitivity of a Large Language Model (LLM) to noise in retrieved context by measuring how often it...TrueTrue['dataset']{'response_column': {'type': 'str', 'default': 'response'}, 'retrieved_contexts_column': {'type': 'str', 'default': 'retrieved_contexts'}, 'reference_column': {'type': 'str', 'default': 'reference'}, 'focus': {'type': 'str', 'default': 'relevant'}, 'user_input_column': {'type': 'str', 'default': 'user_input'}, 'judge_llm': {'type': '_empty', 'default': None}, 'judge_embeddings': {'type': '_empty', 'default': None}}['ragas', 'llm', 'rag_performance']['text_qa', 'text_generation', 'text_summarization']
validmind.model_validation.ragas.ResponseRelevancyResponse RelevancyAssesses how pertinent the generated answer is to the given prompt....TrueTrue['dataset']{'user_input_column': {'type': 'str', 'default': 'user_input'}, 'retrieved_contexts_column': {'type': 'str', 'default': None}, 'response_column': {'type': 'str', 'default': 'response'}, 'judge_llm': {'type': '_empty', 'default': None}, 'judge_embeddings': {'type': '_empty', 'default': None}}['ragas', 'llm', 'rag_performance']['text_qa', 'text_generation', 'text_summarization']
validmind.model_validation.ragas.SemanticSimilaritySemantic SimilarityCalculates the semantic similarity between generated responses and ground truths...TrueTrue['dataset']{'response_column': {'type': 'str', 'default': 'response'}, 'reference_column': {'type': 'str', 'default': 'reference'}, 'judge_llm': {'type': '_empty', 'default': None}, 'judge_embeddings': {'type': '_empty', 'default': None}}['ragas', 'llm']['text_qa', 'text_generation', 'text_summarization']
validmind.model_validation.sklearn.AdjustedMutualInformationAdjusted Mutual InformationEvaluates clustering model performance by measuring mutual information between true and predicted labels, adjusting...FalseTrue['model', 'dataset']{}['sklearn', 'model_performance', 'clustering']['clustering']
validmind.model_validation.sklearn.AdjustedRandIndexAdjusted Rand IndexMeasures the similarity between two data clusters using the Adjusted Rand Index (ARI) metric in clustering machine...FalseTrue['model', 'dataset']{}['sklearn', 'model_performance', 'clustering']['clustering']
validmind.model_validation.sklearn.CalibrationCurveCalibration CurveEvaluates the calibration of probability estimates by comparing predicted probabilities against observed...TrueFalse['model', 'dataset']{'n_bins': {'type': 'int', 'default': 10}}['sklearn', 'model_performance', 'classification']['classification']
validmind.model_validation.sklearn.ClassifierPerformanceClassifier PerformanceEvaluates performance of binary or multiclass classification models using precision, recall, F1-Score, accuracy,...FalseTrue['dataset', 'model']{'average': {'type': 'str', 'default': 'macro'}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.ClassifierThresholdOptimizationClassifier Threshold OptimizationAnalyzes and visualizes different threshold optimization methods for binary classification models....FalseTrue['dataset', 'model']{'methods': {'type': None, 'default': None}, 'target_recall': {'type': None, 'default': None}}['model_validation', 'threshold_optimization', 'classification_metrics']['classification']
validmind.model_validation.sklearn.ClusterCosineSimilarityCluster Cosine SimilarityMeasures the intra-cluster similarity of a clustering model using cosine similarity....FalseTrue['model', 'dataset']{}['sklearn', 'model_performance', 'clustering']['clustering']
validmind.model_validation.sklearn.ClusterPerformanceMetricsCluster Performance MetricsEvaluates the performance of clustering machine learning models using multiple established metrics....FalseTrue['model', 'dataset']{}['sklearn', 'model_performance', 'clustering']['clustering']
validmind.model_validation.sklearn.CompletenessScoreCompleteness ScoreEvaluates a clustering model's capacity to categorize instances from a single class into the same cluster....FalseTrue['model', 'dataset']{}['sklearn', 'model_performance', 'clustering']['clustering']
validmind.model_validation.sklearn.ConfusionMatrixConfusion MatrixEvaluates and visually represents the classification ML model's predictive performance using a Confusion Matrix...TrueFalse['dataset', 'model']{'threshold': {'type': 'float', 'default': 0.5}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.FeatureImportanceFeature ImportanceCompute feature importance scores for a given model and generate a summary table...FalseTrue['dataset', 'model']{'num_features': {'type': 'int', 'default': 3}}['model_explainability', 'sklearn']['regression', 'time_series_forecasting']
validmind.model_validation.sklearn.FowlkesMallowsScoreFowlkes Mallows ScoreEvaluates the similarity between predicted and actual cluster assignments in a model using the Fowlkes-Mallows...FalseTrue['dataset', 'model']{}['sklearn', 'model_performance']['clustering']
validmind.model_validation.sklearn.HomogeneityScoreHomogeneity ScoreAssesses clustering homogeneity by comparing true and predicted labels, scoring from 0 (heterogeneous) to 1...FalseTrue['dataset', 'model']{}['sklearn', 'model_performance']['clustering']
validmind.model_validation.sklearn.HyperParametersTuningHyper Parameters TuningPerforms exhaustive grid search over specified parameter ranges to find optimal model configurations...FalseTrue['model', 'dataset']{'param_grid': {'type': 'dict', 'default': None}, 'scoring': {'type': None, 'default': None}, 'thresholds': {'type': None, 'default': None}, 'fit_params': {'type': 'dict', 'default': None}}['sklearn', 'model_performance']['clustering', 'classification']
validmind.model_validation.sklearn.KMeansClustersOptimizationK Means Clusters OptimizationOptimizes the number of clusters in K-means models using Elbow and Silhouette methods....TrueFalse['model', 'dataset']{'n_clusters': {'type': None, 'default': None}}['sklearn', 'model_performance', 'kmeans']['clustering']
validmind.model_validation.sklearn.MinimumAccuracyMinimum AccuracyChecks if the model's prediction accuracy meets or surpasses a specified threshold....FalseTrue['dataset', 'model']{'min_threshold': {'type': 'float', 'default': 0.7}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.MinimumF1ScoreMinimum F1 ScoreAssesses if the model's F1 score on the validation set meets a predefined minimum threshold, ensuring balanced...FalseTrue['dataset', 'model']{'min_threshold': {'type': 'float', 'default': 0.5}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.MinimumROCAUCScoreMinimum ROCAUC ScoreValidates model by checking if the ROC AUC score meets or surpasses a specified threshold....FalseTrue['dataset', 'model']{'min_threshold': {'type': 'float', 'default': 0.5}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.ModelParametersModel ParametersExtracts and displays model parameters in a structured format for transparency and reproducibility....FalseTrue['model']{'model_params': {'type': None, 'default': None}}['model_training', 'metadata']['classification', 'regression']
validmind.model_validation.sklearn.ModelsPerformanceComparisonModels Performance ComparisonEvaluates and compares the performance of multiple Machine Learning models using various metrics like accuracy,...FalseTrue['dataset', 'models']{}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'model_comparison']['classification', 'text_classification']
validmind.model_validation.sklearn.OverfitDiagnosisOverfit DiagnosisAssesses potential overfitting in a model's predictions, identifying regions where performance between training and...TrueTrue['model', 'datasets']{'metric': {'type': 'str', 'default': None}, 'cut_off_threshold': {'type': 'float', 'default': 0.04}}['sklearn', 'binary_classification', 'multiclass_classification', 'linear_regression', 'model_diagnosis']['classification', 'regression']
validmind.model_validation.sklearn.PermutationFeatureImportancePermutation Feature ImportanceAssesses the significance of each feature in a model by evaluating the impact on model performance when feature...TrueFalse['model', 'dataset']{'fontsize': {'type': None, 'default': None}, 'figure_height': {'type': None, 'default': None}}['sklearn', 'binary_classification', 'multiclass_classification', 'feature_importance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.PopulationStabilityIndexPopulation Stability IndexAssesses the Population Stability Index (PSI) to quantify the stability of an ML model's predictions across...TrueTrue['datasets', 'model']{'num_bins': {'type': 'int', 'default': 10}, 'mode': {'type': 'str', 'default': 'fixed'}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.PrecisionRecallCurvePrecision Recall CurveEvaluates the precision-recall trade-off for binary classification models and visualizes the Precision-Recall curve....TrueFalse['model', 'dataset']{}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.ROCCurveROC CurveEvaluates binary classification model performance by generating and plotting the Receiver Operating Characteristic...TrueFalse['model', 'dataset']{}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.RegressionErrorsRegression ErrorsAssesses the performance and error distribution of a regression model using various error metrics....FalseTrue['model', 'dataset']{}['sklearn', 'model_performance']['regression', 'classification']
validmind.model_validation.sklearn.RegressionErrorsComparisonRegression Errors ComparisonAssesses multiple regression error metrics to compare model performance across different datasets, emphasizing...FalseTrue['datasets', 'models']{}['model_performance', 'sklearn']['regression', 'time_series_forecasting']
validmind.model_validation.sklearn.RegressionPerformanceRegression PerformanceEvaluates the performance of a regression model using five different metrics: MAE, MSE, RMSE, MAPE, and MBD....FalseTrue['model', 'dataset']{}['sklearn', 'model_performance']['regression']
validmind.model_validation.sklearn.RegressionR2SquareRegression R2 SquareAssesses the overall goodness-of-fit of a regression model by evaluating R-squared (R2) and Adjusted R-squared (Adj...FalseTrue['dataset', 'model']{}['sklearn', 'model_performance']['regression']
validmind.model_validation.sklearn.RegressionR2SquareComparisonRegression R2 Square ComparisonCompares R-Squared and Adjusted R-Squared values for different regression models across multiple datasets to assess...FalseTrue['datasets', 'models']{}['model_performance', 'sklearn']['regression', 'time_series_forecasting']
validmind.model_validation.sklearn.RobustnessDiagnosisRobustness DiagnosisAssesses the robustness of a machine learning model by evaluating performance decay under noisy conditions....TrueTrue['datasets', 'model']{'metric': {'type': 'str', 'default': None}, 'scaling_factor_std_dev_list': {'type': None, 'default': [0.1, 0.2, 0.3, 0.4, 0.5]}, 'performance_decay_threshold': {'type': 'float', 'default': 0.05}}['sklearn', 'model_diagnosis', 'visualization']['classification', 'regression']
validmind.model_validation.sklearn.SHAPGlobalImportanceSHAP Global ImportanceEvaluates and visualizes global feature importance using SHAP values for model explanation and risk identification....FalseTrue['model', 'dataset']{'kernel_explainer_samples': {'type': 'int', 'default': 10}, 'tree_or_linear_explainer_samples': {'type': 'int', 'default': 200}, 'class_of_interest': {'type': None, 'default': None}}['sklearn', 'binary_classification', 'multiclass_classification', 'feature_importance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.ScoreProbabilityAlignmentScore Probability AlignmentAnalyzes the alignment between credit scores and predicted probabilities....TrueTrue['model', 'dataset']{'score_column': {'type': 'str', 'default': 'score'}, 'n_bins': {'type': 'int', 'default': 10}}['visualization', 'credit_risk', 'calibration']['classification']
validmind.model_validation.sklearn.SilhouettePlotSilhouette PlotCalculates and visualizes Silhouette Score, assessing the degree of data point suitability to its cluster in ML...TrueTrue['model', 'dataset']{}['sklearn', 'model_performance']['clustering']
validmind.model_validation.sklearn.TrainingTestDegradationTraining Test DegradationTests if model performance degradation between training and test datasets exceeds a predefined threshold....FalseTrue['datasets', 'model']{'max_threshold': {'type': 'float', 'default': 0.1}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.VMeasureV MeasureEvaluates homogeneity and completeness of a clustering model using the V Measure Score....FalseTrue['dataset', 'model']{}['sklearn', 'model_performance']['clustering']
validmind.model_validation.sklearn.WeakspotsDiagnosisWeakspots DiagnosisIdentifies and visualizes weak spots in a machine learning model's performance across various sections of the...TrueTrue['datasets', 'model']{'features_columns': {'type': None, 'default': None}, 'metrics': {'type': None, 'default': None}, 'thresholds': {'type': None, 'default': None}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_diagnosis', 'visualization']['classification', 'text_classification']
validmind.model_validation.statsmodels.AutoARIMAAuto ARIMAEvaluates ARIMA models for time-series forecasting, ranking them using Bayesian and Akaike Information Criteria....FalseTrue['model', 'dataset']{}['time_series_data', 'forecasting', 'model_selection', 'statsmodels']['regression']
validmind.model_validation.statsmodels.CumulativePredictionProbabilitiesCumulative Prediction ProbabilitiesVisualizes cumulative probabilities of positive and negative classes for both training and testing in classification models....TrueFalse['dataset', 'model']{'title': {'type': 'str', 'default': 'Cumulative Probabilities'}}['visualization', 'credit_risk']['classification']
validmind.model_validation.statsmodels.DurbinWatsonTestDurbin Watson TestAssesses autocorrelation in time series data features using the Durbin-Watson statistic....FalseTrue['dataset', 'model']{'threshold': {'type': None, 'default': [1.5, 2.5]}}['time_series_data', 'forecasting', 'statistical_test', 'statsmodels']['regression']
validmind.model_validation.statsmodels.GINITableGINI TableEvaluates classification model performance using AUC, GINI, and KS metrics for training and test datasets....FalseTrue['dataset', 'model']{}['model_performance']['classification']
validmind.model_validation.statsmodels.KolmogorovSmirnovKolmogorov SmirnovAssesses whether each feature in the dataset aligns with a normal distribution using the Kolmogorov-Smirnov test....FalseTrue['model', 'dataset']{'dist': {'type': 'str', 'default': 'norm'}}['tabular_data', 'data_distribution', 'statistical_test', 'statsmodels']['classification', 'regression']
validmind.model_validation.statsmodels.LillieforsLillieforsAssesses the normality of feature distributions in an ML model's training dataset using the Lilliefors test....FalseTrue['dataset']{}['tabular_data', 'data_distribution', 'statistical_test', 'statsmodels']['classification', 'regression']
validmind.model_validation.statsmodels.PredictionProbabilitiesHistogramPrediction Probabilities HistogramAssesses the predictive probability distribution for binary classification to evaluate model performance and...TrueFalse['dataset', 'model']{'title': {'type': 'str', 'default': 'Histogram of Predictive Probabilities'}}['visualization', 'credit_risk']['classification']
validmind.model_validation.statsmodels.RegressionCoeffsRegression CoeffsAssesses the significance and uncertainty of predictor variables in a regression model through visualization of...TrueTrue['model']{}['tabular_data', 'visualization', 'model_training']['regression']
validmind.model_validation.statsmodels.RegressionFeatureSignificanceRegression Feature SignificanceAssesses and visualizes the statistical significance of features in a regression model....TrueFalse['model']{'fontsize': {'type': 'int', 'default': 10}, 'p_threshold': {'type': 'float', 'default': 0.05}}['statistical_test', 'model_interpretation', 'visualization', 'feature_importance']['regression']
validmind.model_validation.statsmodels.RegressionModelForecastPlotRegression Model Forecast PlotGenerates plots to visually compare the forecasted outcomes of a regression model against actual observed values over...TrueFalse['model', 'dataset']{'start_date': {'type': None, 'default': None}, 'end_date': {'type': None, 'default': None}}['time_series_data', 'forecasting', 'visualization']['regression']
validmind.model_validation.statsmodels.RegressionModelForecastPlotLevelsRegression Model Forecast Plot LevelsAssesses the alignment between forecasted and observed values in regression models through visual plots...TrueFalse['model', 'dataset']{}['time_series_data', 'forecasting', 'visualization']['regression']
validmind.model_validation.statsmodels.RegressionModelSensitivityPlotRegression Model Sensitivity PlotAssesses the sensitivity of a regression model to changes in independent variables by applying shocks and...TrueFalse['dataset', 'model']{'shocks': {'type': None, 'default': [0.1]}, 'transformation': {'type': None, 'default': None}}['senstivity_analysis', 'visualization']['regression']
validmind.model_validation.statsmodels.RegressionModelSummaryRegression Model SummaryEvaluates regression model performance using metrics including R-Squared, Adjusted R-Squared, MSE, and RMSE....FalseTrue['dataset', 'model']{}['model_performance', 'regression']['regression']
validmind.model_validation.statsmodels.RegressionPermutationFeatureImportanceRegression Permutation Feature ImportanceAssesses the significance of each feature in a model by evaluating the impact on model performance when feature...TrueFalse['dataset', 'model']{'fontsize': {'type': 'int', 'default': 12}, 'figure_height': {'type': 'int', 'default': 500}}['statsmodels', 'feature_importance', 'visualization']['regression']
validmind.model_validation.statsmodels.ScorecardHistogramScorecard HistogramThe Scorecard Histogram test evaluates the distribution of credit scores between default and non-default instances,...TrueFalse['dataset']{'title': {'type': 'str', 'default': 'Histogram of Scores'}, 'score_column': {'type': 'str', 'default': 'score'}}['visualization', 'credit_risk', 'logistic_regression']['classification']
validmind.ongoing_monitoring.CalibrationCurveDriftCalibration Curve DriftEvaluates changes in probability calibration between reference and monitoring datasets....TrueTrue['datasets', 'model']{'n_bins': {'type': 'int', 'default': 10}, 'drift_pct_threshold': {'type': 'float', 'default': 20}}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.ongoing_monitoring.ClassDiscriminationDriftClass Discrimination DriftCompares classification discrimination metrics between reference and monitoring datasets....FalseTrue['datasets', 'model']{'drift_pct_threshold': {'type': '_empty', 'default': 20}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.ongoing_monitoring.ClassImbalanceDriftClass Imbalance DriftEvaluates drift in class distribution between reference and monitoring datasets....TrueTrue['datasets']{'drift_pct_threshold': {'type': 'float', 'default': 5.0}, 'title': {'type': 'str', 'default': 'Class Distribution Drift'}}['tabular_data', 'binary_classification', 'multiclass_classification']['classification']
validmind.ongoing_monitoring.ClassificationAccuracyDriftClassification Accuracy DriftCompares classification accuracy metrics between reference and monitoring datasets....FalseTrue['datasets', 'model']{'drift_pct_threshold': {'type': '_empty', 'default': 20}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.ongoing_monitoring.ConfusionMatrixDriftConfusion Matrix DriftCompares confusion matrix metrics between reference and monitoring datasets....FalseTrue['datasets', 'model']{'drift_pct_threshold': {'type': '_empty', 'default': 20}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.ongoing_monitoring.CumulativePredictionProbabilitiesDriftCumulative Prediction Probabilities DriftCompares cumulative prediction probability distributions between reference and monitoring datasets....TrueFalse['datasets', 'model']{}['visualization', 'credit_risk']['classification']
validmind.ongoing_monitoring.FeatureDriftFeature DriftEvaluates changes in feature distribution over time to identify potential model drift....TrueTrue['datasets']{'bins': {'type': '_empty', 'default': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]}, 'feature_columns': {'type': '_empty', 'default': None}, 'psi_threshold': {'type': '_empty', 'default': 0.2}}['visualization']['monitoring']
validmind.ongoing_monitoring.PredictionAcrossEachFeaturePrediction Across Each FeatureAssesses differences in model predictions across individual features between reference and monitoring datasets...TrueFalse['datasets', 'model']{}['visualization']['monitoring']
validmind.ongoing_monitoring.PredictionCorrelationPrediction CorrelationAssesses correlation changes between model predictions from reference and monitoring datasets to detect potential...TrueTrue['datasets', 'model']{'drift_pct_threshold': {'type': 'float', 'default': 20}}['visualization']['monitoring']
validmind.ongoing_monitoring.PredictionProbabilitiesHistogramDriftPrediction Probabilities Histogram DriftCompares prediction probability distributions between reference and monitoring datasets....TrueTrue['datasets', 'model']{'title': {'type': '_empty', 'default': 'Prediction Probabilities Histogram Drift'}, 'drift_pct_threshold': {'type': 'float', 'default': 20.0}}['visualization', 'credit_risk']['classification']
validmind.ongoing_monitoring.PredictionQuantilesAcrossFeaturesPrediction Quantiles Across FeaturesAssesses differences in model prediction distributions across individual features between reference...TrueFalse['datasets', 'model']{}['visualization']['monitoring']
validmind.ongoing_monitoring.ROCCurveDriftROC Curve DriftCompares ROC curves between reference and monitoring datasets....TrueFalse['datasets', 'model']{}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.ongoing_monitoring.ScoreBandsDriftScore Bands DriftAnalyzes drift in population distribution and default rates across score bands....FalseTrue['datasets', 'model']{'score_column': {'type': 'str', 'default': 'score'}, 'score_bands': {'type': 'list', 'default': None}, 'drift_threshold': {'type': 'float', 'default': 20.0}}['visualization', 'credit_risk', 'scorecard']['classification']
validmind.ongoing_monitoring.ScorecardHistogramDriftScorecard Histogram DriftCompares score distributions between reference and monitoring datasets for each class....TrueTrue['datasets']{'score_column': {'type': 'str', 'default': 'score'}, 'title': {'type': 'str', 'default': 'Scorecard Histogram Drift'}, 'drift_pct_threshold': {'type': 'float', 'default': 20.0}}['visualization', 'credit_risk', 'logistic_regression']['classification']
validmind.ongoing_monitoring.TargetPredictionDistributionPlotTarget Prediction Distribution PlotAssesses differences in prediction distributions between a reference dataset and a monitoring dataset to identify...TrueTrue['datasets', 'model']{'drift_pct_threshold': {'type': 'float', 'default': 20}}['visualization']['monitoring']
validmind.prompt_validation.BiasBiasAssesses potential bias in a Large Language Model by analyzing the distribution and order of exemplars in the...FalseTrue['model']{'min_threshold': {'type': '_empty', 'default': 7}, 'judge_llm': {'type': '_empty', 'default': None}}['llm', 'few_shot']['text_classification', 'text_summarization']
validmind.prompt_validation.ClarityClarityEvaluates and scores the clarity of prompts in a Large Language Model based on specified guidelines....FalseTrue['model']{'min_threshold': {'type': '_empty', 'default': 7}, 'judge_llm': {'type': '_empty', 'default': None}}['llm', 'zero_shot', 'few_shot']['text_classification', 'text_summarization']
validmind.prompt_validation.ConcisenessConcisenessAnalyzes and grades the conciseness of prompts provided to a Large Language Model....FalseTrue['model']{'min_threshold': {'type': '_empty', 'default': 7}, 'judge_llm': {'type': '_empty', 'default': None}}['llm', 'zero_shot', 'few_shot']['text_classification', 'text_summarization']
validmind.prompt_validation.DelimitationDelimitationEvaluates the proper use of delimiters in prompts provided to Large Language Models....FalseTrue['model']{'min_threshold': {'type': '_empty', 'default': 7}, 'judge_llm': {'type': '_empty', 'default': None}}['llm', 'zero_shot', 'few_shot']['text_classification', 'text_summarization']
validmind.prompt_validation.NegativeInstructionNegative InstructionEvaluates and grades the use of affirmative, proactive language over negative instructions in LLM prompts....FalseTrue['model']{'min_threshold': {'type': '_empty', 'default': 7}, 'judge_llm': {'type': '_empty', 'default': None}}['llm', 'zero_shot', 'few_shot']['text_classification', 'text_summarization']
validmind.prompt_validation.RobustnessRobustnessAssesses the robustness of prompts provided to a Large Language Model under varying conditions and contexts. This test...FalseTrue['model', 'dataset']{'num_tests': {'type': '_empty', 'default': 10}, 'judge_llm': {'type': '_empty', 'default': None}}['llm', 'zero_shot', 'few_shot']['text_classification', 'text_summarization']
validmind.prompt_validation.SpecificitySpecificityEvaluates and scores the specificity of prompts provided to a Large Language Model (LLM), based on clarity, detail,...FalseTrue['model']{'min_threshold': {'type': '_empty', 'default': 7}, 'judge_llm': {'type': '_empty', 'default': None}}['llm', 'zero_shot', 'few_shot']['text_classification', 'text_summarization']
validmind.unit_metrics.classification.AccuracyAccuracyCalculates the accuracy of a modelFalseFalse['dataset', 'model']{}['classification']['classification']
validmind.unit_metrics.classification.F1F1Calculates the F1 score for a classification model.FalseFalse['model', 'dataset']{}['classification']['classification']
validmind.unit_metrics.classification.PrecisionPrecisionCalculates the precision for a classification model.FalseFalse['model', 'dataset']{}['classification']['classification']
validmind.unit_metrics.classification.ROC_AUCROC AUCCalculates the ROC AUC for a classification model.FalseFalse['model', 'dataset']{}['classification']['classification']
validmind.unit_metrics.classification.RecallRecallCalculates the recall for a classification model.FalseFalse['model', 'dataset']{}['classification']['classification']
validmind.unit_metrics.regression.AdjustedRSquaredScoreAdjusted R Squared ScoreCalculates the adjusted R-squared score for a regression model.FalseFalse['model', 'dataset']{}['regression']['regression']
validmind.unit_metrics.regression.GiniCoefficientGini CoefficientCalculates the Gini coefficient for a regression model.FalseFalse['dataset', 'model']{}['regression']['regression']
validmind.unit_metrics.regression.HuberLossHuber LossCalculates the Huber loss for a regression model.FalseFalse['model', 'dataset']{}['regression']['regression']
validmind.unit_metrics.regression.KolmogorovSmirnovStatisticKolmogorov Smirnov StatisticCalculates the Kolmogorov-Smirnov statistic for a regression model.FalseFalse['dataset', 'model']{}['regression']['regression']
validmind.unit_metrics.regression.MeanAbsoluteErrorMean Absolute ErrorCalculates the mean absolute error for a regression model.FalseFalse['model', 'dataset']{}['regression']['regression']
validmind.unit_metrics.regression.MeanAbsolutePercentageErrorMean Absolute Percentage ErrorCalculates the mean absolute percentage error for a regression model.FalseFalse['model', 'dataset']{}['regression']['regression']
validmind.unit_metrics.regression.MeanBiasDeviationMean Bias DeviationCalculates the mean bias deviation for a regression model.FalseFalse['model', 'dataset']{}['regression']['regression']
validmind.unit_metrics.regression.MeanSquaredErrorMean Squared ErrorCalculates the mean squared error for a regression model.FalseFalse['model', 'dataset']{}['regression']['regression']
validmind.unit_metrics.regression.QuantileLossQuantile LossCalculates the quantile loss for a regression model.FalseFalse['model', 'dataset']{'quantile': {'type': '_empty', 'default': 0.5}}['regression']['regression']
validmind.unit_metrics.regression.RSquaredScoreR Squared ScoreCalculates the R-squared score for a regression model.FalseFalse['model', 'dataset']{}['regression']['regression']
validmind.unit_metrics.regression.RootMeanSquaredErrorRoot Mean Squared ErrorCalculates the root mean squared error for a regression model.FalseFalse['model', 'dataset']{}['regression']['regression']
\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 2, @@ -1912,20 +2304,20 @@ { "data": { "text/plain": [ - "['time_series_forecasting',\n", - " 'feature_extraction',\n", - " 'text_qa',\n", - " 'text_generation',\n", - " 'residual_analysis',\n", - " 'visualization',\n", + "['text_qa',\n", + " 'classification',\n", + " 'data_validation',\n", " 'text_classification',\n", + " 'feature_extraction',\n", " 'regression',\n", - " 'nlp',\n", - " 'text_summarization',\n", - " 'data_validation',\n", - " 'classification',\n", + " 'visualization',\n", " 'clustering',\n", - " 'monitoring']" + " 'time_series_forecasting',\n", + " 'text_summarization',\n", + " 'nlp',\n", + " 'residual_analysis',\n", + " 'monitoring',\n", + " 'text_generation']" ] }, "execution_count": 3, @@ -1945,66 +2337,66 @@ { "data": { "text/plain": [ - "['few_shot',\n", - " 'ragas',\n", - " 'bias_and_fairness',\n", - " 'AUC',\n", - " 'visualization',\n", - " 'rag_performance',\n", - " 'logistic_regression',\n", - " 'model_validation',\n", - " 'credit_risk',\n", - " 'model_selection',\n", - " 'linear_regression',\n", + "['senstivity_analysis',\n", + " 'calibration',\n", " 'clustering',\n", - " 'data_distribution',\n", - " 'model_explainability',\n", - " 'frequency_analysis',\n", - " 'model_interpretation',\n", + " 'anomaly_detection',\n", + " 'nlp',\n", + " 'classification_metrics',\n", + " 'dimensionality_reduction',\n", + " 'tabular_data',\n", " 'time_series_data',\n", - " 'forecasting',\n", + " 'model_predictions',\n", + " 'feature_selection',\n", + " 'correlation',\n", + " 'frequency_analysis',\n", + " 'embeddings',\n", + " 'regression',\n", " 'llm',\n", + " 'statsmodels',\n", + " 'ragas',\n", + " 'model_performance',\n", + " 'model_validation',\n", + " 'rag_performance',\n", + " 'model_training',\n", + " 'qualitative',\n", + " 'classification',\n", + " 'kmeans',\n", " 'multiclass_classification',\n", - " 'data_validation',\n", + " 'linear_regression',\n", + " 'data_quality',\n", + " 'text_data',\n", " 'binary_classification',\n", + " 'threshold_optimization',\n", " 'stationarity',\n", - " 'senstivity_analysis',\n", - " 'retrieval_performance',\n", - " 'categorical_data',\n", - " 'seasonality',\n", - " 'qualitative',\n", + " 'bias_and_fairness',\n", + " 'scorecard',\n", + " 'model_explainability',\n", " 'model_comparison',\n", - " 'model_training',\n", - " 'data_quality',\n", - " 'regression',\n", - " 'anomaly_detection',\n", - " 'calibration',\n", - " 'model_predictions',\n", - " 'dimensionality_reduction',\n", - " 'descriptive_statistics',\n", - " 'classification',\n", - " 'unit_root_test',\n", - " 'metadata',\n", - " 'threshold_optimization',\n", - " 'model_diagnosis',\n", - " 'feature_selection',\n", - " 'data_analysis',\n", + " 'numerical_data',\n", + " 'sklearn',\n", + " 'model_selection',\n", + " 'retrieval_performance',\n", + " 'zero_shot',\n", " 'statistical_test',\n", - " 'embeddings',\n", + " 'descriptive_statistics',\n", + " 'seasonality',\n", " 'analysis',\n", + " 'data_validation',\n", + " 'data_distribution',\n", " 'feature_importance',\n", - " 'scorecard',\n", - " 'correlation',\n", - " 'classification_metrics',\n", - " 'nlp',\n", - " 'sklearn',\n", - " 'kmeans',\n", - " 'statsmodels',\n", - " 'numerical_data',\n", - " 'zero_shot',\n", - " 'text_data',\n", - " 'tabular_data',\n", - " 'model_performance']" + " 'metadata',\n", + " 'few_shot',\n", + " 'visualization',\n", + " 'credit_risk',\n", + " 'forecasting',\n", + " 'AUC',\n", + " 'logistic_regression',\n", + " 'model_diagnosis',\n", + " 
'model_interpretation',\n", + " 'unit_root_test',\n", + " 'categorical_data',\n", + " 'data_analysis']" ] }, "execution_count": 4, @@ -2032,82 +2424,82 @@ "data": { "text/html": [ "\n", - "\n", + "
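The task and tag listings above are the hooks for narrowing down the full test catalog. Below is a minimal sketch of how that filtering might look with the `validmind` package; the helper names under `vm.tests` and the `task`/`tags` keyword arguments are assumptions inferred from the outputs in this notebook, not confirmed signatures, so check `help(vm.tests.list_tests)` in your installed version.

```python
import validmind as vm

# Enumerate the task types and tags that tests are labeled with
# (assumed helpers; these are the same values shown in the lists above).
tasks = vm.tests.list_tasks()
tags = vm.tests.list_tags()

# Narrow the catalog to, e.g., time-series tests for regression models.
# NOTE: the `task` and `tags` parameter names are assumed here; verify them
# against the installed validmind version before relying on this call.
vm.tests.list_tests(task="regression", tags=["time_series_data"])
```

In a notebook, the last expression renders a table like the catalog shown earlier, restricted to the matching tests.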
\n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", "
TaskTagsTaskTags
regressionbias_and_fairness, visualization, model_selection, linear_regression, data_distribution, model_explainability, model_interpretation, time_series_data, forecasting, multiclass_classification, data_validation, binary_classification, stationarity, model_performance, senstivity_analysis, categorical_data, seasonality, data_quality, regression, model_predictions, descriptive_statistics, unit_root_test, metadata, model_diagnosis, feature_selection, data_analysis, statistical_test, analysis, feature_importance, correlation, sklearn, statsmodels, numerical_data, text_data, tabular_data, model_trainingregressionsenstivity_analysis, tabular_data, time_series_data, model_predictions, feature_selection, correlation, regression, statsmodels, model_performance, model_training, multiclass_classification, linear_regression, data_quality, text_data, model_explainability, binary_classification, stationarity, bias_and_fairness, numerical_data, sklearn, model_selection, statistical_test, descriptive_statistics, seasonality, analysis, data_validation, data_distribution, metadata, feature_importance, visualization, forecasting, model_diagnosis, model_interpretation, unit_root_test, categorical_data, data_analysis
classificationbias_and_fairness, AUC, visualization, logistic_regression, model_validation, credit_risk, linear_regression, data_distribution, time_series_data, multiclass_classification, binary_classification, categorical_data, model_comparison, model_training, data_quality, anomaly_detection, calibration, descriptive_statistics, classification, metadata, model_diagnosis, threshold_optimization, feature_selection, data_analysis, statistical_test, classification_metrics, feature_importance, scorecard, correlation, sklearn, statsmodels, numerical_data, text_data, tabular_data, model_performanceclassificationcalibration, anomaly_detection, classification_metrics, tabular_data, time_series_data, feature_selection, correlation, statsmodels, model_performance, model_validation, model_training, classification, multiclass_classification, linear_regression, data_quality, text_data, binary_classification, threshold_optimization, bias_and_fairness, scorecard, model_comparison, numerical_data, sklearn, statistical_test, descriptive_statistics, feature_importance, data_distribution, metadata, visualization, credit_risk, AUC, logistic_regression, model_diagnosis, categorical_data, data_analysis
text_classificationfew_shot, ragas, visualization, frequency_analysis, model_comparison, feature_importance, time_series_data, nlp, llm, sklearn, multiclass_classification, zero_shot, text_data, binary_classification, retrieval_performance, tabular_data, model_performance, model_diagnosistext_classificationmodel_performance, feature_importance, multiclass_classification, few_shot, frequency_analysis, zero_shot, text_data, visualization, llm, binary_classification, ragas, model_diagnosis, model_comparison, sklearn, nlp, retrieval_performance, tabular_data, time_series_data
text_summarizationfew_shot, ragas, qualitative, visualization, frequency_analysis, embeddings, rag_performance, time_series_data, nlp, llm, zero_shot, text_data, dimensionality_reduction, retrieval_performance, tabular_datatext_summarizationqualitative, few_shot, frequency_analysis, embeddings, zero_shot, text_data, visualization, llm, rag_performance, ragas, retrieval_performance, nlp, dimensionality_reduction, tabular_data, time_series_data
data_validationstationarity, time_series_data, statsmodels, unit_root_testdata_validationstationarity, statsmodels, unit_root_test, time_series_data
time_series_forecastingmodel_explainability, visualization, time_series_data, sklearn, model_predictions, data_validation, model_performance, model_training, metadatatime_series_forecastingmodel_training, data_validation, metadata, visualization, model_explainability, sklearn, model_performance, model_predictions, time_series_data
nlpvisualization, frequency_analysis, data_validation, nlp, text_datanlpdata_validation, frequency_analysis, text_data, visualization, nlp
clusteringsklearn, kmeans, clustering, model_performanceclusteringclustering, model_performance, kmeans, sklearn
residual_analysisregressionresidual_analysisregression
visualizationregressionvisualizationregression
feature_extractiontext_data, llm, visualization, embeddingsfeature_extractionembeddings, text_data, visualization, llm
text_qaragas, qualitative, visualization, embeddings, rag_performance, llm, dimensionality_reduction, retrieval_performancetext_qaqualitative, embeddings, visualization, llm, rag_performance, ragas, dimensionality_reduction, retrieval_performance
text_generationragas, qualitative, visualization, embeddings, rag_performance, llm, dimensionality_reduction, retrieval_performancetext_generationqualitative, embeddings, visualization, llm, rag_performance, ragas, dimensionality_reduction, retrieval_performance
monitoringvisualizationmonitoringvisualization
\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 5, @@ -2146,418 +2538,506 @@ "data": { "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
IDNameDescriptionRequired InputsParamsTagsTasksIDNameDescriptionHas FigureHas TableRequired InputsParamsTagsTasks
validmind.model_validation.ClusterSizeDistributionCluster Size DistributionAssesses the performance of clustering models by comparing the distribution of cluster sizes in model predictions...['dataset', 'model']{}['sklearn', 'model_performance']['clustering']
validmind.model_validation.TimeSeriesR2SquareBySegmentsTime Series R2 Square By SegmentsEvaluates the R-Squared values of regression models over specified time segments in time series data to assess...['dataset', 'model']{'segments': {'type': '_empty', 'default': None}}['model_performance', 'sklearn']['regression', 'time_series_forecasting']
validmind.model_validation.sklearn.AdjustedMutualInformationAdjusted Mutual InformationEvaluates clustering model performance by measuring mutual information between true and predicted labels, adjusting...['model', 'dataset']{}['sklearn', 'model_performance', 'clustering']['clustering']
validmind.model_validation.sklearn.AdjustedRandIndexAdjusted Rand IndexMeasures the similarity between two data clusters using the Adjusted Rand Index (ARI) metric in clustering machine...['model', 'dataset']{}['sklearn', 'model_performance', 'clustering']['clustering']
validmind.model_validation.sklearn.CalibrationCurveCalibration CurveEvaluates the calibration of probability estimates by comparing predicted probabilities against observed...['model', 'dataset']{'n_bins': {'type': 'int', 'default': 10}}['sklearn', 'model_performance', 'classification']['classification']
validmind.model_validation.sklearn.ClassifierPerformanceClassifier PerformanceEvaluates performance of binary or multiclass classification models using precision, recall, F1-Score, accuracy,...['dataset', 'model']{'average': {'type': 'str', 'default': 'macro'}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.ClassifierThresholdOptimizationClassifier Threshold OptimizationAnalyzes and visualizes different threshold optimization methods for binary classification models....['dataset', 'model']{'methods': {'type': None, 'default': None}, 'target_recall': {'type': None, 'default': None}}['model_validation', 'threshold_optimization', 'classification_metrics']['classification']
validmind.model_validation.sklearn.ClusterCosineSimilarityCluster Cosine SimilarityMeasures the intra-cluster similarity of a clustering model using cosine similarity....['model', 'dataset']{}['sklearn', 'model_performance', 'clustering']['clustering']
validmind.model_validation.sklearn.ClusterPerformanceMetricsCluster Performance MetricsEvaluates the performance of clustering machine learning models using multiple established metrics....['model', 'dataset']{}['sklearn', 'model_performance', 'clustering']['clustering']
validmind.model_validation.sklearn.CompletenessScoreCompleteness ScoreEvaluates a clustering model's capacity to categorize instances from a single class into the same cluster....['model', 'dataset']{}['sklearn', 'model_performance', 'clustering']['clustering']
validmind.model_validation.sklearn.ConfusionMatrixConfusion MatrixEvaluates and visually represents the classification ML model's predictive performance using a Confusion Matrix...['dataset', 'model']{'threshold': {'type': 'float', 'default': 0.5}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.FeatureImportanceFeature ImportanceCompute feature importance scores for a given model and generate a summary table...['dataset', 'model']{'num_features': {'type': 'int', 'default': 3}}['model_explainability', 'sklearn']['regression', 'time_series_forecasting']
validmind.model_validation.sklearn.FowlkesMallowsScoreFowlkes Mallows ScoreEvaluates the similarity between predicted and actual cluster assignments in a model using the Fowlkes-Mallows...['dataset', 'model']{}['sklearn', 'model_performance']['clustering']
validmind.model_validation.sklearn.HomogeneityScoreHomogeneity ScoreAssesses clustering homogeneity by comparing true and predicted labels, scoring from 0 (heterogeneous) to 1...['dataset', 'model']{}['sklearn', 'model_performance']['clustering']
validmind.model_validation.sklearn.HyperParametersTuningHyper Parameters TuningPerforms exhaustive grid search over specified parameter ranges to find optimal model configurations...['model', 'dataset']{'param_grid': {'type': 'dict', 'default': None}, 'scoring': {'type': None, 'default': None}, 'thresholds': {'type': None, 'default': None}, 'fit_params': {'type': 'dict', 'default': None}}['sklearn', 'model_performance']['clustering', 'classification']
validmind.model_validation.sklearn.KMeansClustersOptimizationK Means Clusters OptimizationOptimizes the number of clusters in K-means models using Elbow and Silhouette methods....['model', 'dataset']{'n_clusters': {'type': None, 'default': None}}['sklearn', 'model_performance', 'kmeans']['clustering']
validmind.model_validation.sklearn.MinimumAccuracyMinimum AccuracyChecks if the model's prediction accuracy meets or surpasses a specified threshold....['dataset', 'model']{'min_threshold': {'type': 'float', 'default': 0.7}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.MinimumF1ScoreMinimum F1 ScoreAssesses if the model's F1 score on the validation set meets a predefined minimum threshold, ensuring balanced...['dataset', 'model']{'min_threshold': {'type': 'float', 'default': 0.5}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.MinimumROCAUCScoreMinimum ROCAUC ScoreValidates model by checking if the ROC AUC score meets or surpasses a specified threshold....['dataset', 'model']{'min_threshold': {'type': 'float', 'default': 0.5}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.ModelParametersModel ParametersExtracts and displays model parameters in a structured format for transparency and reproducibility....['model']{'model_params': {'type': '_empty', 'default': None}}['model_training', 'metadata']['classification', 'regression']
validmind.model_validation.sklearn.ModelsPerformanceComparisonModels Performance ComparisonEvaluates and compares the performance of multiple Machine Learning models using various metrics like accuracy,...['dataset', 'models']{}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'model_comparison']['classification', 'text_classification']
validmind.model_validation.sklearn.OverfitDiagnosisOverfit DiagnosisAssesses potential overfitting in a model's predictions, identifying regions where performance between training and...['model', 'datasets']{'metric': {'type': 'str', 'default': None}, 'cut_off_threshold': {'type': 'float', 'default': 0.04}}['sklearn', 'binary_classification', 'multiclass_classification', 'linear_regression', 'model_diagnosis']['classification', 'regression']
validmind.model_validation.sklearn.PermutationFeatureImportancePermutation Feature ImportanceAssesses the significance of each feature in a model by evaluating the impact on model performance when feature...['model', 'dataset']{'fontsize': {'type': None, 'default': None}, 'figure_height': {'type': None, 'default': None}}['sklearn', 'binary_classification', 'multiclass_classification', 'feature_importance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.PopulationStabilityIndexPopulation Stability IndexAssesses the Population Stability Index (PSI) to quantify the stability of an ML model's predictions across...['datasets', 'model']{'num_bins': {'type': 'int', 'default': 10}, 'mode': {'type': 'str', 'default': 'fixed'}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.PrecisionRecallCurvePrecision Recall CurveEvaluates the precision-recall trade-off for binary classification models and visualizes the Precision-Recall curve....['model', 'dataset']{}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.ROCCurveROC CurveEvaluates binary classification model performance by generating and plotting the Receiver Operating Characteristic...['model', 'dataset']{}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.RegressionErrorsRegression ErrorsAssesses the performance and error distribution of a regression model using various error metrics....['model', 'dataset']{}['sklearn', 'model_performance']['regression', 'classification']
validmind.model_validation.sklearn.RegressionErrorsComparisonRegression Errors ComparisonAssesses multiple regression error metrics to compare model performance across different datasets, emphasizing...['datasets', 'models']{}['model_performance', 'sklearn']['regression', 'time_series_forecasting']
validmind.model_validation.sklearn.RegressionPerformanceRegression PerformanceEvaluates the performance of a regression model using five different metrics: MAE, MSE, RMSE, MAPE, and MBD....['model', 'dataset']{}['sklearn', 'model_performance']['regression']
validmind.model_validation.sklearn.RegressionR2SquareRegression R2 SquareAssesses the overall goodness-of-fit of a regression model by evaluating R-squared (R2) and Adjusted R-squared (Adj...['dataset', 'model']{}['sklearn', 'model_performance']['regression']
validmind.model_validation.sklearn.RegressionR2SquareComparisonRegression R2 Square ComparisonCompares R-Squared and Adjusted R-Squared values for different regression models across multiple datasets to assess...['datasets', 'models']{}['model_performance', 'sklearn']['regression', 'time_series_forecasting']
validmind.model_validation.sklearn.RobustnessDiagnosisRobustness DiagnosisAssesses the robustness of a machine learning model by evaluating performance decay under noisy conditions....['datasets', 'model']{'metric': {'type': 'str', 'default': None}, 'scaling_factor_std_dev_list': {'type': None, 'default': [0.1, 0.2, 0.3, 0.4, 0.5]}, 'performance_decay_threshold': {'type': 'float', 'default': 0.05}}['sklearn', 'model_diagnosis', 'visualization']['classification', 'regression']
validmind.model_validation.sklearn.SHAPGlobalImportanceSHAP Global ImportanceEvaluates and visualizes global feature importance using SHAP values for model explanation and risk identification....['model', 'dataset']{'kernel_explainer_samples': {'type': 'int', 'default': 10}, 'tree_or_linear_explainer_samples': {'type': 'int', 'default': 200}, 'class_of_interest': {'type': None, 'default': None}}['sklearn', 'binary_classification', 'multiclass_classification', 'feature_importance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.ScoreProbabilityAlignmentScore Probability AlignmentAnalyzes the alignment between credit scores and predicted probabilities....['model', 'dataset']{'score_column': {'type': 'str', 'default': 'score'}, 'n_bins': {'type': 'int', 'default': 10}}['visualization', 'credit_risk', 'calibration']['classification']
validmind.model_validation.sklearn.SilhouettePlotSilhouette PlotCalculates and visualizes Silhouette Score, assessing the degree of data point suitability to its cluster in ML...['model', 'dataset']{}['sklearn', 'model_performance']['clustering']
validmind.model_validation.sklearn.TrainingTestDegradationTraining Test DegradationTests if model performance degradation between training and test datasets exceeds a predefined threshold....['datasets', 'model']{'max_threshold': {'type': 'float', 'default': 0.1}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.VMeasureV MeasureEvaluates homogeneity and completeness of a clustering model using the V Measure Score....['dataset', 'model']{}['sklearn', 'model_performance']['clustering']
validmind.model_validation.sklearn.WeakspotsDiagnosisWeakspots DiagnosisIdentifies and visualizes weak spots in a machine learning model's performance across various sections of the...['datasets', 'model']{'features_columns': {'type': None, 'default': None}, 'metrics': {'type': None, 'default': None}, 'thresholds': {'type': None, 'default': None}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_diagnosis', 'visualization']['classification', 'text_classification']
validmind.ongoing_monitoring.CalibrationCurveDriftCalibration Curve DriftEvaluates changes in probability calibration between reference and monitoring datasets....['datasets', 'model']{'n_bins': {'type': 'int', 'default': 10}, 'drift_pct_threshold': {'type': 'float', 'default': 20}}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.ongoing_monitoring.ClassDiscriminationDriftClass Discrimination DriftCompares classification discrimination metrics between reference and monitoring datasets....['datasets', 'model']{'drift_pct_threshold': {'type': '_empty', 'default': 20}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.ongoing_monitoring.ClassificationAccuracyDriftClassification Accuracy DriftCompares classification accuracy metrics between reference and monitoring datasets....['datasets', 'model']{'drift_pct_threshold': {'type': '_empty', 'default': 20}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.ongoing_monitoring.ConfusionMatrixDriftConfusion Matrix DriftCompares confusion matrix metrics between reference and monitoring datasets....['datasets', 'model']{'drift_pct_threshold': {'type': '_empty', 'default': 20}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.ongoing_monitoring.ROCCurveDriftROC Curve DriftCompares ROC curves between reference and monitoring datasets....['datasets', 'model']{}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']validmind.model_validation.ClusterSizeDistributionCluster Size DistributionAssesses the performance of clustering models by comparing the distribution of cluster sizes in model predictions...TrueFalse['dataset', 'model']{}['sklearn', 'model_performance']['clustering']
validmind.model_validation.TimeSeriesR2SquareBySegmentsTime Series R2 Square By SegmentsEvaluates the R-Squared values of regression models over specified time segments in time series data to assess...TrueTrue['dataset', 'model']{'segments': {'type': None, 'default': None}}['model_performance', 'sklearn']['regression', 'time_series_forecasting']
validmind.model_validation.sklearn.AdjustedMutualInformationAdjusted Mutual InformationEvaluates clustering model performance by measuring mutual information between true and predicted labels, adjusting...FalseTrue['model', 'dataset']{}['sklearn', 'model_performance', 'clustering']['clustering']
validmind.model_validation.sklearn.AdjustedRandIndexAdjusted Rand IndexMeasures the similarity between two data clusters using the Adjusted Rand Index (ARI) metric in clustering machine...FalseTrue['model', 'dataset']{}['sklearn', 'model_performance', 'clustering']['clustering']
validmind.model_validation.sklearn.CalibrationCurveCalibration CurveEvaluates the calibration of probability estimates by comparing predicted probabilities against observed...TrueFalse['model', 'dataset']{'n_bins': {'type': 'int', 'default': 10}}['sklearn', 'model_performance', 'classification']['classification']
validmind.model_validation.sklearn.ClassifierPerformanceClassifier PerformanceEvaluates performance of binary or multiclass classification models using precision, recall, F1-Score, accuracy,...FalseTrue['dataset', 'model']{'average': {'type': 'str', 'default': 'macro'}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.ClassifierThresholdOptimizationClassifier Threshold OptimizationAnalyzes and visualizes different threshold optimization methods for binary classification models....FalseTrue['dataset', 'model']{'methods': {'type': None, 'default': None}, 'target_recall': {'type': None, 'default': None}}['model_validation', 'threshold_optimization', 'classification_metrics']['classification']
validmind.model_validation.sklearn.ClusterCosineSimilarityCluster Cosine SimilarityMeasures the intra-cluster similarity of a clustering model using cosine similarity....FalseTrue['model', 'dataset']{}['sklearn', 'model_performance', 'clustering']['clustering']
validmind.model_validation.sklearn.ClusterPerformanceMetricsCluster Performance MetricsEvaluates the performance of clustering machine learning models using multiple established metrics....FalseTrue['model', 'dataset']{}['sklearn', 'model_performance', 'clustering']['clustering']
validmind.model_validation.sklearn.CompletenessScoreCompleteness ScoreEvaluates a clustering model's capacity to categorize instances from a single class into the same cluster....FalseTrue['model', 'dataset']{}['sklearn', 'model_performance', 'clustering']['clustering']
validmind.model_validation.sklearn.ConfusionMatrixConfusion MatrixEvaluates and visually represents the classification ML model's predictive performance using a Confusion Matrix...TrueFalse['dataset', 'model']{'threshold': {'type': 'float', 'default': 0.5}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.FeatureImportanceFeature ImportanceCompute feature importance scores for a given model and generate a summary table...FalseTrue['dataset', 'model']{'num_features': {'type': 'int', 'default': 3}}['model_explainability', 'sklearn']['regression', 'time_series_forecasting']
validmind.model_validation.sklearn.FowlkesMallowsScoreFowlkes Mallows ScoreEvaluates the similarity between predicted and actual cluster assignments in a model using the Fowlkes-Mallows...FalseTrue['dataset', 'model']{}['sklearn', 'model_performance']['clustering']
validmind.model_validation.sklearn.HomogeneityScoreHomogeneity ScoreAssesses clustering homogeneity by comparing true and predicted labels, scoring from 0 (heterogeneous) to 1...FalseTrue['dataset', 'model']{}['sklearn', 'model_performance']['clustering']
validmind.model_validation.sklearn.HyperParametersTuningHyper Parameters TuningPerforms exhaustive grid search over specified parameter ranges to find optimal model configurations...FalseTrue['model', 'dataset']{'param_grid': {'type': 'dict', 'default': None}, 'scoring': {'type': None, 'default': None}, 'thresholds': {'type': None, 'default': None}, 'fit_params': {'type': 'dict', 'default': None}}['sklearn', 'model_performance']['clustering', 'classification']
validmind.model_validation.sklearn.KMeansClustersOptimizationK Means Clusters OptimizationOptimizes the number of clusters in K-means models using Elbow and Silhouette methods....TrueFalse['model', 'dataset']{'n_clusters': {'type': None, 'default': None}}['sklearn', 'model_performance', 'kmeans']['clustering']
validmind.model_validation.sklearn.MinimumAccuracyMinimum AccuracyChecks if the model's prediction accuracy meets or surpasses a specified threshold....FalseTrue['dataset', 'model']{'min_threshold': {'type': 'float', 'default': 0.7}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.MinimumF1ScoreMinimum F1 ScoreAssesses if the model's F1 score on the validation set meets a predefined minimum threshold, ensuring balanced...FalseTrue['dataset', 'model']{'min_threshold': {'type': 'float', 'default': 0.5}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.MinimumROCAUCScoreMinimum ROCAUC ScoreValidates model by checking if the ROC AUC score meets or surpasses a specified threshold....FalseTrue['dataset', 'model']{'min_threshold': {'type': 'float', 'default': 0.5}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.ModelParametersModel ParametersExtracts and displays model parameters in a structured format for transparency and reproducibility....FalseTrue['model']{'model_params': {'type': None, 'default': None}}['model_training', 'metadata']['classification', 'regression']
validmind.model_validation.sklearn.ModelsPerformanceComparisonModels Performance ComparisonEvaluates and compares the performance of multiple Machine Learning models using various metrics like accuracy,...FalseTrue['dataset', 'models']{}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'model_comparison']['classification', 'text_classification']
validmind.model_validation.sklearn.OverfitDiagnosisOverfit DiagnosisAssesses potential overfitting in a model's predictions, identifying regions where performance between training and...TrueTrue['model', 'datasets']{'metric': {'type': 'str', 'default': None}, 'cut_off_threshold': {'type': 'float', 'default': 0.04}}['sklearn', 'binary_classification', 'multiclass_classification', 'linear_regression', 'model_diagnosis']['classification', 'regression']
validmind.model_validation.sklearn.PermutationFeatureImportancePermutation Feature ImportanceAssesses the significance of each feature in a model by evaluating the impact on model performance when feature...TrueFalse['model', 'dataset']{'fontsize': {'type': None, 'default': None}, 'figure_height': {'type': None, 'default': None}}['sklearn', 'binary_classification', 'multiclass_classification', 'feature_importance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.PopulationStabilityIndexPopulation Stability IndexAssesses the Population Stability Index (PSI) to quantify the stability of an ML model's predictions across...TrueTrue['datasets', 'model']{'num_bins': {'type': 'int', 'default': 10}, 'mode': {'type': 'str', 'default': 'fixed'}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.PrecisionRecallCurvePrecision Recall CurveEvaluates the precision-recall trade-off for binary classification models and visualizes the Precision-Recall curve....TrueFalse['model', 'dataset']{}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.ROCCurveROC CurveEvaluates binary classification model performance by generating and plotting the Receiver Operating Characteristic...TrueFalse['model', 'dataset']{}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.RegressionErrorsRegression ErrorsAssesses the performance and error distribution of a regression model using various error metrics....FalseTrue['model', 'dataset']{}['sklearn', 'model_performance']['regression', 'classification']
validmind.model_validation.sklearn.RegressionErrorsComparisonRegression Errors ComparisonAssesses multiple regression error metrics to compare model performance across different datasets, emphasizing...FalseTrue['datasets', 'models']{}['model_performance', 'sklearn']['regression', 'time_series_forecasting']
validmind.model_validation.sklearn.RegressionPerformanceRegression PerformanceEvaluates the performance of a regression model using five different metrics: MAE, MSE, RMSE, MAPE, and MBD....FalseTrue['model', 'dataset']{}['sklearn', 'model_performance']['regression']
validmind.model_validation.sklearn.RegressionR2SquareRegression R2 SquareAssesses the overall goodness-of-fit of a regression model by evaluating R-squared (R2) and Adjusted R-squared (Adj...FalseTrue['dataset', 'model']{}['sklearn', 'model_performance']['regression']
validmind.model_validation.sklearn.RegressionR2SquareComparisonRegression R2 Square ComparisonCompares R-Squared and Adjusted R-Squared values for different regression models across multiple datasets to assess...FalseTrue['datasets', 'models']{}['model_performance', 'sklearn']['regression', 'time_series_forecasting']
validmind.model_validation.sklearn.RobustnessDiagnosisRobustness DiagnosisAssesses the robustness of a machine learning model by evaluating performance decay under noisy conditions....TrueTrue['datasets', 'model']{'metric': {'type': 'str', 'default': None}, 'scaling_factor_std_dev_list': {'type': None, 'default': [0.1, 0.2, 0.3, 0.4, 0.5]}, 'performance_decay_threshold': {'type': 'float', 'default': 0.05}}['sklearn', 'model_diagnosis', 'visualization']['classification', 'regression']
validmind.model_validation.sklearn.SHAPGlobalImportanceSHAP Global ImportanceEvaluates and visualizes global feature importance using SHAP values for model explanation and risk identification....FalseTrue['model', 'dataset']{'kernel_explainer_samples': {'type': 'int', 'default': 10}, 'tree_or_linear_explainer_samples': {'type': 'int', 'default': 200}, 'class_of_interest': {'type': None, 'default': None}}['sklearn', 'binary_classification', 'multiclass_classification', 'feature_importance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.ScoreProbabilityAlignmentScore Probability AlignmentAnalyzes the alignment between credit scores and predicted probabilities....TrueTrue['model', 'dataset']{'score_column': {'type': 'str', 'default': 'score'}, 'n_bins': {'type': 'int', 'default': 10}}['visualization', 'credit_risk', 'calibration']['classification']
validmind.model_validation.sklearn.SilhouettePlotSilhouette PlotCalculates and visualizes Silhouette Score, assessing the degree of data point suitability to its cluster in ML...TrueTrue['model', 'dataset']{}['sklearn', 'model_performance']['clustering']
validmind.model_validation.sklearn.TrainingTestDegradationTraining Test DegradationTests if model performance degradation between training and test datasets exceeds a predefined threshold....FalseTrue['datasets', 'model']{'max_threshold': {'type': 'float', 'default': 0.1}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.VMeasureV MeasureEvaluates homogeneity and completeness of a clustering model using the V Measure Score....FalseTrue['dataset', 'model']{}['sklearn', 'model_performance']['clustering']
validmind.model_validation.sklearn.WeakspotsDiagnosisWeakspots DiagnosisIdentifies and visualizes weak spots in a machine learning model's performance across various sections of the...TrueTrue['datasets', 'model']{'features_columns': {'type': None, 'default': None}, 'metrics': {'type': None, 'default': None}, 'thresholds': {'type': None, 'default': None}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_diagnosis', 'visualization']['classification', 'text_classification']
validmind.ongoing_monitoring.CalibrationCurveDriftCalibration Curve DriftEvaluates changes in probability calibration between reference and monitoring datasets....TrueTrue['datasets', 'model']{'n_bins': {'type': 'int', 'default': 10}, 'drift_pct_threshold': {'type': 'float', 'default': 20}}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.ongoing_monitoring.ClassDiscriminationDriftClass Discrimination DriftCompares classification discrimination metrics between reference and monitoring datasets....FalseTrue['datasets', 'model']{'drift_pct_threshold': {'type': '_empty', 'default': 20}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.ongoing_monitoring.ClassificationAccuracyDriftClassification Accuracy DriftCompares classification accuracy metrics between reference and monitoring datasets....FalseTrue['datasets', 'model']{'drift_pct_threshold': {'type': '_empty', 'default': 20}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.ongoing_monitoring.ConfusionMatrixDriftConfusion Matrix DriftCompares confusion matrix metrics between reference and monitoring datasets....FalseTrue['datasets', 'model']{'drift_pct_threshold': {'type': '_empty', 'default': 20}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.ongoing_monitoring.ROCCurveDriftROC Curve DriftCompares ROC curves between reference and monitoring datasets....TrueFalse['datasets', 'model']{}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 6, @@ -2585,715 +3065,869 @@ "data": { "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
IDNameDescriptionRequired InputsParamsTagsTasksIDNameDescriptionHas FigureHas TableRequired InputsParamsTagsTasks
validmind.data_validation.BivariateScatterPlotsBivariate Scatter PlotsGenerates bivariate scatterplots to visually inspect relationships between pairs of numerical predictor variables...['dataset']{}['tabular_data', 'numerical_data', 'visualization']['classification']
validmind.data_validation.ChiSquaredFeaturesTableChi Squared Features TableAssesses the statistical association between categorical features and a target variable using the Chi-Squared test....['dataset']{'p_threshold': {'type': '_empty', 'default': 0.05}}['tabular_data', 'categorical_data', 'statistical_test']['classification']
validmind.data_validation.ClassImbalanceClass ImbalanceEvaluates and quantifies class distribution imbalance in a dataset used by a machine learning model....['dataset']{'min_percent_threshold': {'type': 'int', 'default': 10}}['tabular_data', 'binary_classification', 'multiclass_classification', 'data_quality']['classification']
validmind.data_validation.DatasetDescriptionDataset DescriptionProvides comprehensive analysis and statistical summaries of each column in a machine learning model's dataset....['dataset']{}['tabular_data', 'time_series_data', 'text_data']['classification', 'regression', 'text_classification', 'text_summarization']
validmind.data_validation.DatasetSplitDataset SplitEvaluates and visualizes the distribution proportions among training, testing, and validation datasets of an ML...['datasets']{}['tabular_data', 'time_series_data', 'text_data']['classification', 'regression', 'text_classification', 'text_summarization']
validmind.data_validation.DescriptiveStatisticsDescriptive StatisticsPerforms a detailed descriptive statistical analysis of both numerical and categorical data within a model's...['dataset']{}['tabular_data', 'time_series_data', 'data_quality']['classification', 'regression']
validmind.data_validation.DuplicatesDuplicatesTests dataset for duplicate entries, ensuring model reliability via data quality verification....['dataset']{'min_threshold': {'type': '_empty', 'default': 1}}['tabular_data', 'data_quality', 'text_data']['classification', 'regression']
validmind.data_validation.FeatureTargetCorrelationPlotFeature Target Correlation PlotVisualizes the correlation between input features and the model's target output in a color-coded horizontal bar...['dataset']{'fig_height': {'type': '_empty', 'default': 600}}['tabular_data', 'visualization', 'correlation']['classification', 'regression']
validmind.data_validation.HighCardinalityHigh CardinalityAssesses the number of unique values in categorical columns to detect high cardinality and potential overfitting....['dataset']{'num_threshold': {'type': 'int', 'default': 100}, 'percent_threshold': {'type': 'float', 'default': 0.1}, 'threshold_type': {'type': 'str', 'default': 'percent'}}['tabular_data', 'data_quality', 'categorical_data']['classification', 'regression']
validmind.data_validation.HighPearsonCorrelationHigh Pearson CorrelationIdentifies highly correlated feature pairs in a dataset suggesting feature redundancy or multicollinearity....['dataset']{'max_threshold': {'type': 'float', 'default': 0.3}, 'top_n_correlations': {'type': 'int', 'default': 10}, 'feature_columns': {'type': 'list', 'default': None}}['tabular_data', 'data_quality', 'correlation']['classification', 'regression']
validmind.data_validation.IQROutliersBarPlotIQR Outliers Bar PlotVisualizes outlier distribution across percentiles in numerical data using the Interquartile Range (IQR) method....['dataset']{'threshold': {'type': 'float', 'default': 1.5}, 'fig_width': {'type': 'int', 'default': 800}}['tabular_data', 'visualization', 'numerical_data']['classification', 'regression']
validmind.data_validation.IQROutliersTableIQR Outliers TableDetermines and summarizes outliers in numerical features using the Interquartile Range method....['dataset']{'threshold': {'type': 'float', 'default': 1.5}}['tabular_data', 'numerical_data']['classification', 'regression']
validmind.data_validation.IsolationForestOutliersIsolation Forest OutliersDetects outliers in a dataset using the Isolation Forest algorithm and visualizes results through scatter plots....['dataset']{'random_state': {'type': 'int', 'default': 0}, 'contamination': {'type': 'float', 'default': 0.1}, 'feature_columns': {'type': 'list', 'default': None}}['tabular_data', 'anomaly_detection']['classification']
validmind.data_validation.JarqueBeraJarque BeraAssesses normality of dataset features in an ML model using the Jarque-Bera test....['dataset']{}['tabular_data', 'data_distribution', 'statistical_test', 'statsmodels']['classification', 'regression']
validmind.data_validation.MissingValuesMissing ValuesEvaluates dataset quality by ensuring missing value ratio across all features does not exceed a set threshold....['dataset']{'min_threshold': {'type': 'int', 'default': 1}}['tabular_data', 'data_quality']['classification', 'regression']
validmind.data_validation.MissingValuesBarPlotMissing Values Bar PlotAssesses the percentage and distribution of missing values in the dataset via a bar plot, with emphasis on...['dataset']{'threshold': {'type': 'int', 'default': 80}, 'fig_height': {'type': 'int', 'default': 600}}['tabular_data', 'data_quality', 'visualization']['classification', 'regression']
validmind.data_validation.MutualInformationMutual InformationCalculates mutual information scores between features and target variable to evaluate feature relevance....['dataset']{'min_threshold': {'type': 'float', 'default': 0.01}, 'task': {'type': 'str', 'default': 'classification'}}['feature_selection', 'data_analysis']['classification', 'regression']
validmind.data_validation.PearsonCorrelationMatrixPearson Correlation MatrixEvaluates linear dependency between numerical variables in a dataset via a Pearson Correlation coefficient heat map....['dataset']{}['tabular_data', 'numerical_data', 'correlation']['classification', 'regression']
validmind.data_validation.ProtectedClassesDescriptionProtected Classes DescriptionVisualizes the distribution of protected classes in the dataset relative to the target variable...['dataset']{'protected_classes': {'type': '_empty', 'default': None}}['bias_and_fairness', 'descriptive_statistics']['classification', 'regression']
validmind.data_validation.RunsTestRuns TestExecutes Runs Test on ML model to detect non-random patterns in output data sequence....['dataset']{}['tabular_data', 'statistical_test', 'statsmodels']['classification', 'regression']
validmind.data_validation.ScatterPlotScatter PlotAssesses visual relationships, patterns, and outliers among features in a dataset through scatter plot matrices....['dataset']{}['tabular_data', 'visualization']['classification', 'regression']
validmind.data_validation.ScoreBandDefaultRatesScore Band Default RatesAnalyzes default rates and population distribution across credit score bands....['dataset', 'model']{'score_column': {'type': 'str', 'default': 'score'}, 'score_bands': {'type': 'list', 'default': None}}['visualization', 'credit_risk', 'scorecard']['classification']
validmind.data_validation.ShapiroWilkShapiro WilkEvaluates feature-wise normality of training data using the Shapiro-Wilk test....['dataset']{}['tabular_data', 'data_distribution', 'statistical_test']['classification', 'regression']
validmind.data_validation.SkewnessSkewnessEvaluates the skewness of numerical data in a dataset to check against a defined threshold, aiming to ensure data...['dataset']{'max_threshold': {'type': '_empty', 'default': 1}}['data_quality', 'tabular_data']['classification', 'regression']
validmind.data_validation.TabularCategoricalBarPlotsTabular Categorical Bar PlotsGenerates and visualizes bar plots for each category in categorical features to evaluate the dataset's composition....['dataset']{}['tabular_data', 'visualization']['classification', 'regression']
validmind.data_validation.TabularDateTimeHistogramsTabular Date Time HistogramsGenerates histograms to provide graphical insight into the distribution of time intervals in a model's datetime...['dataset']{}['time_series_data', 'visualization']['classification', 'regression']
validmind.data_validation.TabularDescriptionTablesTabular Description TablesSummarizes key descriptive statistics for numerical, categorical, and datetime variables in a dataset....['dataset']{}['tabular_data']['classification', 'regression']
validmind.data_validation.TabularNumericalHistogramsTabular Numerical HistogramsGenerates histograms for each numerical feature in a dataset to provide visual insights into data distribution and...['dataset']{}['tabular_data', 'visualization']['classification', 'regression']
validmind.data_validation.TargetRateBarPlotsTarget Rate Bar PlotsGenerates bar plots visualizing the default rates of categorical features for a classification machine learning...['dataset']{}['tabular_data', 'visualization', 'categorical_data']['classification']
validmind.data_validation.TooManyZeroValuesToo Many Zero ValuesIdentifies numerical columns in a dataset that contain an excessive number of zero values, defined by a threshold...['dataset']{'max_percent_threshold': {'type': 'float', 'default': 0.03}}['tabular_data']['regression', 'classification']
validmind.data_validation.UniqueRowsUnique RowsVerifies the diversity of the dataset by ensuring that the count of unique rows exceeds a prescribed threshold....['dataset']{'min_percent_threshold': {'type': 'float', 'default': 1}}['tabular_data']['regression', 'classification']
validmind.data_validation.WOEBinPlotsWOE Bin PlotsGenerates visualizations of Weight of Evidence (WoE) and Information Value (IV) for understanding predictive power...['dataset']{'breaks_adj': {'type': 'list', 'default': None}, 'fig_height': {'type': 'int', 'default': 600}, 'fig_width': {'type': 'int', 'default': 500}}['tabular_data', 'visualization', 'categorical_data']['classification']
validmind.data_validation.WOEBinTableWOE Bin TableAssesses the Weight of Evidence (WoE) and Information Value (IV) of each feature to evaluate its predictive power...['dataset']{'breaks_adj': {'type': 'list', 'default': None}}['tabular_data', 'categorical_data']['classification']
validmind.model_validation.FeaturesAUCFeatures AUCEvaluates the discriminatory power of each individual feature within a binary classification model by calculating...['dataset']{'fontsize': {'type': 'int', 'default': 12}, 'figure_height': {'type': 'int', 'default': 500}}['feature_importance', 'AUC', 'visualization']['classification']
validmind.model_validation.sklearn.CalibrationCurveCalibration CurveEvaluates the calibration of probability estimates by comparing predicted probabilities against observed...['model', 'dataset']{'n_bins': {'type': 'int', 'default': 10}}['sklearn', 'model_performance', 'classification']['classification']
validmind.model_validation.sklearn.ClassifierPerformanceClassifier PerformanceEvaluates performance of binary or multiclass classification models using precision, recall, F1-Score, accuracy,...['dataset', 'model']{'average': {'type': 'str', 'default': 'macro'}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.ClassifierThresholdOptimizationClassifier Threshold OptimizationAnalyzes and visualizes different threshold optimization methods for binary classification models....['dataset', 'model']{'methods': {'type': None, 'default': None}, 'target_recall': {'type': None, 'default': None}}['model_validation', 'threshold_optimization', 'classification_metrics']['classification']
validmind.model_validation.sklearn.ConfusionMatrixConfusion MatrixEvaluates and visually represents the classification ML model's predictive performance using a Confusion Matrix...['dataset', 'model']{'threshold': {'type': 'float', 'default': 0.5}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.HyperParametersTuningHyper Parameters TuningPerforms exhaustive grid search over specified parameter ranges to find optimal model configurations...['model', 'dataset']{'param_grid': {'type': 'dict', 'default': None}, 'scoring': {'type': None, 'default': None}, 'thresholds': {'type': None, 'default': None}, 'fit_params': {'type': 'dict', 'default': None}}['sklearn', 'model_performance']['clustering', 'classification']
validmind.model_validation.sklearn.MinimumAccuracyMinimum AccuracyChecks if the model's prediction accuracy meets or surpasses a specified threshold....['dataset', 'model']{'min_threshold': {'type': 'float', 'default': 0.7}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.MinimumF1ScoreMinimum F1 ScoreAssesses if the model's F1 score on the validation set meets a predefined minimum threshold, ensuring balanced...['dataset', 'model']{'min_threshold': {'type': 'float', 'default': 0.5}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.MinimumROCAUCScoreMinimum ROCAUC ScoreValidates model by checking if the ROC AUC score meets or surpasses a specified threshold....['dataset', 'model']{'min_threshold': {'type': 'float', 'default': 0.5}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.ModelParametersModel ParametersExtracts and displays model parameters in a structured format for transparency and reproducibility....['model']{'model_params': {'type': '_empty', 'default': None}}['model_training', 'metadata']['classification', 'regression']
validmind.model_validation.sklearn.ModelsPerformanceComparisonModels Performance ComparisonEvaluates and compares the performance of multiple Machine Learning models using various metrics like accuracy,...['dataset', 'models']{}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'model_comparison']['classification', 'text_classification']
validmind.model_validation.sklearn.OverfitDiagnosisOverfit DiagnosisAssesses potential overfitting in a model's predictions, identifying regions where performance between training and...['model', 'datasets']{'metric': {'type': 'str', 'default': None}, 'cut_off_threshold': {'type': 'float', 'default': 0.04}}['sklearn', 'binary_classification', 'multiclass_classification', 'linear_regression', 'model_diagnosis']['classification', 'regression']
validmind.model_validation.sklearn.PermutationFeatureImportancePermutation Feature ImportanceAssesses the significance of each feature in a model by evaluating the impact on model performance when feature...['model', 'dataset']{'fontsize': {'type': None, 'default': None}, 'figure_height': {'type': None, 'default': None}}['sklearn', 'binary_classification', 'multiclass_classification', 'feature_importance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.PopulationStabilityIndexPopulation Stability IndexAssesses the Population Stability Index (PSI) to quantify the stability of an ML model's predictions across...['datasets', 'model']{'num_bins': {'type': 'int', 'default': 10}, 'mode': {'type': 'str', 'default': 'fixed'}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.PrecisionRecallCurvePrecision Recall CurveEvaluates the precision-recall trade-off for binary classification models and visualizes the Precision-Recall curve....['model', 'dataset']{}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.ROCCurveROC CurveEvaluates binary classification model performance by generating and plotting the Receiver Operating Characteristic...['model', 'dataset']{}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.RegressionErrorsRegression ErrorsAssesses the performance and error distribution of a regression model using various error metrics....['model', 'dataset']{}['sklearn', 'model_performance']['regression', 'classification']
validmind.model_validation.sklearn.RobustnessDiagnosisRobustness DiagnosisAssesses the robustness of a machine learning model by evaluating performance decay under noisy conditions....['datasets', 'model']{'metric': {'type': 'str', 'default': None}, 'scaling_factor_std_dev_list': {'type': None, 'default': [0.1, 0.2, 0.3, 0.4, 0.5]}, 'performance_decay_threshold': {'type': 'float', 'default': 0.05}}['sklearn', 'model_diagnosis', 'visualization']['classification', 'regression']
validmind.model_validation.sklearn.SHAPGlobalImportanceSHAP Global ImportanceEvaluates and visualizes global feature importance using SHAP values for model explanation and risk identification....['model', 'dataset']{'kernel_explainer_samples': {'type': 'int', 'default': 10}, 'tree_or_linear_explainer_samples': {'type': 'int', 'default': 200}, 'class_of_interest': {'type': None, 'default': None}}['sklearn', 'binary_classification', 'multiclass_classification', 'feature_importance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.ScoreProbabilityAlignmentScore Probability AlignmentAnalyzes the alignment between credit scores and predicted probabilities....['model', 'dataset']{'score_column': {'type': 'str', 'default': 'score'}, 'n_bins': {'type': 'int', 'default': 10}}['visualization', 'credit_risk', 'calibration']['classification']
validmind.model_validation.sklearn.TrainingTestDegradationTraining Test DegradationTests if model performance degradation between training and test datasets exceeds a predefined threshold....['datasets', 'model']{'max_threshold': {'type': 'float', 'default': 0.1}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.WeakspotsDiagnosisWeakspots DiagnosisIdentifies and visualizes weak spots in a machine learning model's performance across various sections of the...['datasets', 'model']{'features_columns': {'type': None, 'default': None}, 'metrics': {'type': None, 'default': None}, 'thresholds': {'type': None, 'default': None}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_diagnosis', 'visualization']['classification', 'text_classification']
validmind.model_validation.statsmodels.CumulativePredictionProbabilitiesCumulative Prediction ProbabilitiesVisualizes cumulative probabilities of positive and negative classes for both training and testing in classification models....['dataset', 'model']{'title': {'type': '_empty', 'default': 'Cumulative Probabilities'}}['visualization', 'credit_risk']['classification']
validmind.model_validation.statsmodels.GINITableGINI TableEvaluates classification model performance using AUC, GINI, and KS metrics for training and test datasets....['dataset', 'model']{}['model_performance']['classification']
validmind.model_validation.statsmodels.KolmogorovSmirnovKolmogorov SmirnovAssesses whether each feature in the dataset aligns with a normal distribution using the Kolmogorov-Smirnov test....['model', 'dataset']{'dist': {'type': 'str', 'default': 'norm'}}['tabular_data', 'data_distribution', 'statistical_test', 'statsmodels']['classification', 'regression']
validmind.model_validation.statsmodels.LillieforsLillieforsAssesses the normality of feature distributions in an ML model's training dataset using the Lilliefors test....['dataset']{}['tabular_data', 'data_distribution', 'statistical_test', 'statsmodels']['classification', 'regression']
validmind.model_validation.statsmodels.PredictionProbabilitiesHistogramPrediction Probabilities HistogramAssesses the predictive probability distribution for binary classification to evaluate model performance and...['dataset', 'model']{'title': {'type': '_empty', 'default': 'Histogram of Predictive Probabilities'}}['visualization', 'credit_risk']['classification']
validmind.model_validation.statsmodels.ScorecardHistogramScorecard HistogramThe Scorecard Histogram test evaluates the distribution of credit scores between default and non-default instances,...['dataset']{'title': {'type': '_empty', 'default': 'Histogram of Scores'}, 'score_column': {'type': '_empty', 'default': 'score'}}['visualization', 'credit_risk', 'logistic_regression']['classification']
validmind.ongoing_monitoring.CalibrationCurveDriftCalibration Curve DriftEvaluates changes in probability calibration between reference and monitoring datasets....['datasets', 'model']{'n_bins': {'type': 'int', 'default': 10}, 'drift_pct_threshold': {'type': 'float', 'default': 20}}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.ongoing_monitoring.ClassDiscriminationDriftClass Discrimination DriftCompares classification discrimination metrics between reference and monitoring datasets....['datasets', 'model']{'drift_pct_threshold': {'type': '_empty', 'default': 20}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.ongoing_monitoring.ClassImbalanceDriftClass Imbalance DriftEvaluates drift in class distribution between reference and monitoring datasets....['datasets']{'drift_pct_threshold': {'type': 'float', 'default': 5.0}, 'title': {'type': 'str', 'default': 'Class Distribution Drift'}}['tabular_data', 'binary_classification', 'multiclass_classification']['classification']
validmind.ongoing_monitoring.ClassificationAccuracyDriftClassification Accuracy DriftCompares classification accuracy metrics between reference and monitoring datasets....['datasets', 'model']{'drift_pct_threshold': {'type': '_empty', 'default': 20}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.ongoing_monitoring.ConfusionMatrixDriftConfusion Matrix DriftCompares confusion matrix metrics between reference and monitoring datasets....['datasets', 'model']{'drift_pct_threshold': {'type': '_empty', 'default': 20}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.ongoing_monitoring.CumulativePredictionProbabilitiesDriftCumulative Prediction Probabilities DriftCompares cumulative prediction probability distributions between reference and monitoring datasets....['datasets', 'model']{}['visualization', 'credit_risk']['classification']
validmind.ongoing_monitoring.PredictionProbabilitiesHistogramDriftPrediction Probabilities Histogram DriftCompares prediction probability distributions between reference and monitoring datasets....['datasets', 'model']{'title': {'type': '_empty', 'default': 'Prediction Probabilities Histogram Drift'}, 'drift_pct_threshold': {'type': 'float', 'default': 20.0}}['visualization', 'credit_risk']['classification']
validmind.ongoing_monitoring.ROCCurveDriftROC Curve DriftCompares ROC curves between reference and monitoring datasets....['datasets', 'model']{}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.ongoing_monitoring.ScoreBandsDriftScore Bands DriftAnalyzes drift in population distribution and default rates across score bands....['datasets', 'model']{'score_column': {'type': 'str', 'default': 'score'}, 'score_bands': {'type': 'list', 'default': None}, 'drift_threshold': {'type': 'float', 'default': 20.0}}['visualization', 'credit_risk', 'scorecard']['classification']
validmind.ongoing_monitoring.ScorecardHistogramDriftScorecard Histogram DriftCompares score distributions between reference and monitoring datasets for each class....['datasets']{'score_column': {'type': 'str', 'default': 'score'}, 'title': {'type': 'str', 'default': 'Scorecard Histogram Drift'}, 'drift_pct_threshold': {'type': 'float', 'default': 20.0}}['visualization', 'credit_risk', 'logistic_regression']['classification']
validmind.unit_metrics.classification.AccuracyAccuracyCalculates the accuracy of a model['dataset', 'model']{}['classification']['classification']
validmind.unit_metrics.classification.F1F1Calculates the F1 score for a classification model.['model', 'dataset']{}['classification']['classification']
validmind.unit_metrics.classification.PrecisionPrecisionCalculates the precision for a classification model.['model', 'dataset']{}['classification']['classification']
validmind.unit_metrics.classification.ROC_AUCROC AUCCalculates the ROC AUC for a classification model.['model', 'dataset']{}['classification']['classification']
validmind.unit_metrics.classification.RecallRecallCalculates the recall for a classification model.['model', 'dataset']{}['classification']['classification']validmind.data_validation.BivariateScatterPlotsBivariate Scatter PlotsGenerates bivariate scatterplots to visually inspect relationships between pairs of numerical predictor variables...TrueFalse['dataset']{}['tabular_data', 'numerical_data', 'visualization']['classification']
validmind.data_validation.ChiSquaredFeaturesTableChi Squared Features TableAssesses the statistical association between categorical features and a target variable using the Chi-Squared test....FalseTrue['dataset']{'p_threshold': {'type': '_empty', 'default': 0.05}}['tabular_data', 'categorical_data', 'statistical_test']['classification']
validmind.data_validation.ClassImbalanceClass ImbalanceEvaluates and quantifies class distribution imbalance in a dataset used by a machine learning model....TrueTrue['dataset']{'min_percent_threshold': {'type': 'int', 'default': 10}}['tabular_data', 'binary_classification', 'multiclass_classification', 'data_quality']['classification']
validmind.data_validation.DatasetDescriptionDataset DescriptionProvides comprehensive analysis and statistical summaries of each column in a machine learning model's dataset....FalseTrue['dataset']{}['tabular_data', 'time_series_data', 'text_data']['classification', 'regression', 'text_classification', 'text_summarization']
validmind.data_validation.DatasetSplitDataset SplitEvaluates and visualizes the distribution proportions among training, testing, and validation datasets of an ML...FalseTrue['datasets']{}['tabular_data', 'time_series_data', 'text_data']['classification', 'regression', 'text_classification', 'text_summarization']
validmind.data_validation.DescriptiveStatisticsDescriptive StatisticsPerforms a detailed descriptive statistical analysis of both numerical and categorical data within a model's...FalseTrue['dataset']{}['tabular_data', 'time_series_data', 'data_quality']['classification', 'regression']
validmind.data_validation.DuplicatesDuplicatesTests dataset for duplicate entries, ensuring model reliability via data quality verification....FalseTrue['dataset']{'min_threshold': {'type': '_empty', 'default': 1}}['tabular_data', 'data_quality', 'text_data']['classification', 'regression']
validmind.data_validation.FeatureTargetCorrelationPlotFeature Target Correlation PlotVisualizes the correlation between input features and the model's target output in a color-coded horizontal bar...TrueFalse['dataset']{'fig_height': {'type': '_empty', 'default': 600}}['tabular_data', 'visualization', 'correlation']['classification', 'regression']
validmind.data_validation.HighCardinalityHigh CardinalityAssesses the number of unique values in categorical columns to detect high cardinality and potential overfitting....FalseTrue['dataset']{'num_threshold': {'type': 'int', 'default': 100}, 'percent_threshold': {'type': 'float', 'default': 0.1}, 'threshold_type': {'type': 'str', 'default': 'percent'}}['tabular_data', 'data_quality', 'categorical_data']['classification', 'regression']
validmind.data_validation.HighPearsonCorrelationHigh Pearson CorrelationIdentifies highly correlated feature pairs in a dataset suggesting feature redundancy or multicollinearity....FalseTrue['dataset']{'max_threshold': {'type': 'float', 'default': 0.3}, 'top_n_correlations': {'type': 'int', 'default': 10}, 'feature_columns': {'type': 'list', 'default': None}}['tabular_data', 'data_quality', 'correlation']['classification', 'regression']
validmind.data_validation.IQROutliersBarPlotIQR Outliers Bar PlotVisualizes outlier distribution across percentiles in numerical data using the Interquartile Range (IQR) method....TrueFalse['dataset']{'threshold': {'type': 'float', 'default': 1.5}, 'fig_width': {'type': 'int', 'default': 800}}['tabular_data', 'visualization', 'numerical_data']['classification', 'regression']
validmind.data_validation.IQROutliersTableIQR Outliers TableDetermines and summarizes outliers in numerical features using the Interquartile Range method....FalseTrue['dataset']{'threshold': {'type': 'float', 'default': 1.5}}['tabular_data', 'numerical_data']['classification', 'regression']
validmind.data_validation.IsolationForestOutliersIsolation Forest OutliersDetects outliers in a dataset using the Isolation Forest algorithm and visualizes results through scatter plots....TrueFalse['dataset']{'random_state': {'type': 'int', 'default': 0}, 'contamination': {'type': 'float', 'default': 0.1}, 'feature_columns': {'type': 'list', 'default': None}}['tabular_data', 'anomaly_detection']['classification']
validmind.data_validation.JarqueBeraJarque BeraAssesses normality of dataset features in an ML model using the Jarque-Bera test....FalseTrue['dataset']{}['tabular_data', 'data_distribution', 'statistical_test', 'statsmodels']['classification', 'regression']
validmind.data_validation.MissingValuesMissing ValuesEvaluates dataset quality by ensuring missing value ratio across all features does not exceed a set threshold....FalseTrue['dataset']{'min_threshold': {'type': 'int', 'default': 1}}['tabular_data', 'data_quality']['classification', 'regression']
validmind.data_validation.MissingValuesBarPlotMissing Values Bar PlotAssesses the percentage and distribution of missing values in the dataset via a bar plot, with emphasis on...TrueFalse['dataset']{'threshold': {'type': 'int', 'default': 80}, 'fig_height': {'type': 'int', 'default': 600}}['tabular_data', 'data_quality', 'visualization']['classification', 'regression']
validmind.data_validation.MutualInformationMutual InformationCalculates mutual information scores between features and target variable to evaluate feature relevance....TrueFalse['dataset']{'min_threshold': {'type': 'float', 'default': 0.01}, 'task': {'type': 'str', 'default': 'classification'}}['feature_selection', 'data_analysis']['classification', 'regression']
validmind.data_validation.PearsonCorrelationMatrixPearson Correlation MatrixEvaluates linear dependency between numerical variables in a dataset via a Pearson Correlation coefficient heat map....TrueFalse['dataset']{}['tabular_data', 'numerical_data', 'correlation']['classification', 'regression']
validmind.data_validation.ProtectedClassesDescriptionProtected Classes DescriptionVisualizes the distribution of protected classes in the dataset relative to the target variable...TrueTrue['dataset']{'protected_classes': {'type': '_empty', 'default': None}}['bias_and_fairness', 'descriptive_statistics']['classification', 'regression']
validmind.data_validation.RunsTestRuns TestExecutes Runs Test on ML model to detect non-random patterns in output data sequence....FalseTrue['dataset']{}['tabular_data', 'statistical_test', 'statsmodels']['classification', 'regression']
validmind.data_validation.ScatterPlotScatter PlotAssesses visual relationships, patterns, and outliers among features in a dataset through scatter plot matrices....TrueFalse['dataset']{}['tabular_data', 'visualization']['classification', 'regression']
validmind.data_validation.ScoreBandDefaultRatesScore Band Default RatesAnalyzes default rates and population distribution across credit score bands....FalseTrue['dataset', 'model']{'score_column': {'type': 'str', 'default': 'score'}, 'score_bands': {'type': 'list', 'default': None}}['visualization', 'credit_risk', 'scorecard']['classification']
validmind.data_validation.ShapiroWilkShapiro WilkEvaluates feature-wise normality of training data using the Shapiro-Wilk test....FalseTrue['dataset']{}['tabular_data', 'data_distribution', 'statistical_test']['classification', 'regression']
validmind.data_validation.SkewnessSkewnessEvaluates the skewness of numerical data in a dataset to check against a defined threshold, aiming to ensure data...FalseTrue['dataset']{'max_threshold': {'type': '_empty', 'default': 1}}['data_quality', 'tabular_data']['classification', 'regression']
validmind.data_validation.TabularCategoricalBarPlotsTabular Categorical Bar PlotsGenerates and visualizes bar plots for each category in categorical features to evaluate the dataset's composition....TrueFalse['dataset']{}['tabular_data', 'visualization']['classification', 'regression']
validmind.data_validation.TabularDateTimeHistogramsTabular Date Time HistogramsGenerates histograms to provide graphical insight into the distribution of time intervals in a model's datetime...TrueFalse['dataset']{}['time_series_data', 'visualization']['classification', 'regression']
validmind.data_validation.TabularDescriptionTablesTabular Description TablesSummarizes key descriptive statistics for numerical, categorical, and datetime variables in a dataset....FalseTrue['dataset']{}['tabular_data']['classification', 'regression']
validmind.data_validation.TabularNumericalHistogramsTabular Numerical HistogramsGenerates histograms for each numerical feature in a dataset to provide visual insights into data distribution and...TrueFalse['dataset']{}['tabular_data', 'visualization']['classification', 'regression']
validmind.data_validation.TargetRateBarPlotsTarget Rate Bar PlotsGenerates bar plots visualizing the default rates of categorical features for a classification machine learning...TrueFalse['dataset']{}['tabular_data', 'visualization', 'categorical_data']['classification']
validmind.data_validation.TooManyZeroValuesToo Many Zero ValuesIdentifies numerical columns in a dataset that contain an excessive number of zero values, defined by a threshold...FalseTrue['dataset']{'max_percent_threshold': {'type': 'float', 'default': 0.03}}['tabular_data']['regression', 'classification']
validmind.data_validation.UniqueRowsUnique RowsVerifies the diversity of the dataset by ensuring that the count of unique rows exceeds a prescribed threshold....FalseTrue['dataset']{'min_percent_threshold': {'type': 'float', 'default': 1}}['tabular_data']['regression', 'classification']
validmind.data_validation.WOEBinPlotsWOE Bin PlotsGenerates visualizations of Weight of Evidence (WoE) and Information Value (IV) for understanding predictive power...TrueFalse['dataset']{'breaks_adj': {'type': 'list', 'default': None}, 'fig_height': {'type': 'int', 'default': 600}, 'fig_width': {'type': 'int', 'default': 500}}['tabular_data', 'visualization', 'categorical_data']['classification']
validmind.data_validation.WOEBinTableWOE Bin TableAssesses the Weight of Evidence (WoE) and Information Value (IV) of each feature to evaluate its predictive power...FalseTrue['dataset']{'breaks_adj': {'type': 'list', 'default': None}}['tabular_data', 'categorical_data']['classification']
validmind.model_validation.FeaturesAUCFeatures AUCEvaluates the discriminatory power of each individual feature within a binary classification model by calculating...TrueFalse['dataset']{'fontsize': {'type': 'int', 'default': 12}, 'figure_height': {'type': 'int', 'default': 500}}['feature_importance', 'AUC', 'visualization']['classification']
validmind.model_validation.sklearn.CalibrationCurveCalibration CurveEvaluates the calibration of probability estimates by comparing predicted probabilities against observed...TrueFalse['model', 'dataset']{'n_bins': {'type': 'int', 'default': 10}}['sklearn', 'model_performance', 'classification']['classification']
validmind.model_validation.sklearn.ClassifierPerformanceClassifier PerformanceEvaluates performance of binary or multiclass classification models using precision, recall, F1-Score, accuracy,...FalseTrue['dataset', 'model']{'average': {'type': 'str', 'default': 'macro'}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.ClassifierThresholdOptimizationClassifier Threshold OptimizationAnalyzes and visualizes different threshold optimization methods for binary classification models....FalseTrue['dataset', 'model']{'methods': {'type': None, 'default': None}, 'target_recall': {'type': None, 'default': None}}['model_validation', 'threshold_optimization', 'classification_metrics']['classification']
validmind.model_validation.sklearn.ConfusionMatrixConfusion MatrixEvaluates and visually represents the classification ML model's predictive performance using a Confusion Matrix...TrueFalse['dataset', 'model']{'threshold': {'type': 'float', 'default': 0.5}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.HyperParametersTuningHyper Parameters TuningPerforms exhaustive grid search over specified parameter ranges to find optimal model configurations...FalseTrue['model', 'dataset']{'param_grid': {'type': 'dict', 'default': None}, 'scoring': {'type': None, 'default': None}, 'thresholds': {'type': None, 'default': None}, 'fit_params': {'type': 'dict', 'default': None}}['sklearn', 'model_performance']['clustering', 'classification']
validmind.model_validation.sklearn.MinimumAccuracyMinimum AccuracyChecks if the model's prediction accuracy meets or surpasses a specified threshold....FalseTrue['dataset', 'model']{'min_threshold': {'type': 'float', 'default': 0.7}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.MinimumF1ScoreMinimum F1 ScoreAssesses if the model's F1 score on the validation set meets a predefined minimum threshold, ensuring balanced...FalseTrue['dataset', 'model']{'min_threshold': {'type': 'float', 'default': 0.5}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.MinimumROCAUCScoreMinimum ROCAUC ScoreValidates model by checking if the ROC AUC score meets or surpasses a specified threshold....FalseTrue['dataset', 'model']{'min_threshold': {'type': 'float', 'default': 0.5}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.ModelParametersModel ParametersExtracts and displays model parameters in a structured format for transparency and reproducibility....FalseTrue['model']{'model_params': {'type': None, 'default': None}}['model_training', 'metadata']['classification', 'regression']
validmind.model_validation.sklearn.ModelsPerformanceComparisonModels Performance ComparisonEvaluates and compares the performance of multiple Machine Learning models using various metrics like accuracy,...FalseTrue['dataset', 'models']{}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'model_comparison']['classification', 'text_classification']
validmind.model_validation.sklearn.OverfitDiagnosisOverfit DiagnosisAssesses potential overfitting in a model's predictions, identifying regions where performance between training and...TrueTrue['model', 'datasets']{'metric': {'type': 'str', 'default': None}, 'cut_off_threshold': {'type': 'float', 'default': 0.04}}['sklearn', 'binary_classification', 'multiclass_classification', 'linear_regression', 'model_diagnosis']['classification', 'regression']
validmind.model_validation.sklearn.PermutationFeatureImportancePermutation Feature ImportanceAssesses the significance of each feature in a model by evaluating the impact on model performance when feature...TrueFalse['model', 'dataset']{'fontsize': {'type': None, 'default': None}, 'figure_height': {'type': None, 'default': None}}['sklearn', 'binary_classification', 'multiclass_classification', 'feature_importance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.PopulationStabilityIndexPopulation Stability IndexAssesses the Population Stability Index (PSI) to quantify the stability of an ML model's predictions across...TrueTrue['datasets', 'model']{'num_bins': {'type': 'int', 'default': 10}, 'mode': {'type': 'str', 'default': 'fixed'}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.model_validation.sklearn.PrecisionRecallCurvePrecision Recall CurveEvaluates the precision-recall trade-off for binary classification models and visualizes the Precision-Recall curve....TrueFalse['model', 'dataset']{}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.ROCCurveROC CurveEvaluates binary classification model performance by generating and plotting the Receiver Operating Characteristic...TrueFalse['model', 'dataset']{}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.RegressionErrorsRegression ErrorsAssesses the performance and error distribution of a regression model using various error metrics....FalseTrue['model', 'dataset']{}['sklearn', 'model_performance']['regression', 'classification']
validmind.model_validation.sklearn.RobustnessDiagnosisRobustness DiagnosisAssesses the robustness of a machine learning model by evaluating performance decay under noisy conditions....TrueTrue['datasets', 'model']{'metric': {'type': 'str', 'default': None}, 'scaling_factor_std_dev_list': {'type': None, 'default': [0.1, 0.2, 0.3, 0.4, 0.5]}, 'performance_decay_threshold': {'type': 'float', 'default': 0.05}}['sklearn', 'model_diagnosis', 'visualization']['classification', 'regression']
validmind.model_validation.sklearn.SHAPGlobalImportanceSHAP Global ImportanceEvaluates and visualizes global feature importance using SHAP values for model explanation and risk identification....FalseTrue['model', 'dataset']{'kernel_explainer_samples': {'type': 'int', 'default': 10}, 'tree_or_linear_explainer_samples': {'type': 'int', 'default': 200}, 'class_of_interest': {'type': None, 'default': None}}['sklearn', 'binary_classification', 'multiclass_classification', 'feature_importance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.ScoreProbabilityAlignmentScore Probability AlignmentAnalyzes the alignment between credit scores and predicted probabilities....TrueTrue['model', 'dataset']{'score_column': {'type': 'str', 'default': 'score'}, 'n_bins': {'type': 'int', 'default': 10}}['visualization', 'credit_risk', 'calibration']['classification']
validmind.model_validation.sklearn.TrainingTestDegradationTraining Test DegradationTests if model performance degradation between training and test datasets exceeds a predefined threshold....FalseTrue['datasets', 'model']{'max_threshold': {'type': 'float', 'default': 0.1}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.WeakspotsDiagnosisWeakspots DiagnosisIdentifies and visualizes weak spots in a machine learning model's performance across various sections of the...TrueTrue['datasets', 'model']{'features_columns': {'type': None, 'default': None}, 'metrics': {'type': None, 'default': None}, 'thresholds': {'type': None, 'default': None}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_diagnosis', 'visualization']['classification', 'text_classification']
validmind.model_validation.statsmodels.CumulativePredictionProbabilitiesCumulative Prediction ProbabilitiesVisualizes cumulative probabilities of positive and negative classes for both training and testing in classification models....TrueFalse['dataset', 'model']{'title': {'type': 'str', 'default': 'Cumulative Probabilities'}}['visualization', 'credit_risk']['classification']
validmind.model_validation.statsmodels.GINITableGINI TableEvaluates classification model performance using AUC, GINI, and KS metrics for training and test datasets....FalseTrue['dataset', 'model']{}['model_performance']['classification']
validmind.model_validation.statsmodels.KolmogorovSmirnovKolmogorov SmirnovAssesses whether each feature in the dataset aligns with a normal distribution using the Kolmogorov-Smirnov test....FalseTrue['model', 'dataset']{'dist': {'type': 'str', 'default': 'norm'}}['tabular_data', 'data_distribution', 'statistical_test', 'statsmodels']['classification', 'regression']
validmind.model_validation.statsmodels.LillieforsLillieforsAssesses the normality of feature distributions in an ML model's training dataset using the Lilliefors test....FalseTrue['dataset']{}['tabular_data', 'data_distribution', 'statistical_test', 'statsmodels']['classification', 'regression']
validmind.model_validation.statsmodels.PredictionProbabilitiesHistogramPrediction Probabilities HistogramAssesses the predictive probability distribution for binary classification to evaluate model performance and...TrueFalse['dataset', 'model']{'title': {'type': 'str', 'default': 'Histogram of Predictive Probabilities'}}['visualization', 'credit_risk']['classification']
validmind.model_validation.statsmodels.ScorecardHistogramScorecard HistogramThe Scorecard Histogram test evaluates the distribution of credit scores between default and non-default instances,...TrueFalse['dataset']{'title': {'type': 'str', 'default': 'Histogram of Scores'}, 'score_column': {'type': 'str', 'default': 'score'}}['visualization', 'credit_risk', 'logistic_regression']['classification']
validmind.ongoing_monitoring.CalibrationCurveDriftCalibration Curve DriftEvaluates changes in probability calibration between reference and monitoring datasets....TrueTrue['datasets', 'model']{'n_bins': {'type': 'int', 'default': 10}, 'drift_pct_threshold': {'type': 'float', 'default': 20}}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.ongoing_monitoring.ClassDiscriminationDriftClass Discrimination DriftCompares classification discrimination metrics between reference and monitoring datasets....FalseTrue['datasets', 'model']{'drift_pct_threshold': {'type': '_empty', 'default': 20}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.ongoing_monitoring.ClassImbalanceDriftClass Imbalance DriftEvaluates drift in class distribution between reference and monitoring datasets....TrueTrue['datasets']{'drift_pct_threshold': {'type': 'float', 'default': 5.0}, 'title': {'type': 'str', 'default': 'Class Distribution Drift'}}['tabular_data', 'binary_classification', 'multiclass_classification']['classification']
validmind.ongoing_monitoring.ClassificationAccuracyDriftClassification Accuracy DriftCompares classification accuracy metrics between reference and monitoring datasets....FalseTrue['datasets', 'model']{'drift_pct_threshold': {'type': '_empty', 'default': 20}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.ongoing_monitoring.ConfusionMatrixDriftConfusion Matrix DriftCompares confusion matrix metrics between reference and monitoring datasets....FalseTrue['datasets', 'model']{'drift_pct_threshold': {'type': '_empty', 'default': 20}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance']['classification', 'text_classification']
validmind.ongoing_monitoring.CumulativePredictionProbabilitiesDriftCumulative Prediction Probabilities DriftCompares cumulative prediction probability distributions between reference and monitoring datasets....TrueFalse['datasets', 'model']{}['visualization', 'credit_risk']['classification']
validmind.ongoing_monitoring.PredictionProbabilitiesHistogramDriftPrediction Probabilities Histogram DriftCompares prediction probability distributions between reference and monitoring datasets....TrueTrue['datasets', 'model']{'title': {'type': '_empty', 'default': 'Prediction Probabilities Histogram Drift'}, 'drift_pct_threshold': {'type': 'float', 'default': 20.0}}['visualization', 'credit_risk']['classification']
validmind.ongoing_monitoring.ROCCurveDriftROC Curve DriftCompares ROC curves between reference and monitoring datasets....TrueFalse['datasets', 'model']{}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.ongoing_monitoring.ScoreBandsDriftScore Bands DriftAnalyzes drift in population distribution and default rates across score bands....FalseTrue['datasets', 'model']{'score_column': {'type': 'str', 'default': 'score'}, 'score_bands': {'type': 'list', 'default': None}, 'drift_threshold': {'type': 'float', 'default': 20.0}}['visualization', 'credit_risk', 'scorecard']['classification']
validmind.ongoing_monitoring.ScorecardHistogramDriftScorecard Histogram DriftCompares score distributions between reference and monitoring datasets for each class....TrueTrue['datasets']{'score_column': {'type': 'str', 'default': 'score'}, 'title': {'type': 'str', 'default': 'Scorecard Histogram Drift'}, 'drift_pct_threshold': {'type': 'float', 'default': 20.0}}['visualization', 'credit_risk', 'logistic_regression']['classification']
validmind.unit_metrics.classification.AccuracyAccuracyCalculates the accuracy of a modelFalseFalse['dataset', 'model']{}['classification']['classification']
validmind.unit_metrics.classification.F1F1Calculates the F1 score for a classification model.FalseFalse['model', 'dataset']{}['classification']['classification']
validmind.unit_metrics.classification.PrecisionPrecisionCalculates the precision for a classification model.FalseFalse['model', 'dataset']{}['classification']['classification']
validmind.unit_metrics.classification.ROC_AUCROC AUCCalculates the ROC AUC for a classification model.FalseFalse['model', 'dataset']{}['classification']['classification']
validmind.unit_metrics.classification.RecallRecallCalculates the recall for a classification model.FalseFalse['model', 'dataset']{}['classification']['classification']
\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 7, @@ -3321,94 +3955,110 @@ "data": { "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
IDNameDescriptionRequired InputsParamsTagsTasksIDNameDescriptionHas FigureHas TableRequired InputsParamsTagsTasks
validmind.model_validation.RegressionResidualsPlotRegression Residuals PlotEvaluates regression model performance using residual distribution and actual vs. predicted plots....['model', 'dataset']{'bin_size': {'type': 'float', 'default': 0.1}}['model_performance', 'visualization']['regression']
validmind.model_validation.sklearn.ConfusionMatrixConfusion MatrixEvaluates and visually represents the classification ML model's predictive performance using a Confusion Matrix...['dataset', 'model']{'threshold': {'type': 'float', 'default': 0.5}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.PrecisionRecallCurvePrecision Recall CurveEvaluates the precision-recall trade-off for binary classification models and visualizes the Precision-Recall curve....['model', 'dataset']{}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.ROCCurveROC CurveEvaluates binary classification model performance by generating and plotting the Receiver Operating Characteristic...['model', 'dataset']{}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.TrainingTestDegradationTraining Test DegradationTests if model performance degradation between training and test datasets exceeds a predefined threshold....['datasets', 'model']{'max_threshold': {'type': 'float', 'default': 0.1}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.ongoing_monitoring.CalibrationCurveDriftCalibration Curve DriftEvaluates changes in probability calibration between reference and monitoring datasets....['datasets', 'model']{'n_bins': {'type': 'int', 'default': 10}, 'drift_pct_threshold': {'type': 'float', 'default': 20}}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.ongoing_monitoring.ROCCurveDriftROC Curve DriftCompares ROC curves between reference and monitoring datasets....['datasets', 'model']{}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']validmind.model_validation.RegressionResidualsPlotRegression Residuals PlotEvaluates regression model performance using residual distribution and actual vs. predicted plots....TrueFalse['model', 'dataset']{'bin_size': {'type': 'float', 'default': 0.1}}['model_performance', 'visualization']['regression']
validmind.model_validation.sklearn.ConfusionMatrixConfusion MatrixEvaluates and visually represents the classification ML model's predictive performance using a Confusion Matrix...TrueFalse['dataset', 'model']{'threshold': {'type': 'float', 'default': 0.5}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.PrecisionRecallCurvePrecision Recall CurveEvaluates the precision-recall trade-off for binary classification models and visualizes the Precision-Recall curve....TrueFalse['model', 'dataset']{}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.ROCCurveROC CurveEvaluates binary classification model performance by generating and plotting the Receiver Operating Characteristic...TrueFalse['model', 'dataset']{}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.TrainingTestDegradationTraining Test DegradationTests if model performance degradation between training and test datasets exceeds a predefined threshold....FalseTrue['datasets', 'model']{'max_threshold': {'type': 'float', 'default': 0.1}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.ongoing_monitoring.CalibrationCurveDriftCalibration Curve DriftEvaluates changes in probability calibration between reference and monitoring datasets....TrueTrue['datasets', 'model']{'n_bins': {'type': 'int', 'default': 10}, 'drift_pct_threshold': {'type': 'float', 'default': 20}}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.ongoing_monitoring.ROCCurveDriftROC Curve DriftCompares ROC curves between reference and monitoring datasets....TrueFalse['datasets', 'model']{}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 8, @@ -3436,85 +4086,99 @@ "data": { "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
IDNameDescriptionRequired InputsParamsTagsTasksIDNameDescriptionHas FigureHas TableRequired InputsParamsTagsTasks
validmind.model_validation.sklearn.ConfusionMatrixConfusion MatrixEvaluates and visually represents the classification ML model's predictive performance using a Confusion Matrix...['dataset', 'model']{'threshold': {'type': 'float', 'default': 0.5}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.PrecisionRecallCurvePrecision Recall CurveEvaluates the precision-recall trade-off for binary classification models and visualizes the Precision-Recall curve....['model', 'dataset']{}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.ROCCurveROC CurveEvaluates binary classification model performance by generating and plotting the Receiver Operating Characteristic...['model', 'dataset']{}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.TrainingTestDegradationTraining Test DegradationTests if model performance degradation between training and test datasets exceeds a predefined threshold....['datasets', 'model']{'max_threshold': {'type': 'float', 'default': 0.1}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.ongoing_monitoring.CalibrationCurveDriftCalibration Curve DriftEvaluates changes in probability calibration between reference and monitoring datasets....['datasets', 'model']{'n_bins': {'type': 'int', 'default': 10}, 'drift_pct_threshold': {'type': 'float', 'default': 20}}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.ongoing_monitoring.ROCCurveDriftROC Curve DriftCompares ROC curves between reference and monitoring datasets....['datasets', 'model']{}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']validmind.model_validation.sklearn.ConfusionMatrixConfusion MatrixEvaluates and visually represents the classification ML model's predictive performance using a Confusion Matrix...TrueFalse['dataset', 'model']{'threshold': {'type': 'float', 'default': 0.5}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.PrecisionRecallCurvePrecision Recall CurveEvaluates the precision-recall trade-off for binary classification models and visualizes the Precision-Recall curve....TrueFalse['model', 'dataset']{}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.ROCCurveROC CurveEvaluates binary classification model performance by generating and plotting the Receiver Operating Characteristic...TrueFalse['model', 'dataset']{}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.model_validation.sklearn.TrainingTestDegradationTraining Test DegradationTests if model performance degradation between training and test datasets exceeds a predefined threshold....FalseTrue['datasets', 'model']{'max_threshold': {'type': 'float', 'default': 0.1}}['sklearn', 'binary_classification', 'multiclass_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.ongoing_monitoring.CalibrationCurveDriftCalibration Curve DriftEvaluates changes in probability calibration between reference and monitoring datasets....TrueTrue['datasets', 'model']{'n_bins': {'type': 'int', 'default': 10}, 'drift_pct_threshold': {'type': 'float', 'default': 20}}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
validmind.ongoing_monitoring.ROCCurveDriftROC Curve DriftCompares ROC curves between reference and monitoring datasets....TrueFalse['datasets', 'model']{}['sklearn', 'binary_classification', 'model_performance', 'visualization']['classification', 'text_classification']
\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 9, @@ -3616,7 +4280,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "5025f3a7dbb34f4c9de1b26e4909f3f7", + "model_id": "41899643f5a5460ca23e60681f09d81c", "version_major": 2, "version_minor": 0 }, diff --git a/scripts/bulk_ai_test_updates.py b/scripts/bulk_ai_test_updates.py index d44a7136f..6fbc82a95 100644 --- a/scripts/bulk_ai_test_updates.py +++ b/scripts/bulk_ai_test_updates.py @@ -264,6 +264,98 @@ def ExampleConfusionMatrix(model: VMModel, dataset: VMDataset): DO NOT CHANGE ANYTHING OTHER THAN ADDING THE NEW RAW DATA MECHANISM... I.E. DO NOT REMOVE ANYTHING FROM THE RETURN TUPLE OR THE RETURN VALUE (if it is a single object) """ +add_return_type_prompt = """ +You are an expert Python engineer and data scientist with broad experience across many domains. +ValidMind is a company that provides a Python SDK for building and running tests for the purposes of model risk management. +ValidMind's SDK offers a library of "test" functions that are run with our test harness against many types of models and datasets. + +Your task is to analyze the test function and add appropriate return type annotations to the function signature. + +CRITICAL: DO NOT CHANGE ANYTHING IN THE CODE EXCEPT: +1. Adding the return type annotation to the function signature +2. Adding any necessary import statements WITH THE EXISTING IMPORTS (do not add imports elsewhere) + +EXTREMELY IMPORTANT: ALWAYS PRESERVE COPYRIGHT AND LICENSE INFORMATION AT THE TOP OF THE FILE! +You must include any copyright, license, and SPDX identifier lines from the original file! + +ValidMind test functions return either a single object or a tuple of objects. +These objects are turned into a test result report by the test harness. +They can return any number of the following types of objects: +- Tables (pd.DataFrame or List[Dict[str, Any]]) +- Figures (matplotlib.figure.Figure, plotly.graph_objects.Figure (go.Figure), or List of these) +- Values (scalar values like float, int, str, or container types like List, Dict) +- Pass/Fail (bool value indicating whether the test passed or failed) +- Raw Data (RawData object containing intermediate data) + +Common imports that might be needed in the return type annotation: +- from typing import Any, Dict, List, Tuple, Union, Optional +- import plotly.graph_objects as go +- import matplotlib.figure +- import pandas as pd +- from validmind import RawData + +You should inspect the return statement(s) in the function to determine what the function actually returns. +Then, add the appropriate return type annotation to the function signature. + +If the function already has a return type annotation, don't change it - in this case, return the original code without any changes. + +Examples: + +1. For a function that returns a single figure: +```python +def PlotHistogram(dataset: VMDataset): + # ... code ... + return fig +``` +Should become: +```python +def PlotHistogram(dataset: VMDataset) -> go.Figure: + # ... code ... + return fig +``` + +2. For a function that returns multiple objects in a tuple: +```python +def ClassImbalance(dataset: VMDataset): + # ... code ... + return stats, fig, passed +``` +Should become: +```python +def ClassImbalance(dataset: VMDataset) -> Tuple[Dict[str, Any], go.Figure, bool]: + # ... code ... + return stats, fig, passed +``` + +3. For a function that builds a list of figures and returns it as a tuple: +```python +def MultiplePlots(dataset: VMDataset): + # ... code ... 
+ returns = [] + returns.append(fig1) + returns.append(fig2) + returns.append(RawData(...)) + return tuple(returns) +``` +Should become: +```python +def MultiplePlots(dataset: VMDataset) -> Tuple[go.Figure, go.Figure, RawData]: + # ... code ... + returns = [] + returns.append(fig1) + returns.append(fig2) + returns.append(RawData(...)) + return tuple(returns) +``` + +Return only the updated code and nothing else. +Do not wrap the code in backticks, simply return valid Python code. +Only add the correct imports if they are not already present in the file, and place them with the existing imports. +DO NOT modify the function body in any way - the only changes should be to the function signature and possibly adding imports. +NEVER REMOVE COPYRIGHT NOTICES OR LICENSE INFORMATION! +If the function already has a return type annotation, return the original code without any changes. +""".strip() + custom_prompt_system = """ You are an expert Python engineer and data scientist with broad experience across many domains. ValidMind is a company that provides a Python SDK for building and running tests for the purposes of model risk management. @@ -394,6 +486,31 @@ def add_raw_data_to_test(path): f.write(updated_file_contents) +def add_return_type_to_test(path): + """Add return type annotation to a test function""" + # get file contents from path + click.echo(f"> {path}") + with open(path, "r") as f: + file_contents = f.read() + + response = client.chat.completions.create( + model=OPENAI_GPT_MODEL, + messages=[ + {"role": "system", "content": add_return_type_prompt}, + {"role": "user", "content": f"```python\n{file_contents}```"}, + ], + ) + + updated_file_contents = response.choices[0].message.content + # remove starting "```python" and ending "```" + updated_file_contents = ( + updated_file_contents.lstrip("```python").rstrip("```").strip() + ) + + with open(path, "w") as f: + f.write(updated_file_contents) + + def custom_prompt(path, user_prompt): """Custom prompt for a test file""" # get file contents from path @@ -461,7 +578,13 @@ def _is_test_file(path): @click.option( "--action", type=click.Choice( - ["add_description", "add_raw_data", "custom_prompt", "custom_review"] + [ + "add_description", + "add_raw_data", + "add_return_type", + "custom_prompt", + "custom_review", + ] ), required=True, ) @@ -494,6 +617,8 @@ def main(action, path, model): func = add_description_to_test elif action == "add_raw_data": func = add_raw_data_to_test + elif action == "add_return_type": + func = add_return_type_to_test elif action == "custom_prompt": if not USER_PROMPT: user_prompt = input("Enter your prompt: ") diff --git a/validmind/tests/data_validation/ACFandPACFPlot.py b/validmind/tests/data_validation/ACFandPACFPlot.py index c2b6adbc1..5f5d6f397 100644 --- a/validmind/tests/data_validation/ACFandPACFPlot.py +++ b/validmind/tests/data_validation/ACFandPACFPlot.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import pandas as pd import plotly.graph_objects as go from statsmodels.tsa.stattools import acf, pacf @@ -12,7 +14,7 @@ @tags("time_series_data", "forecasting", "statistical_test", "visualization") @tasks("regression") -def ACFandPACFPlot(dataset: VMDataset): +def ACFandPACFPlot(dataset: VMDataset) -> Tuple[go.Figure, RawData]: """ Analyzes time series data using Autocorrelation Function (ACF) and Partial Autocorrelation Function (PACF) plots to reveal trends and correlations. 
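The hunk above also registers the new `add_return_type` action with the script's CLI dispatch. As a rough usage sketch (not part of the patch), the helper it dispatches to can be driven directly over a directory of test modules; the import path and target directory below are illustrative assumptions, and in practice the script would be run through its existing `--action` choice and path/model arguments with OpenAI credentials configured.

```python
# Hypothetical driver sketch: assumes scripts/bulk_ai_test_updates.py is importable
# from the repo root and that the OpenAI client/model settings are already configured.
from pathlib import Path

from bulk_ai_test_updates import add_return_type_to_test  # assumed import path

# Apply the new action file by file, mirroring what the CLI does for
# --action add_return_type over one tests package.
for test_file in sorted(Path("validmind/tests/data_validation").glob("*.py")):
    if test_file.name == "__init__.py":
        continue  # the package initializer holds no test function
    add_return_type_to_test(str(test_file))
```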
diff --git a/validmind/tests/data_validation/ADF.py b/validmind/tests/data_validation/ADF.py index 747f99181..493556e0e 100644 --- a/validmind/tests/data_validation/ADF.py +++ b/validmind/tests/data_validation/ADF.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict + import pandas as pd from statsmodels.tsa.stattools import adfuller @@ -16,7 +18,7 @@ "time_series_data", "statsmodels", "forecasting", "statistical_test", "stationarity" ) @tasks("regression") -def ADF(dataset: VMDataset): +def ADF(dataset: VMDataset) -> Dict[str, pd.DataFrame]: """ Assesses the stationarity of a time series dataset using the Augmented Dickey-Fuller (ADF) test. diff --git a/validmind/tests/data_validation/AutoAR.py b/validmind/tests/data_validation/AutoAR.py index 94a0df9f2..860c0a40e 100644 --- a/validmind/tests/data_validation/AutoAR.py +++ b/validmind/tests/data_validation/AutoAR.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict + import pandas as pd from statsmodels.tsa.ar_model import AutoReg from statsmodels.tsa.stattools import adfuller @@ -15,7 +17,7 @@ @tags("time_series_data", "statsmodels", "forecasting", "statistical_test") @tasks("regression") -def AutoAR(dataset: VMDataset, max_ar_order: int = 3): +def AutoAR(dataset: VMDataset, max_ar_order: int = 3) -> Dict[str, pd.DataFrame]: """ Automatically identifies the optimal Autoregressive (AR) order for a time series using BIC and AIC criteria. diff --git a/validmind/tests/data_validation/AutoMA.py b/validmind/tests/data_validation/AutoMA.py index 1c28ee7de..5898da1cc 100644 --- a/validmind/tests/data_validation/AutoMA.py +++ b/validmind/tests/data_validation/AutoMA.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict, Tuple + import pandas as pd from statsmodels.tsa.arima.model import ARIMA from statsmodels.tsa.stattools import adfuller @@ -15,7 +17,9 @@ @tags("time_series_data", "statsmodels", "forecasting", "statistical_test") @tasks("regression") -def AutoMA(dataset: VMDataset, max_ma_order: int = 3): +def AutoMA( + dataset: VMDataset, max_ma_order: int = 3 +) -> Tuple[Dict[str, pd.DataFrame], RawData]: """ Automatically selects the optimal Moving Average (MA) order for each variable in a time series dataset based on minimal BIC and AIC values. diff --git a/validmind/tests/data_validation/AutoStationarity.py b/validmind/tests/data_validation/AutoStationarity.py index e4d857353..a3bef7aad 100644 --- a/validmind/tests/data_validation/AutoStationarity.py +++ b/validmind/tests/data_validation/AutoStationarity.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict + import numpy as np import pandas as pd from statsmodels.tsa.stattools import adfuller @@ -12,7 +14,9 @@ @tags("time_series_data", "statsmodels", "forecasting", "statistical_test") @tasks("regression") -def AutoStationarity(dataset: VMDataset, max_order: int = 5, threshold: float = 0.05): +def AutoStationarity( + dataset: VMDataset, max_order: int = 5, threshold: float = 0.05 +) -> Dict[str, pd.DataFrame]: """ Automates Augmented Dickey-Fuller test to assess stationarity across multiple time series in a DataFrame. 
diff --git a/validmind/tests/data_validation/BivariateScatterPlots.py b/validmind/tests/data_validation/BivariateScatterPlots.py index c22af160e..af3deb117 100644 --- a/validmind/tests/data_validation/BivariateScatterPlots.py +++ b/validmind/tests/data_validation/BivariateScatterPlots.py @@ -3,15 +3,17 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial import itertools +from typing import Tuple import plotly.express as px +import plotly.graph_objects as go from validmind import RawData, tags, tasks @tags("tabular_data", "numerical_data", "visualization") @tasks("classification") -def BivariateScatterPlots(dataset): +def BivariateScatterPlots(dataset) -> Tuple[go.Figure, RawData]: """ Generates bivariate scatterplots to visually inspect relationships between pairs of numerical predictor variables in machine learning classification tasks. diff --git a/validmind/tests/data_validation/BoxPierce.py b/validmind/tests/data_validation/BoxPierce.py index a85e57a8e..035b2ee8a 100644 --- a/validmind/tests/data_validation/BoxPierce.py +++ b/validmind/tests/data_validation/BoxPierce.py @@ -2,6 +2,9 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial + +from typing import Tuple + import pandas as pd from statsmodels.stats.diagnostic import acorr_ljungbox @@ -10,7 +13,7 @@ @tasks("regression") @tags("time_series_data", "forecasting", "statistical_test", "statsmodels") -def BoxPierce(dataset): +def BoxPierce(dataset) -> Tuple[pd.DataFrame, RawData]: """ Detects autocorrelation in time-series data through the Box-Pierce test to validate model performance. diff --git a/validmind/tests/data_validation/ChiSquaredFeaturesTable.py b/validmind/tests/data_validation/ChiSquaredFeaturesTable.py index 87d553651..2e6620a25 100644 --- a/validmind/tests/data_validation/ChiSquaredFeaturesTable.py +++ b/validmind/tests/data_validation/ChiSquaredFeaturesTable.py @@ -12,7 +12,7 @@ @tags("tabular_data", "categorical_data", "statistical_test") @tasks("classification") -def ChiSquaredFeaturesTable(dataset, p_threshold=0.05): +def ChiSquaredFeaturesTable(dataset, p_threshold=0.05) -> pd.DataFrame: """ Assesses the statistical association between categorical features and a target variable using the Chi-Squared test. diff --git a/validmind/tests/data_validation/ClassImbalance.py b/validmind/tests/data_validation/ClassImbalance.py index 14d4eacf3..f85a99ea1 100644 --- a/validmind/tests/data_validation/ClassImbalance.py +++ b/validmind/tests/data_validation/ClassImbalance.py @@ -20,7 +20,7 @@ @tasks("classification") def ClassImbalance( dataset: VMDataset, min_percent_threshold: int = 10 -) -> Tuple[Dict[str, Any], go.Figure, bool]: +) -> Tuple[Dict[str, Any], go.Figure, bool, RawData]: """ Evaluates and quantifies class distribution imbalance in a dataset used by a machine learning model. 
diff --git a/validmind/tests/data_validation/DatasetDescription.py b/validmind/tests/data_validation/DatasetDescription.py index 64fe81db7..5b40a46a6 100644 --- a/validmind/tests/data_validation/DatasetDescription.py +++ b/validmind/tests/data_validation/DatasetDescription.py @@ -4,6 +4,7 @@ import re from collections import Counter +from typing import Any, Dict, List, Tuple import numpy as np @@ -142,7 +143,9 @@ def describe_column(df, column): @tags("tabular_data", "time_series_data", "text_data") @tasks("classification", "regression", "text_classification", "text_summarization") -def DatasetDescription(dataset: VMDataset): +def DatasetDescription( + dataset: VMDataset, +) -> Tuple[Dict[str, List[Dict[str, Any]]], RawData]: """ Provides comprehensive analysis and statistical summaries of each column in a machine learning model's dataset. diff --git a/validmind/tests/data_validation/DatasetSplit.py b/validmind/tests/data_validation/DatasetSplit.py index ee6e2b2c2..d4c103c63 100644 --- a/validmind/tests/data_validation/DatasetSplit.py +++ b/validmind/tests/data_validation/DatasetSplit.py @@ -2,7 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial -from typing import List + +from typing import Any, Dict, List, Tuple from validmind import RawData, tags, tasks from validmind.vm_models import VMDataset @@ -17,7 +18,7 @@ @tags("tabular_data", "time_series_data", "text_data") @tasks("classification", "regression", "text_classification", "text_summarization") -def DatasetSplit(datasets: List[VMDataset]): +def DatasetSplit(datasets: List[VMDataset]) -> Tuple[List[Dict[str, Any]], RawData]: """ Evaluates and visualizes the distribution proportions among training, testing, and validation datasets of an ML model. diff --git a/validmind/tests/data_validation/DescriptiveStatistics.py b/validmind/tests/data_validation/DescriptiveStatistics.py index 3e9e929e4..b64bfe138 100644 --- a/validmind/tests/data_validation/DescriptiveStatistics.py +++ b/validmind/tests/data_validation/DescriptiveStatistics.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Any, Dict + import pandas as pd from validmind import tags, tasks @@ -46,7 +48,7 @@ def get_summary_statistics_categorical(df, categorical_fields): @tags("tabular_data", "time_series_data", "data_quality") @tasks("classification", "regression") -def DescriptiveStatistics(dataset: VMDataset): +def DescriptiveStatistics(dataset: VMDataset) -> Dict[str, Any]: """ Performs a detailed descriptive statistical analysis of both numerical and categorical data within a model's dataset. diff --git a/validmind/tests/data_validation/DickeyFullerGLS.py b/validmind/tests/data_validation/DickeyFullerGLS.py index 0a9dda2c6..efede3b5b 100644 --- a/validmind/tests/data_validation/DickeyFullerGLS.py +++ b/validmind/tests/data_validation/DickeyFullerGLS.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. 
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Any, Dict, Tuple + import pandas as pd from arch.unitroot import DFGLS from numpy.linalg import LinAlgError @@ -16,7 +18,7 @@ @tags("time_series_data", "forecasting", "unit_root_test") @tasks("regression") -def DickeyFullerGLS(dataset: VMDataset): +def DickeyFullerGLS(dataset: VMDataset) -> Tuple[Dict[str, Any], RawData]: """ Assesses stationarity in time series data using the Dickey-Fuller GLS test to determine the order of integration. diff --git a/validmind/tests/data_validation/Duplicates.py b/validmind/tests/data_validation/Duplicates.py index 3dc748452..8d2d1f0e3 100644 --- a/validmind/tests/data_validation/Duplicates.py +++ b/validmind/tests/data_validation/Duplicates.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Any, Dict, Tuple + import pandas as pd from validmind import tags, tasks @@ -9,7 +11,7 @@ @tags("tabular_data", "data_quality", "text_data") @tasks("classification", "regression") -def Duplicates(dataset, min_threshold=1): +def Duplicates(dataset, min_threshold=1) -> Tuple[Dict[str, Any], bool]: """ Tests dataset for duplicate entries, ensuring model reliability via data quality verification. diff --git a/validmind/tests/data_validation/EngleGrangerCoint.py b/validmind/tests/data_validation/EngleGrangerCoint.py index 0c525f758..e43c6a2b7 100644 --- a/validmind/tests/data_validation/EngleGrangerCoint.py +++ b/validmind/tests/data_validation/EngleGrangerCoint.py @@ -2,6 +2,9 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial + +from typing import Dict + import pandas as pd from statsmodels.tsa.stattools import coint @@ -12,7 +15,9 @@ @tags("time_series_data", "statistical_test", "forecasting") @tasks("regression") -def EngleGrangerCoint(dataset: VMDataset, threshold: float = 0.05): +def EngleGrangerCoint( + dataset: VMDataset, threshold: float = 0.05 +) -> Dict[str, pd.DataFrame]: """ Assesses the degree of co-movement between pairs of time series data using the Engle-Granger cointegration test. diff --git a/validmind/tests/data_validation/FeatureTargetCorrelationPlot.py b/validmind/tests/data_validation/FeatureTargetCorrelationPlot.py index 83c2523db..bef6ed546 100644 --- a/validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +++ b/validmind/tests/data_validation/FeatureTargetCorrelationPlot.py @@ -3,6 +3,8 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import numpy as np import plotly.graph_objects as go @@ -11,7 +13,7 @@ @tags("tabular_data", "visualization", "correlation") @tasks("classification", "regression") -def FeatureTargetCorrelationPlot(dataset, fig_height=600): +def FeatureTargetCorrelationPlot(dataset, fig_height=600) -> Tuple[go.Figure, RawData]: """ Visualizes the correlation between input features and the model's target output in a color-coded horizontal bar plot. diff --git a/validmind/tests/data_validation/HighCardinality.py b/validmind/tests/data_validation/HighCardinality.py index 078b7f034..9faa5129d 100644 --- a/validmind/tests/data_validation/HighCardinality.py +++ b/validmind/tests/data_validation/HighCardinality.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. 
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Any, Dict, List, Tuple + from validmind import RawData, tags, tasks from validmind.vm_models import VMDataset @@ -13,7 +15,7 @@ def HighCardinality( num_threshold: int = 100, percent_threshold: float = 0.1, threshold_type: str = "percent", -): +) -> Tuple[List[Dict[str, Any]], bool, RawData]: """ Assesses the number of unique values in categorical columns to detect high cardinality and potential overfitting. diff --git a/validmind/tests/data_validation/HighPearsonCorrelation.py b/validmind/tests/data_validation/HighPearsonCorrelation.py index d533ca3c1..375662433 100644 --- a/validmind/tests/data_validation/HighPearsonCorrelation.py +++ b/validmind/tests/data_validation/HighPearsonCorrelation.py @@ -2,6 +2,9 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial + +from typing import Any, Dict, List, Tuple + from validmind import RawData, tags, tasks from validmind.vm_models import VMDataset @@ -13,7 +16,7 @@ def HighPearsonCorrelation( max_threshold: float = 0.3, top_n_correlations: int = 10, feature_columns: list = None, -): +) -> Tuple[List[Dict[str, Any]], bool, RawData]: """ Identifies highly correlated feature pairs in a dataset suggesting feature redundancy or multicollinearity. diff --git a/validmind/tests/data_validation/IQROutliersBarPlot.py b/validmind/tests/data_validation/IQROutliersBarPlot.py index c8c639750..7576fbc1b 100644 --- a/validmind/tests/data_validation/IQROutliersBarPlot.py +++ b/validmind/tests/data_validation/IQROutliersBarPlot.py @@ -2,6 +2,9 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial + +from typing import Tuple + import plotly.graph_objects as go from validmind import RawData, tags, tasks @@ -22,7 +25,7 @@ def compute_outliers(series, threshold): @tasks("classification", "regression") def IQROutliersBarPlot( dataset: VMDataset, threshold: float = 1.5, fig_width: int = 800 -): +) -> Tuple[go.Figure, RawData]: """ Visualizes outlier distribution across percentiles in numerical data using the Interquartile Range (IQR) method. diff --git a/validmind/tests/data_validation/IQROutliersTable.py b/validmind/tests/data_validation/IQROutliersTable.py index d9e154ca4..ce802eae3 100644 --- a/validmind/tests/data_validation/IQROutliersTable.py +++ b/validmind/tests/data_validation/IQROutliersTable.py @@ -2,6 +2,9 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial + +from typing import Any, Dict, Tuple + from validmind import RawData, tags, tasks from validmind.vm_models import VMDataset @@ -18,7 +21,9 @@ def compute_outliers(series, threshold=1.5): @tags("tabular_data", "numerical_data") @tasks("classification", "regression") -def IQROutliersTable(dataset: VMDataset, threshold: float = 1.5): +def IQROutliersTable( + dataset: VMDataset, threshold: float = 1.5 +) -> Tuple[Dict[str, Any], RawData]: """ Determines and summarizes outliers in numerical features using the Interquartile Range method. 
diff --git a/validmind/tests/data_validation/IsolationForestOutliers.py b/validmind/tests/data_validation/IsolationForestOutliers.py index 5120a97c9..76475e7d9 100644 --- a/validmind/tests/data_validation/IsolationForestOutliers.py +++ b/validmind/tests/data_validation/IsolationForestOutliers.py @@ -3,7 +3,9 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial import itertools +from typing import Tuple +import matplotlib.figure import matplotlib.pyplot as plt import seaborn as sns from sklearn.ensemble import IsolationForest @@ -19,7 +21,7 @@ def IsolationForestOutliers( random_state: int = 0, contamination: float = 0.1, feature_columns: list = None, -): +) -> Tuple[matplotlib.figure.Figure, RawData]: """ Detects outliers in a dataset using the Isolation Forest algorithm and visualizes results through scatter plots. diff --git a/validmind/tests/data_validation/JarqueBera.py b/validmind/tests/data_validation/JarqueBera.py index d9857f8f5..78888fd9d 100644 --- a/validmind/tests/data_validation/JarqueBera.py +++ b/validmind/tests/data_validation/JarqueBera.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import pandas as pd from statsmodels.stats.stattools import jarque_bera @@ -10,7 +12,7 @@ @tasks("classification", "regression") @tags("tabular_data", "data_distribution", "statistical_test", "statsmodels") -def JarqueBera(dataset): +def JarqueBera(dataset) -> Tuple[pd.DataFrame, RawData]: """ Assesses normality of dataset features in an ML model using the Jarque-Bera test. diff --git a/validmind/tests/data_validation/KPSS.py b/validmind/tests/data_validation/KPSS.py index 9d3d4985e..6dcb0134f 100644 --- a/validmind/tests/data_validation/KPSS.py +++ b/validmind/tests/data_validation/KPSS.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Any, Dict + import pandas as pd from statsmodels.tsa.stattools import kpss @@ -15,7 +17,7 @@ @tags("time_series_data", "stationarity", "unit_root_test", "statsmodels") @tasks("data_validation") -def KPSS(dataset: VMDataset): +def KPSS(dataset: VMDataset) -> Dict[str, Any]: """ Assesses the stationarity of time-series data in a machine learning model using the KPSS unit root test. diff --git a/validmind/tests/data_validation/LJungBox.py b/validmind/tests/data_validation/LJungBox.py index 6ffb966d7..7abccf45b 100644 --- a/validmind/tests/data_validation/LJungBox.py +++ b/validmind/tests/data_validation/LJungBox.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import pandas as pd from statsmodels.stats.diagnostic import acorr_ljungbox @@ -10,7 +12,7 @@ @tasks("regression") @tags("time_series_data", "forecasting", "statistical_test", "statsmodels") -def LJungBox(dataset): +def LJungBox(dataset) -> Tuple[pd.DataFrame, RawData]: """ Assesses autocorrelations in dataset features by performing a Ljung-Box test on each feature. 
diff --git a/validmind/tests/data_validation/LaggedCorrelationHeatmap.py b/validmind/tests/data_validation/LaggedCorrelationHeatmap.py index d295b1caa..b1539c02c 100644 --- a/validmind/tests/data_validation/LaggedCorrelationHeatmap.py +++ b/validmind/tests/data_validation/LaggedCorrelationHeatmap.py @@ -2,9 +2,12 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import numpy as np import pandas as pd import plotly.figure_factory as ff +import plotly.graph_objects as go from validmind import RawData, tags, tasks from validmind.vm_models import VMDataset @@ -15,7 +18,9 @@ @tags("time_series_data", "visualization") @tasks("regression") -def LaggedCorrelationHeatmap(dataset: VMDataset, num_lags: int = 10): +def LaggedCorrelationHeatmap( + dataset: VMDataset, num_lags: int = 10 +) -> Tuple[go.Figure, RawData]: """ Assesses and visualizes correlation between target variable and lagged independent variables in a time-series dataset. diff --git a/validmind/tests/data_validation/MissingValues.py b/validmind/tests/data_validation/MissingValues.py index 7aae5a14d..0c8aac33e 100644 --- a/validmind/tests/data_validation/MissingValues.py +++ b/validmind/tests/data_validation/MissingValues.py @@ -2,13 +2,17 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Any, Dict, List, Tuple + from validmind import RawData, tags, tasks from validmind.vm_models import VMDataset @tags("tabular_data", "data_quality") @tasks("classification", "regression") -def MissingValues(dataset: VMDataset, min_threshold: int = 1): +def MissingValues( + dataset: VMDataset, min_threshold: int = 1 +) -> Tuple[List[Dict[str, Any]], bool, RawData]: """ Evaluates dataset quality by ensuring missing value ratio across all features does not exceed a set threshold. diff --git a/validmind/tests/data_validation/MissingValuesBarPlot.py b/validmind/tests/data_validation/MissingValuesBarPlot.py index 8ec66519d..dcc5f8b00 100644 --- a/validmind/tests/data_validation/MissingValuesBarPlot.py +++ b/validmind/tests/data_validation/MissingValuesBarPlot.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import plotly.graph_objects as go from validmind import RawData, tags, tasks @@ -12,7 +14,7 @@ @tasks("classification", "regression") def MissingValuesBarPlot( dataset: VMDataset, threshold: int = 80, fig_height: int = 600 -): +) -> Tuple[go.Figure, RawData]: """ Assesses the percentage and distribution of missing values in the dataset via a bar plot, with emphasis on identifying high-risk columns based on a user-defined threshold. diff --git a/validmind/tests/data_validation/MutualInformation.py b/validmind/tests/data_validation/MutualInformation.py index 148257dd7..d7f3d2e69 100644 --- a/validmind/tests/data_validation/MutualInformation.py +++ b/validmind/tests/data_validation/MutualInformation.py @@ -2,6 +2,9 @@ # See the LICENSE file in the root of this repository for details. 
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial + +from typing import Tuple + import plotly.graph_objects as go from sklearn.feature_selection import mutual_info_classif, mutual_info_regression @@ -14,7 +17,7 @@ @tasks("classification", "regression") def MutualInformation( dataset: VMDataset, min_threshold: float = 0.01, task: str = "classification" -): +) -> Tuple[go.Figure, RawData]: """ Calculates mutual information scores between features and target variable to evaluate feature relevance. diff --git a/validmind/tests/data_validation/PearsonCorrelationMatrix.py b/validmind/tests/data_validation/PearsonCorrelationMatrix.py index 5b8d756a1..ee162534c 100644 --- a/validmind/tests/data_validation/PearsonCorrelationMatrix.py +++ b/validmind/tests/data_validation/PearsonCorrelationMatrix.py @@ -3,6 +3,8 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import plotly.graph_objects as go from validmind import RawData, tags, tasks @@ -10,7 +12,7 @@ @tags("tabular_data", "numerical_data", "correlation") @tasks("classification", "regression") -def PearsonCorrelationMatrix(dataset): +def PearsonCorrelationMatrix(dataset) -> Tuple[go.Figure, RawData]: """ Evaluates linear dependency between numerical variables in a dataset via a Pearson Correlation coefficient heat map. diff --git a/validmind/tests/data_validation/PhillipsPerronArch.py b/validmind/tests/data_validation/PhillipsPerronArch.py index 401ac6125..3be6cd3b8 100644 --- a/validmind/tests/data_validation/PhillipsPerronArch.py +++ b/validmind/tests/data_validation/PhillipsPerronArch.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Any, Dict + import numpy as np import pandas as pd from arch.unitroot import PhillipsPerron @@ -17,7 +19,7 @@ @tags("time_series_data", "forecasting", "statistical_test", "unit_root_test") @tasks("regression") -def PhillipsPerronArch(dataset: VMDataset): +def PhillipsPerronArch(dataset: VMDataset) -> Dict[str, Any]: """ Assesses the stationarity of time series data in each feature of the ML model using the Phillips-Perron test. diff --git a/validmind/tests/data_validation/ProtectedClassesCombination.py b/validmind/tests/data_validation/ProtectedClassesCombination.py index 606144d45..f6e651e87 100644 --- a/validmind/tests/data_validation/ProtectedClassesCombination.py +++ b/validmind/tests/data_validation/ProtectedClassesCombination.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial import sys +from typing import Dict, Tuple import pandas as pd import plotly.graph_objects as go @@ -11,6 +12,7 @@ from validmind import RawData, tags, tasks from validmind.errors import MissingDependencyError from validmind.logging import get_logger +from validmind.vm_models import VMDataset, VMModel try: from fairlearn.metrics import ( @@ -33,7 +35,9 @@ @tags("bias_and_fairness") @tasks("classification", "regression") -def ProtectedClassesCombination(dataset, model, protected_classes=None): +def ProtectedClassesCombination( + dataset: VMDataset, model: VMModel, protected_classes=None +) -> Tuple[Dict[str, pd.DataFrame], Dict[str, pd.DataFrame], go.Figure, RawData]: """ Visualizes combinations of protected classes and their corresponding error metric differences. 
diff --git a/validmind/tests/data_validation/ProtectedClassesDescription.py b/validmind/tests/data_validation/ProtectedClassesDescription.py index cb4e631ac..1912878d3 100644 --- a/validmind/tests/data_validation/ProtectedClassesDescription.py +++ b/validmind/tests/data_validation/ProtectedClassesDescription.py @@ -3,6 +3,8 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import List, Tuple, Union + import pandas as pd import plotly.graph_objects as go @@ -14,7 +16,9 @@ @tags("bias_and_fairness", "descriptive_statistics") @tasks("classification", "regression") -def ProtectedClassesDescription(dataset, protected_classes=None): +def ProtectedClassesDescription( + dataset, protected_classes=None +) -> Tuple[pd.DataFrame, Union[go.Figure, List[go.Figure]], RawData]: """ Visualizes the distribution of protected classes in the dataset relative to the target variable and provides descriptive statistics. diff --git a/validmind/tests/data_validation/ProtectedClassesDisparity.py b/validmind/tests/data_validation/ProtectedClassesDisparity.py index ac9b88251..1d3a9f0d6 100644 --- a/validmind/tests/data_validation/ProtectedClassesDisparity.py +++ b/validmind/tests/data_validation/ProtectedClassesDisparity.py @@ -4,12 +4,14 @@ import io import sys +from typing import Any, List, Tuple import pandas as pd from validmind import RawData, tags, tasks from validmind.errors import MissingDependencyError from validmind.logging import get_logger +from validmind.vm_models import VMDataset, VMModel try: import aequitas.plot as ap @@ -28,12 +30,12 @@ @tags("bias_and_fairness") @tasks("classification", "regression") def ProtectedClassesDisparity( - dataset, - model, + dataset: VMDataset, + model: VMModel, protected_classes=None, disparity_tolerance=1.25, metrics=["fnr", "fpr", "tpr"], -): +) -> Tuple[pd.DataFrame, List[bytes], Any, RawData]: """ Investigates disparities in model performance across different protected class segments. diff --git a/validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py b/validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py index 31124a621..5783e691c 100644 --- a/validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +++ b/validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py @@ -4,13 +4,16 @@ import json import sys +from typing import Any, Dict, Tuple +import matplotlib.figure import matplotlib.pyplot as plt import pandas as pd from validmind import RawData, tags, tasks from validmind.errors import MissingDependencyError from validmind.logging import get_logger +from validmind.vm_models import VMDataset try: from fairlearn.metrics import ( @@ -35,8 +38,12 @@ @tags("bias_and_fairness") @tasks("classification", "regression") def ProtectedClassesThresholdOptimizer( - dataset, pipeline=None, protected_classes=None, X_train=None, y_train=None -): + dataset: VMDataset, + pipeline=None, + protected_classes=None, + X_train=None, + y_train=None, +) -> Tuple[Dict[str, Any], matplotlib.figure.Figure, RawData]: """ Obtains a classifier by applying group-specific thresholds to the provided estimator. diff --git a/validmind/tests/data_validation/RollingStatsPlot.py b/validmind/tests/data_validation/RollingStatsPlot.py index 7f05735f6..34d73291a 100644 --- a/validmind/tests/data_validation/RollingStatsPlot.py +++ b/validmind/tests/data_validation/RollingStatsPlot.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. 
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import matplotlib.pyplot as plt import pandas as pd @@ -42,7 +44,9 @@ def plot_rolling_statistics(df, col, window_size): @tags("time_series_data", "visualization", "stationarity") @tasks("regression") -def RollingStatsPlot(dataset: VMDataset, window_size: int = 12): +def RollingStatsPlot( + dataset: VMDataset, window_size: int = 12 +) -> Tuple[plt.Figure, RawData]: """ Evaluates the stationarity of time series data by plotting its rolling mean and standard deviation over a specified window. diff --git a/validmind/tests/data_validation/RunsTest.py b/validmind/tests/data_validation/RunsTest.py index 7004b238d..227b49b04 100644 --- a/validmind/tests/data_validation/RunsTest.py +++ b/validmind/tests/data_validation/RunsTest.py @@ -10,7 +10,7 @@ @tasks("classification", "regression") @tags("tabular_data", "statistical_test", "statsmodels") -def RunsTest(dataset): +def RunsTest(dataset) -> pd.DataFrame: """ Executes Runs Test on ML model to detect non-random patterns in output data sequence. diff --git a/validmind/tests/data_validation/ScatterPlot.py b/validmind/tests/data_validation/ScatterPlot.py index a7e037475..899ef8f5d 100644 --- a/validmind/tests/data_validation/ScatterPlot.py +++ b/validmind/tests/data_validation/ScatterPlot.py @@ -2,6 +2,7 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +import matplotlib.figure import matplotlib.pyplot as plt import seaborn as sns @@ -10,7 +11,7 @@ @tags("tabular_data", "visualization") @tasks("classification", "regression") -def ScatterPlot(dataset): +def ScatterPlot(dataset) -> matplotlib.figure.Figure: """ Assesses visual relationships, patterns, and outliers among features in a dataset through scatter plot matrices. diff --git a/validmind/tests/data_validation/ScoreBandDefaultRates.py b/validmind/tests/data_validation/ScoreBandDefaultRates.py index ba995ad1c..5bab7303e 100644 --- a/validmind/tests/data_validation/ScoreBandDefaultRates.py +++ b/validmind/tests/data_validation/ScoreBandDefaultRates.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import numpy as np import pandas as pd @@ -16,7 +18,7 @@ def ScoreBandDefaultRates( model: VMModel, score_column: str = "score", score_bands: list = None, -): +) -> Tuple[pd.DataFrame, RawData]: """ Analyzes default rates and population distribution across credit score bands. diff --git a/validmind/tests/data_validation/SeasonalDecompose.py b/validmind/tests/data_validation/SeasonalDecompose.py index e72879a5e..bc0bc3f26 100644 --- a/validmind/tests/data_validation/SeasonalDecompose.py +++ b/validmind/tests/data_validation/SeasonalDecompose.py @@ -2,6 +2,9 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial + +from typing import Tuple + import numpy as np import pandas as pd import plotly.graph_objects as go @@ -19,7 +22,9 @@ @tags("time_series_data", "seasonality", "statsmodels") @tasks("regression") -def SeasonalDecompose(dataset: VMDataset, seasonal_model: str = "additive"): +def SeasonalDecompose( + dataset: VMDataset, seasonal_model: str = "additive" +) -> Tuple[go.Figure, RawData]: """ Assesses patterns and seasonality in a time series dataset by decomposing its features into foundational components. 
diff --git a/validmind/tests/data_validation/ShapiroWilk.py b/validmind/tests/data_validation/ShapiroWilk.py index b299040c7..909d097c0 100644 --- a/validmind/tests/data_validation/ShapiroWilk.py +++ b/validmind/tests/data_validation/ShapiroWilk.py @@ -2,6 +2,9 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial + +from typing import Tuple + import pandas as pd from scipy import stats @@ -10,7 +13,7 @@ @tasks("classification", "regression") @tags("tabular_data", "data_distribution", "statistical_test") -def ShapiroWilk(dataset): +def ShapiroWilk(dataset) -> Tuple[pd.DataFrame, RawData]: """ Evaluates feature-wise normality of training data using the Shapiro-Wilk test. diff --git a/validmind/tests/data_validation/Skewness.py b/validmind/tests/data_validation/Skewness.py index 2c7550f75..b255393ec 100644 --- a/validmind/tests/data_validation/Skewness.py +++ b/validmind/tests/data_validation/Skewness.py @@ -2,13 +2,15 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Any, Dict, List, Tuple + from validmind import tags, tasks from validmind.utils import infer_datatypes @tags("data_quality", "tabular_data") @tasks("classification", "regression") -def Skewness(dataset, max_threshold=1): +def Skewness(dataset, max_threshold=1) -> Tuple[Dict[str, List[Dict[str, Any]]], bool]: """ Evaluates the skewness of numerical data in a dataset to check against a defined threshold, aiming to ensure data quality and optimize model performance. diff --git a/validmind/tests/data_validation/SpreadPlot.py b/validmind/tests/data_validation/SpreadPlot.py index d4f40df06..a43e0a507 100644 --- a/validmind/tests/data_validation/SpreadPlot.py +++ b/validmind/tests/data_validation/SpreadPlot.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import matplotlib.pyplot as plt import pandas as pd import seaborn as sns @@ -13,7 +15,7 @@ @tags("time_series_data", "visualization") @tasks("regression") -def SpreadPlot(dataset: VMDataset): +def SpreadPlot(dataset: VMDataset) -> Tuple[plt.Figure, RawData]: """ Assesses potential correlations between pairs of time series variables through visualization to enhance understanding of their relationships. diff --git a/validmind/tests/data_validation/TabularCategoricalBarPlots.py b/validmind/tests/data_validation/TabularCategoricalBarPlots.py index 4a2419f34..d52257b73 100644 --- a/validmind/tests/data_validation/TabularCategoricalBarPlots.py +++ b/validmind/tests/data_validation/TabularCategoricalBarPlots.py @@ -2,6 +2,9 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial + +from typing import Tuple + import plotly.graph_objs as go from validmind import RawData, tags, tasks @@ -11,7 +14,7 @@ @tags("tabular_data", "visualization") @tasks("classification", "regression") -def TabularCategoricalBarPlots(dataset: VMDataset): +def TabularCategoricalBarPlots(dataset: VMDataset) -> Tuple[go.Figure, RawData]: """ Generates and visualizes bar plots for each category in categorical features to evaluate the dataset's composition. 
diff --git a/validmind/tests/data_validation/TabularDateTimeHistograms.py b/validmind/tests/data_validation/TabularDateTimeHistograms.py index 9d2a6ffca..b4774e889 100644 --- a/validmind/tests/data_validation/TabularDateTimeHistograms.py +++ b/validmind/tests/data_validation/TabularDateTimeHistograms.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import pandas as pd import plotly.graph_objects as go @@ -12,7 +14,7 @@ @tags("time_series_data", "visualization") @tasks("classification", "regression") -def TabularDateTimeHistograms(dataset: VMDataset): +def TabularDateTimeHistograms(dataset: VMDataset) -> Tuple[go.Figure, RawData]: """ Generates histograms to provide graphical insight into the distribution of time intervals in a model's datetime data. diff --git a/validmind/tests/data_validation/TabularDescriptionTables.py b/validmind/tests/data_validation/TabularDescriptionTables.py index f568545e4..266bf1bf9 100644 --- a/validmind/tests/data_validation/TabularDescriptionTables.py +++ b/validmind/tests/data_validation/TabularDescriptionTables.py @@ -2,6 +2,9 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial + +from typing import Tuple + import pandas as pd from validmind import tags, tasks @@ -9,7 +12,7 @@ @tags("tabular_data") @tasks("classification", "regression") -def TabularDescriptionTables(dataset): +def TabularDescriptionTables(dataset) -> Tuple[pd.DataFrame]: """ Summarizes key descriptive statistics for numerical, categorical, and datetime variables in a dataset. diff --git a/validmind/tests/data_validation/TabularNumericalHistograms.py b/validmind/tests/data_validation/TabularNumericalHistograms.py index 438cdfd89..2121a9ed6 100644 --- a/validmind/tests/data_validation/TabularNumericalHistograms.py +++ b/validmind/tests/data_validation/TabularNumericalHistograms.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import plotly.graph_objs as go from validmind import tags, tasks @@ -10,7 +12,7 @@ @tags("tabular_data", "visualization") @tasks("classification", "regression") -def TabularNumericalHistograms(dataset: VMDataset): +def TabularNumericalHistograms(dataset: VMDataset) -> Tuple[go.Figure]: """ Generates histograms for each numerical feature in a dataset to provide visual insights into data distribution and detect potential issues. diff --git a/validmind/tests/data_validation/TargetRateBarPlots.py b/validmind/tests/data_validation/TargetRateBarPlots.py index 094906fd7..5b860dc3d 100644 --- a/validmind/tests/data_validation/TargetRateBarPlots.py +++ b/validmind/tests/data_validation/TargetRateBarPlots.py @@ -2,6 +2,9 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial + +from typing import Tuple + import numpy as np import plotly.graph_objs as go from plotly.subplots import make_subplots @@ -13,7 +16,7 @@ @tags("tabular_data", "visualization", "categorical_data") @tasks("classification") -def TargetRateBarPlots(dataset: VMDataset): +def TargetRateBarPlots(dataset: VMDataset) -> Tuple[go.Figure, RawData]: """ Generates bar plots visualizing the default rates of categorical features for a classification machine learning model. 
diff --git a/validmind/tests/data_validation/TimeSeriesDescription.py b/validmind/tests/data_validation/TimeSeriesDescription.py index ad2fd08a1..0dc5e2c2d 100644 --- a/validmind/tests/data_validation/TimeSeriesDescription.py +++ b/validmind/tests/data_validation/TimeSeriesDescription.py @@ -9,7 +9,7 @@ @tags("time_series_data", "analysis") @tasks("regression") -def TimeSeriesDescription(dataset): +def TimeSeriesDescription(dataset) -> pd.DataFrame: """ Generates a detailed analysis for the provided time series dataset, summarizing key statistics to identify trends, patterns, and data quality issues. diff --git a/validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py b/validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py index b91f279dd..75d15eb87 100644 --- a/validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +++ b/validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py @@ -10,7 +10,7 @@ @tags("time_series_data", "analysis") @tasks("regression") -def TimeSeriesDescriptiveStatistics(dataset): +def TimeSeriesDescriptiveStatistics(dataset) -> pd.DataFrame: """ Evaluates the descriptive statistics of a time series dataset to identify trends, patterns, and data quality issues. diff --git a/validmind/tests/data_validation/TimeSeriesFrequency.py b/validmind/tests/data_validation/TimeSeriesFrequency.py index f89f96e09..5c26ef82a 100644 --- a/validmind/tests/data_validation/TimeSeriesFrequency.py +++ b/validmind/tests/data_validation/TimeSeriesFrequency.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Any, Dict, List, Tuple + import pandas as pd import plotly.graph_objects as go @@ -12,7 +14,9 @@ @tags("time_series_data") @tasks("regression") -def TimeSeriesFrequency(dataset: VMDataset): +def TimeSeriesFrequency( + dataset: VMDataset, +) -> Tuple[List[Dict[str, Any]], go.Figure, bool, RawData]: """ Evaluates consistency of time series data frequency and generates a frequency plot. diff --git a/validmind/tests/data_validation/TimeSeriesHistogram.py b/validmind/tests/data_validation/TimeSeriesHistogram.py index fd38e1eee..7a304357c 100644 --- a/validmind/tests/data_validation/TimeSeriesHistogram.py +++ b/validmind/tests/data_validation/TimeSeriesHistogram.py @@ -2,8 +2,11 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import pandas as pd import plotly.express as px +import plotly.graph_objects as go from validmind import tags, tasks from validmind.logging import get_logger @@ -13,7 +16,7 @@ @tags("data_validation", "visualization", "time_series_data") @tasks("regression", "time_series_forecasting") -def TimeSeriesHistogram(dataset, nbins=30): +def TimeSeriesHistogram(dataset, nbins=30) -> Tuple[go.Figure]: """ Visualizes distribution of time-series data using histograms and Kernel Density Estimation (KDE) lines. diff --git a/validmind/tests/data_validation/TimeSeriesLinePlot.py b/validmind/tests/data_validation/TimeSeriesLinePlot.py index 4df6f1472..16f30d6ac 100644 --- a/validmind/tests/data_validation/TimeSeriesLinePlot.py +++ b/validmind/tests/data_validation/TimeSeriesLinePlot.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. 
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import pandas as pd import plotly.graph_objects as go @@ -12,7 +14,7 @@ @tags("time_series_data", "visualization") @tasks("regression") -def TimeSeriesLinePlot(dataset: VMDataset): +def TimeSeriesLinePlot(dataset: VMDataset) -> Tuple[go.Figure]: """ Generates and analyses time-series data through line plots revealing trends, patterns, anomalies over time. diff --git a/validmind/tests/data_validation/TimeSeriesMissingValues.py b/validmind/tests/data_validation/TimeSeriesMissingValues.py index be7199387..b12a41bf2 100644 --- a/validmind/tests/data_validation/TimeSeriesMissingValues.py +++ b/validmind/tests/data_validation/TimeSeriesMissingValues.py @@ -2,9 +2,12 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Any, Dict, List, Tuple + import pandas as pd import plotly.express as px import plotly.figure_factory as ff +import plotly.graph_objects as go from validmind import RawData, tags, tasks from validmind.errors import SkipTestError @@ -13,7 +16,9 @@ @tags("time_series_data") @tasks("regression") -def TimeSeriesMissingValues(dataset: VMDataset, min_threshold: int = 1): +def TimeSeriesMissingValues( + dataset: VMDataset, min_threshold: int = 1 +) -> Tuple[List[Dict[str, Any]], go.Figure, go.Figure, bool, RawData]: """ Validates time-series data quality by confirming the count of missing values is below a certain threshold. diff --git a/validmind/tests/data_validation/TimeSeriesOutliers.py b/validmind/tests/data_validation/TimeSeriesOutliers.py index 6cf926e62..6718c4a18 100644 --- a/validmind/tests/data_validation/TimeSeriesOutliers.py +++ b/validmind/tests/data_validation/TimeSeriesOutliers.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import List, Tuple + import pandas as pd import plotly.graph_objects as go @@ -12,7 +14,9 @@ @tags("time_series_data") @tasks("regression") -def TimeSeriesOutliers(dataset: VMDataset, zscore_threshold: int = 3): +def TimeSeriesOutliers( + dataset: VMDataset, zscore_threshold: int = 3 +) -> Tuple[pd.DataFrame, List[go.Figure], bool, RawData]: """ Identifies and visualizes outliers in time-series data using the z-score method. diff --git a/validmind/tests/data_validation/TooManyZeroValues.py b/validmind/tests/data_validation/TooManyZeroValues.py index 539f3f424..adc04381d 100644 --- a/validmind/tests/data_validation/TooManyZeroValues.py +++ b/validmind/tests/data_validation/TooManyZeroValues.py @@ -2,13 +2,18 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial + +from typing import Any, Dict, List, Tuple + from validmind.tests import tags, tasks from validmind.vm_models import VMDataset @tags("tabular_data") @tasks("regression", "classification") -def TooManyZeroValues(dataset: VMDataset, max_percent_threshold: float = 0.03): +def TooManyZeroValues( + dataset: VMDataset, max_percent_threshold: float = 0.03 +) -> Tuple[List[Dict[str, Any]], bool]: """ Identifies numerical columns in a dataset that contain an excessive number of zero values, defined by a threshold percentage. 
diff --git a/validmind/tests/data_validation/UniqueRows.py b/validmind/tests/data_validation/UniqueRows.py index e6e0278bf..ac60ff0f3 100644 --- a/validmind/tests/data_validation/UniqueRows.py +++ b/validmind/tests/data_validation/UniqueRows.py @@ -2,13 +2,17 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Any, Dict, List, Tuple + from validmind import tags, tasks from validmind.vm_models import VMDataset @tags("tabular_data") @tasks("regression", "classification") -def UniqueRows(dataset: VMDataset, min_percent_threshold: float = 1): +def UniqueRows( + dataset: VMDataset, min_percent_threshold: float = 1 +) -> Tuple[List[Dict[str, Any]], bool]: """ Verifies the diversity of the dataset by ensuring that the count of unique rows exceeds a prescribed threshold. diff --git a/validmind/tests/data_validation/WOEBinPlots.py b/validmind/tests/data_validation/WOEBinPlots.py index 3a72a4ddc..ff974bdf9 100644 --- a/validmind/tests/data_validation/WOEBinPlots.py +++ b/validmind/tests/data_validation/WOEBinPlots.py @@ -2,6 +2,9 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial + +from typing import Tuple + import numpy as np import pandas as pd import plotly.express as px @@ -24,7 +27,7 @@ def WOEBinPlots( breaks_adj: list = None, fig_height: int = 600, fig_width: int = 500, -): +) -> Tuple[go.Figure, RawData]: """ Generates visualizations of Weight of Evidence (WoE) and Information Value (IV) for understanding predictive power of categorical variables in a data set. diff --git a/validmind/tests/data_validation/WOEBinTable.py b/validmind/tests/data_validation/WOEBinTable.py index b3ef04098..12726c00d 100644 --- a/validmind/tests/data_validation/WOEBinTable.py +++ b/validmind/tests/data_validation/WOEBinTable.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict, Tuple + import pandas as pd import scorecardpy as sc @@ -12,7 +14,9 @@ @tags("tabular_data", "categorical_data") @tasks("classification") -def WOEBinTable(dataset: VMDataset, breaks_adj: list = None): +def WOEBinTable( + dataset: VMDataset, breaks_adj: list = None +) -> Tuple[Dict[str, pd.DataFrame], RawData]: """ Assesses the Weight of Evidence (WoE) and Information Value (IV) of each feature to evaluate its predictive power in a binary classification model. diff --git a/validmind/tests/data_validation/ZivotAndrewsArch.py b/validmind/tests/data_validation/ZivotAndrewsArch.py index 07b689acc..34071e889 100644 --- a/validmind/tests/data_validation/ZivotAndrewsArch.py +++ b/validmind/tests/data_validation/ZivotAndrewsArch.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Any, Dict, Tuple + import pandas as pd from arch.unitroot import ZivotAndrews from numpy.linalg import LinAlgError @@ -16,7 +18,7 @@ @tags("time_series_data", "stationarity", "unit_root_test") @tasks("regression") -def ZivotAndrewsArch(dataset: VMDataset): +def ZivotAndrewsArch(dataset: VMDataset) -> Tuple[Dict[str, Any], RawData]: """ Evaluates the order of integration and stationarity of time series data using the Zivot-Andrews unit root test. 
diff --git a/validmind/tests/data_validation/nlp/CommonWords.py b/validmind/tests/data_validation/nlp/CommonWords.py index 52fe819f8..be9e50e0e 100644 --- a/validmind/tests/data_validation/nlp/CommonWords.py +++ b/validmind/tests/data_validation/nlp/CommonWords.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial from collections import Counter +from typing import Tuple import nltk import plotly.graph_objects as go @@ -14,7 +15,7 @@ @tags("nlp", "text_data", "visualization", "frequency_analysis") @tasks("text_classification", "text_summarization") -def CommonWords(dataset: VMDataset): +def CommonWords(dataset: VMDataset) -> Tuple[go.Figure, RawData]: """ Assesses the most frequent non-stopwords in a text column for identifying prevalent language patterns. diff --git a/validmind/tests/data_validation/nlp/Hashtags.py b/validmind/tests/data_validation/nlp/Hashtags.py index 54d4d6c64..3e49462ef 100644 --- a/validmind/tests/data_validation/nlp/Hashtags.py +++ b/validmind/tests/data_validation/nlp/Hashtags.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial import re +from typing import Tuple import plotly.graph_objects as go @@ -13,7 +14,7 @@ @tags("nlp", "text_data", "visualization", "frequency_analysis") @tasks("text_classification", "text_summarization") -def Hashtags(dataset: VMDataset, top_hashtags: int = 25): +def Hashtags(dataset: VMDataset, top_hashtags: int = 25) -> Tuple[go.Figure, RawData]: """ Assesses hashtag frequency in a text column, highlighting usage trends and potential dataset bias or spam. diff --git a/validmind/tests/data_validation/nlp/LanguageDetection.py b/validmind/tests/data_validation/nlp/LanguageDetection.py index 9b6608240..ffab229bc 100644 --- a/validmind/tests/data_validation/nlp/LanguageDetection.py +++ b/validmind/tests/data_validation/nlp/LanguageDetection.py @@ -2,7 +2,10 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import plotly.express as px +import plotly.graph_objects as go from langdetect import LangDetectException, detect from validmind import RawData, tags, tasks @@ -10,7 +13,7 @@ @tags("nlp", "text_data", "visualization") @tasks("text_classification", "text_summarization") -def LanguageDetection(dataset): +def LanguageDetection(dataset) -> Tuple[go.Figure, RawData]: """ Assesses the diversity of languages in a textual dataset by detecting and visualizing the distribution of languages. diff --git a/validmind/tests/data_validation/nlp/Mentions.py b/validmind/tests/data_validation/nlp/Mentions.py index 9d69a05fc..18cdf8a8f 100644 --- a/validmind/tests/data_validation/nlp/Mentions.py +++ b/validmind/tests/data_validation/nlp/Mentions.py @@ -3,9 +3,11 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial import re +from typing import Tuple import pandas as pd import plotly.express as px +import plotly.graph_objects as go from validmind import RawData, tags, tasks from validmind.errors import SkipTestError @@ -14,7 +16,7 @@ @tags("nlp", "text_data", "visualization", "frequency_analysis") @tasks("text_classification", "text_summarization") -def Mentions(dataset: VMDataset, top_mentions: int = 25): +def Mentions(dataset: VMDataset, top_mentions: int = 25) -> Tuple[go.Figure, RawData]: """ Calculates and visualizes frequencies of '@' prefixed mentions in a text-based dataset for NLP model analysis. 
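The LanguageDetection hunk above only shows imports and the new `Tuple[go.Figure, RawData]` annotation; here is a minimal, hedged sketch of that langdetect-plus-histogram flow (sample texts and the figure title are illustrative, not ValidMind's):

```python
import pandas as pd
import plotly.express as px
from langdetect import LangDetectException, detect

texts = pd.Series(["hello world", "bonjour tout le monde", "hola mundo", "1234 5678"])


def detect_or_unknown(text: str) -> str:
    try:
        return detect(text)
    except LangDetectException:
        # langdetect raises when a string has no usable language features
        return "Unknown"


languages = texts.apply(detect_or_unknown)
fig = px.histogram(x=languages, title="Language distribution in text column")
fig.show()
```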
diff --git a/validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py b/validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py index da4f816f6..11ef05936 100644 --- a/validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +++ b/validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py @@ -3,8 +3,11 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict, Tuple + import pandas as pd import plotly.express as px +import plotly.graph_objects as go from textblob import TextBlob from validmind import RawData, tags, tasks @@ -12,7 +15,9 @@ @tags("nlp", "text_data", "data_validation") @tasks("nlp") -def PolarityAndSubjectivity(dataset, threshold_subjectivity=0.5, threshold_polarity=0): +def PolarityAndSubjectivity( + dataset, threshold_subjectivity=0.5, threshold_polarity=0 +) -> Tuple[go.Figure, Dict[str, pd.DataFrame], RawData]: """ Analyzes the polarity and subjectivity of text data within a given dataset to visualize the sentiment distribution. diff --git a/validmind/tests/data_validation/nlp/Punctuations.py b/validmind/tests/data_validation/nlp/Punctuations.py index 352b28a0e..392c107f8 100644 --- a/validmind/tests/data_validation/nlp/Punctuations.py +++ b/validmind/tests/data_validation/nlp/Punctuations.py @@ -8,6 +8,7 @@ import string from collections import defaultdict +from typing import Tuple import plotly.graph_objects as go @@ -16,7 +17,7 @@ @tags("nlp", "text_data", "visualization", "frequency_analysis") @tasks("text_classification", "text_summarization", "nlp") -def Punctuations(dataset, count_mode="token"): +def Punctuations(dataset, count_mode="token") -> Tuple[go.Figure, RawData]: """ Analyzes and visualizes the frequency distribution of punctuation usage in a given text dataset. diff --git a/validmind/tests/data_validation/nlp/Sentiment.py b/validmind/tests/data_validation/nlp/Sentiment.py index 1c8ef1932..8df9b88a4 100644 --- a/validmind/tests/data_validation/nlp/Sentiment.py +++ b/validmind/tests/data_validation/nlp/Sentiment.py @@ -3,6 +3,8 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import matplotlib.pyplot as plt import nltk import seaborn as sns @@ -13,7 +15,7 @@ @tags("nlp", "text_data", "data_validation") @tasks("nlp") -def Sentiment(dataset): +def Sentiment(dataset) -> Tuple[plt.Figure, RawData]: """ Analyzes the sentiment of text data within a dataset using the VADER sentiment analysis tool. diff --git a/validmind/tests/data_validation/nlp/StopWords.py b/validmind/tests/data_validation/nlp/StopWords.py index ac3236294..789ef79e5 100644 --- a/validmind/tests/data_validation/nlp/StopWords.py +++ b/validmind/tests/data_validation/nlp/StopWords.py @@ -7,6 +7,7 @@ """ from collections import defaultdict +from typing import Dict, Tuple import nltk import pandas as pd @@ -21,7 +22,7 @@ @tasks("text_classification", "text_summarization") def StopWords( dataset: VMDataset, min_percent_threshold: float = 0.5, num_words: int = 25 -): +) -> Tuple[Dict[str, pd.DataFrame], go.Figure, bool, RawData]: """ Evaluates and visualizes the frequency of English stop words in a text dataset against a defined threshold. 
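StopWords (end of the hunk above) takes a `min_percent_threshold` and `num_words`; a rough sketch of the frequency calculation those parameters imply, assuming pre-tokenized text and a one-time `nltk.download("stopwords")`. Whether the threshold is interpreted as a percent or a fraction is an assumption here.

```python
from collections import defaultdict

import nltk
from nltk.corpus import stopwords

nltk.download("stopwords", quiet=True)

corpus = [
    ["the", "model", "is", "trained", "on", "the", "data"],
    ["we", "then", "evaluate", "the", "model"],
]
stop_set = set(stopwords.words("english"))
total_tokens = sum(len(doc) for doc in corpus)

counts = defaultdict(int)
for doc in corpus:
    for token in doc:
        if token in stop_set:
            counts[token] += 1

min_percent_threshold, num_words = 0.5, 25
table = sorted(
    ((word, 100 * n / total_tokens) for word, n in counts.items()),
    key=lambda row: row[1],
    reverse=True,
)[:num_words]
# Assumed semantics: pass only if no single stop word exceeds the threshold.
passed = all(pct < min_percent_threshold for _, pct in table)
print(table, passed)
```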
diff --git a/validmind/tests/data_validation/nlp/TextDescription.py b/validmind/tests/data_validation/nlp/TextDescription.py index 69f3c17eb..e6cde5c7d 100644 --- a/validmind/tests/data_validation/nlp/TextDescription.py +++ b/validmind/tests/data_validation/nlp/TextDescription.py @@ -3,10 +3,12 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial import string +from typing import Tuple import nltk import pandas as pd import plotly.express as px +import plotly.graph_objects as go from nltk.corpus import stopwords from validmind import RawData, tags, tasks @@ -94,7 +96,7 @@ def TextDescription( "``", }, lang: str = "english", -): +) -> Tuple[go.Figure, RawData]: """ Conducts comprehensive textual analysis on a dataset using NLTK to evaluate various parameters and generate visualizations. diff --git a/validmind/tests/data_validation/nlp/Toxicity.py b/validmind/tests/data_validation/nlp/Toxicity.py index cae2a2ff3..b6ad32beb 100644 --- a/validmind/tests/data_validation/nlp/Toxicity.py +++ b/validmind/tests/data_validation/nlp/Toxicity.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import evaluate import matplotlib.pyplot as plt import seaborn as sns @@ -11,7 +13,7 @@ @tags("nlp", "text_data", "data_validation") @tasks("nlp") -def Toxicity(dataset): +def Toxicity(dataset) -> Tuple[plt.Figure, RawData]: """ Assesses the toxicity of text data within a dataset to visualize the distribution of toxicity scores. diff --git a/validmind/tests/load.py b/validmind/tests/load.py index 697c64219..f1b1e7b84 100644 --- a/validmind/tests/load.py +++ b/validmind/tests/load.py @@ -7,7 +7,17 @@ import inspect import json from pprint import pformat -from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from typing import ( + Any, + Callable, + Dict, + List, + Optional, + Tuple, + Union, + get_args, + get_origin, +) from uuid import uuid4 import pandas as pd @@ -18,12 +28,31 @@ from ..logging import get_logger from ..utils import display, format_dataframe, fuzzy_match, md_to_html, test_id_to_name from ..vm_models import VMDataset, VMModel +from ..vm_models.figure import Figure +from ..vm_models.result import ResultTable from .__types__ import TestID from ._store import test_provider_store, test_store logger = get_logger(__name__) +try: + from matplotlib.figure import Figure as MatplotlibFigure +except ImportError: + MatplotlibFigure = None + +try: + from plotly.graph_objects import Figure as PlotlyFigure +except ImportError: + PlotlyFigure = None + +FIGURE_TYPES = tuple( + item for item in (Figure, MatplotlibFigure, PlotlyFigure) if inspect.isclass(item) +) +TABLE_TYPES = (pd.DataFrame, ResultTable) +GENERIC_TABLE_TYPES = (list, dict) + + INPUT_TYPE_MAP = { "dataset": VMDataset, "datasets": List[VMDataset], @@ -32,6 +61,45 @@ } +def _inspect_return_type(annotation: Any) -> Tuple[bool, bool]: + """ + Inspects a return type annotation to determine if it contains a Figure or Table. + + Returns a tuple (has_figure, has_table). + """ + has_figure = False + has_table = False + + origin = get_origin(annotation) + args = get_args(annotation) + + # A Union means the return type could be one of several types. + # A tuple in a type hint means multiple return values. + # We recursively inspect the arguments of Union and tuple. 
+ if origin is Union or origin is tuple: + for arg in args: + fig, table = _inspect_return_type(arg) + has_figure |= fig + has_table |= table + return has_figure, has_table + + check_type = origin if origin is not None else annotation + + if not inspect.isclass(check_type): + return has_figure, has_table # Can't do issubclass on non-class like Any + + if FIGURE_TYPES and issubclass(check_type, FIGURE_TYPES): + has_figure = True + + if TABLE_TYPES and issubclass(check_type, TABLE_TYPES): + has_table = True + + if check_type in GENERIC_TABLE_TYPES: + has_table = True + + return has_figure, has_table + + def _inspect_signature( test_func: Callable[..., Any], ) -> Tuple[Dict[str, Dict[str, Any]], Dict[str, Dict[str, Any]]]: @@ -173,23 +241,29 @@ def _pretty_list_tests( tests: Dict[str, Callable[..., Any]], truncate: bool = True ) -> None: """Pretty print a list of tests""" - table = [ - { - "ID": test_id, - "Name": test_id_to_name(test_id), - "Description": _test_description( - inspect.getdoc(test), - num_lines=(5 if truncate else 999999), - ), - "Required Inputs": list(test.inputs.keys()), - "Params": test.params, - "Tags": test.__tags__, - "Tasks": test.__tasks__, - } - for test_id, test in tests.items() - ] + rows = [] + for test_id, test in tests.items(): + has_figure, has_table = _inspect_return_type( + inspect.signature(test).return_annotation + ) + rows.append( + { + "ID": test_id, + "Name": test_id_to_name(test_id), + "Description": _test_description( + inspect.getdoc(test), + num_lines=(5 if truncate else 999999), + ), + "Has Figure": has_figure, + "Has Table": has_table, + "Required Inputs": list(test.inputs.keys()), + "Params": test.params, + "Tags": test.__tags__, + "Tasks": test.__tasks__, + } + ) - return format_dataframe(pd.DataFrame(table)) + return format_dataframe(pd.DataFrame(rows)) def list_tags() -> List[str]: diff --git a/validmind/tests/model_validation/BertScore.py b/validmind/tests/model_validation/BertScore.py index 72f04243d..b26184016 100644 --- a/validmind/tests/model_validation/BertScore.py +++ b/validmind/tests/model_validation/BertScore.py @@ -2,21 +2,24 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import evaluate import pandas as pd import plotly.graph_objects as go from validmind import RawData, tags, tasks from validmind.tests.utils import validate_prediction +from validmind.vm_models import VMDataset, VMModel @tags("nlp", "text_data", "visualization") @tasks("text_classification", "text_summarization") def BertScore( - dataset, - model, + dataset: VMDataset, + model: VMModel, evaluation_model="distilbert-base-uncased", -): +) -> Tuple[pd.DataFrame, go.Figure, RawData]: """ Assesses the quality of machine-generated text using BERTScore metrics and visualizes results through histograms and bar charts, alongside compiling a comprehensive table of descriptive statistics. diff --git a/validmind/tests/model_validation/BleuScore.py b/validmind/tests/model_validation/BleuScore.py index 8d32fe0c1..5b51795ee 100644 --- a/validmind/tests/model_validation/BleuScore.py +++ b/validmind/tests/model_validation/BleuScore.py @@ -2,17 +2,22 @@ # See the LICENSE file in the root of this repository for details. 
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import evaluate import pandas as pd import plotly.graph_objects as go from validmind import RawData, tags, tasks from validmind.tests.utils import validate_prediction +from validmind.vm_models import VMDataset, VMModel @tags("nlp", "text_data", "visualization") @tasks("text_classification", "text_summarization") -def BleuScore(dataset, model): +def BleuScore( + dataset: VMDataset, model: VMModel +) -> Tuple[pd.DataFrame, go.Figure, RawData]: """ Evaluates the quality of machine-generated text using BLEU metrics and visualizes the results through histograms and bar charts, alongside compiling a comprehensive table of descriptive statistics for BLEU scores. diff --git a/validmind/tests/model_validation/ClusterSizeDistribution.py b/validmind/tests/model_validation/ClusterSizeDistribution.py index 4cddbe40f..7bb1443a4 100644 --- a/validmind/tests/model_validation/ClusterSizeDistribution.py +++ b/validmind/tests/model_validation/ClusterSizeDistribution.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import pandas as pd import plotly.graph_objects as go @@ -11,7 +13,9 @@ @tags("sklearn", "model_performance") @tasks("clustering") -def ClusterSizeDistribution(dataset: VMDataset, model: VMModel): +def ClusterSizeDistribution( + dataset: VMDataset, model: VMModel +) -> Tuple[go.Figure, RawData]: """ Assesses the performance of clustering models by comparing the distribution of cluster sizes in model predictions with the actual data. diff --git a/validmind/tests/model_validation/ContextualRecall.py b/validmind/tests/model_validation/ContextualRecall.py index 5930e9f63..9ce407ecc 100644 --- a/validmind/tests/model_validation/ContextualRecall.py +++ b/validmind/tests/model_validation/ContextualRecall.py @@ -2,17 +2,22 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import nltk import pandas as pd import plotly.graph_objects as go from validmind import RawData, tags, tasks from validmind.tests.utils import validate_prediction +from validmind.vm_models import VMDataset, VMModel @tags("nlp", "text_data", "visualization") @tasks("text_classification", "text_summarization") -def ContextualRecall(dataset, model): +def ContextualRecall( + dataset: VMDataset, model: VMModel +) -> Tuple[pd.DataFrame, go.Figure, RawData]: """ Evaluates a Natural Language Generation model's ability to generate contextually relevant and factually correct text, visualizing the results through histograms and bar charts, alongside compiling a comprehensive table of diff --git a/validmind/tests/model_validation/FeaturesAUC.py b/validmind/tests/model_validation/FeaturesAUC.py index 44c218c8f..403c7a938 100644 --- a/validmind/tests/model_validation/FeaturesAUC.py +++ b/validmind/tests/model_validation/FeaturesAUC.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. 
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import numpy as np import pandas as pd import plotly.graph_objects as go @@ -17,7 +19,9 @@ @tags("feature_importance", "AUC", "visualization") @tasks("classification") -def FeaturesAUC(dataset: VMDataset, fontsize: int = 12, figure_height: int = 500): +def FeaturesAUC( + dataset: VMDataset, fontsize: int = 12, figure_height: int = 500 +) -> Tuple[go.Figure, RawData]: """ Evaluates the discriminatory power of each individual feature within a binary classification model by calculating the Area Under the Curve (AUC) for each feature separately. diff --git a/validmind/tests/model_validation/MeteorScore.py b/validmind/tests/model_validation/MeteorScore.py index 10ed5b7fb..bc735fc15 100644 --- a/validmind/tests/model_validation/MeteorScore.py +++ b/validmind/tests/model_validation/MeteorScore.py @@ -2,17 +2,22 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import evaluate import pandas as pd import plotly.graph_objects as go from validmind import RawData, tags, tasks from validmind.tests.utils import validate_prediction +from validmind.vm_models import VMDataset, VMModel @tags("nlp", "text_data", "visualization") @tasks("text_classification", "text_summarization") -def MeteorScore(dataset, model): +def MeteorScore( + dataset: VMDataset, model: VMModel +) -> Tuple[pd.DataFrame, go.Figure, RawData]: """ Assesses the quality of machine-generated translations by comparing them to human-produced references using the METEOR score, which evaluates precision, recall, and word order. diff --git a/validmind/tests/model_validation/ModelMetadata.py b/validmind/tests/model_validation/ModelMetadata.py index 5b3914877..11082684c 100644 --- a/validmind/tests/model_validation/ModelMetadata.py +++ b/validmind/tests/model_validation/ModelMetadata.py @@ -6,11 +6,12 @@ from validmind import tags, tasks from validmind.utils import get_model_info +from validmind.vm_models import VMModel @tags("model_training", "metadata") @tasks("regression", "time_series_forecasting") -def ModelMetadata(model): +def ModelMetadata(model: VMModel) -> pd.DataFrame: """ Compare metadata of different models and generate a summary table with the results. diff --git a/validmind/tests/model_validation/ModelPredictionResiduals.py b/validmind/tests/model_validation/ModelPredictionResiduals.py index 26a4f837c..e0db2a1e2 100644 --- a/validmind/tests/model_validation/ModelPredictionResiduals.py +++ b/validmind/tests/model_validation/ModelPredictionResiduals.py @@ -2,18 +2,26 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Optional, Tuple + import pandas as pd import plotly.graph_objects as go from scipy.stats import kstest from validmind import RawData, tags, tasks +from validmind.vm_models import VMDataset, VMModel @tags("regression") @tasks("residual_analysis", "visualization") def ModelPredictionResiduals( - dataset, model, nbins=100, p_value_threshold=0.05, start_date=None, end_date=None -): + dataset: VMDataset, + model: VMModel, + nbins: int = 100, + p_value_threshold: float = 0.05, + start_date: Optional[str] = None, + end_date: Optional[str] = None, +) -> Tuple[pd.DataFrame, go.Figure, RawData]: """ Assesses normality and behavior of residuals in regression models through visualization and statistical tests. 
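Stepping back to `validmind/tests/load.py` above: the new `_inspect_return_type` helper is why these return annotations matter, since it walks each test's annotation to populate the new "Has Figure" / "Has Table" columns in the test listing. A self-contained sketch of that walk, with simplified stand-ins for the figure/table type sets built in `load.py`:

```python
import inspect
from typing import Any, Dict, List, Tuple, Union, get_args, get_origin

import pandas as pd
import plotly.graph_objects as go

# Simplified stand-ins for the FIGURE_TYPES / TABLE_TYPES tuples in load.py.
FIGURE_TYPES = (go.Figure,)
TABLE_TYPES = (pd.DataFrame,)
GENERIC_TABLE_TYPES = (list, dict)


def inspect_return_type(annotation: Any) -> Tuple[bool, bool]:
    """Return (has_figure, has_table) for a return-type annotation."""
    has_figure = has_table = False
    origin, args = get_origin(annotation), get_args(annotation)

    # Union -> one of several types; tuple -> multiple return values. Recurse into both.
    if origin is Union or origin is tuple:
        for arg in args:
            fig, table = inspect_return_type(arg)
            has_figure |= fig
            has_table |= table
        return has_figure, has_table

    check_type = origin if origin is not None else annotation
    if not inspect.isclass(check_type):
        return has_figure, has_table  # e.g. Any, which issubclass cannot handle

    has_figure = issubclass(check_type, FIGURE_TYPES)
    has_table = issubclass(check_type, TABLE_TYPES) or check_type in GENERIC_TABLE_TYPES
    return has_figure, has_table


print(inspect_return_type(Tuple[pd.DataFrame, go.Figure]))     # (True, True)
print(inspect_return_type(Tuple[List[Dict[str, Any]], bool]))  # (False, True)
```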
diff --git a/validmind/tests/model_validation/RegardScore.py b/validmind/tests/model_validation/RegardScore.py index 6a362f09e..119eafe04 100644 --- a/validmind/tests/model_validation/RegardScore.py +++ b/validmind/tests/model_validation/RegardScore.py @@ -2,17 +2,23 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import evaluate import pandas as pd import plotly.graph_objects as go from validmind import RawData, tags, tasks from validmind.tests.utils import validate_prediction +from validmind.vm_models import VMDataset, VMModel @tags("nlp", "text_data", "visualization") @tasks("text_classification", "text_summarization") -def RegardScore(dataset, model): +def RegardScore( + dataset: VMDataset, + model: VMModel, +) -> Tuple[pd.DataFrame, go.Figure, RawData]: """ Assesses the sentiment and potential biases in text generated by NLP models by computing and visualizing regard scores. diff --git a/validmind/tests/model_validation/RegressionResidualsPlot.py b/validmind/tests/model_validation/RegressionResidualsPlot.py index 813f6bb62..f8afbf43a 100644 --- a/validmind/tests/model_validation/RegressionResidualsPlot.py +++ b/validmind/tests/model_validation/RegressionResidualsPlot.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import numpy as np import plotly.figure_factory as ff import plotly.graph_objects as go @@ -12,7 +14,9 @@ @tags("model_performance", "visualization") @tasks("regression") -def RegressionResidualsPlot(model: VMModel, dataset: VMDataset, bin_size: float = 0.1): +def RegressionResidualsPlot( + model: VMModel, dataset: VMDataset, bin_size: float = 0.1 +) -> Tuple[go.Figure, go.Figure, RawData]: """ Evaluates regression model performance using residual distribution and actual vs. predicted plots. diff --git a/validmind/tests/model_validation/RougeScore.py b/validmind/tests/model_validation/RougeScore.py index 82a046143..e83b0f835 100644 --- a/validmind/tests/model_validation/RougeScore.py +++ b/validmind/tests/model_validation/RougeScore.py @@ -2,16 +2,23 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import pandas as pd import plotly.graph_objects as go from rouge import Rouge from validmind import RawData, tags, tasks +from validmind.vm_models import VMDataset, VMModel @tags("nlp", "text_data", "visualization") @tasks("text_classification", "text_summarization") -def RougeScore(dataset, model, metric="rouge-1"): +def RougeScore( + dataset: VMDataset, + model: VMModel, + metric: str = "rouge-1", +) -> Tuple[pd.DataFrame, go.Figure, RawData]: """ Assesses the quality of machine-generated text using ROUGE metrics and visualizes the results to provide comprehensive performance insights. diff --git a/validmind/tests/model_validation/TimeSeriesPredictionWithCI.py b/validmind/tests/model_validation/TimeSeriesPredictionWithCI.py index 6edaed08f..c5f28b96d 100644 --- a/validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +++ b/validmind/tests/model_validation/TimeSeriesPredictionWithCI.py @@ -2,17 +2,24 @@ # See the LICENSE file in the root of this repository for details. 
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import numpy as np import pandas as pd import plotly.graph_objects as go from scipy.stats import norm from validmind import RawData, tags, tasks +from validmind.vm_models import VMDataset, VMModel @tags("model_predictions", "visualization") @tasks("regression", "time_series_forecasting") -def TimeSeriesPredictionWithCI(dataset, model, confidence=0.95): +def TimeSeriesPredictionWithCI( + dataset: VMDataset, + model: VMModel, + confidence: float = 0.95, +) -> Tuple[pd.DataFrame, go.Figure, RawData]: """ Assesses predictive accuracy and uncertainty in time series models, highlighting breaches beyond confidence intervals. diff --git a/validmind/tests/model_validation/TimeSeriesPredictionsPlot.py b/validmind/tests/model_validation/TimeSeriesPredictionsPlot.py index 9a2239fe6..d6260b8c8 100644 --- a/validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +++ b/validmind/tests/model_validation/TimeSeriesPredictionsPlot.py @@ -2,14 +2,20 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import plotly.graph_objects as go from validmind import RawData, tags, tasks +from validmind.vm_models import VMDataset, VMModel @tags("model_predictions", "visualization") @tasks("regression", "time_series_forecasting") -def TimeSeriesPredictionsPlot(dataset, model): +def TimeSeriesPredictionsPlot( + dataset: VMDataset, + model: VMModel, +) -> Tuple[go.Figure, RawData]: """ Plot actual vs predicted values for time series data and generate a visual comparison for the model. diff --git a/validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py b/validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py index 3de28df23..740c5e3cb 100644 --- a/validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +++ b/validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py @@ -2,17 +2,22 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Optional, Tuple import pandas as pd import plotly.express as px +import plotly.graph_objects as go from sklearn import metrics from validmind import RawData, tags, tasks +from validmind.vm_models import VMDataset, VMModel @tags("model_performance", "sklearn") @tasks("regression", "time_series_forecasting") -def TimeSeriesR2SquareBySegments(dataset, model, segments=None): +def TimeSeriesR2SquareBySegments( + dataset: VMDataset, model: VMModel, segments: Optional[int] = None +) -> Tuple[pd.DataFrame, go.Figure, RawData]: """ Evaluates the R-Squared values of regression models over specified time segments in time series data to assess segment-wise model performance. diff --git a/validmind/tests/model_validation/TokenDisparity.py b/validmind/tests/model_validation/TokenDisparity.py index 5d6961a2e..7d9709340 100644 --- a/validmind/tests/model_validation/TokenDisparity.py +++ b/validmind/tests/model_validation/TokenDisparity.py @@ -2,15 +2,20 @@ # See the LICENSE file in the root of this repository for details. 
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import pandas as pd import plotly.graph_objects as go from validmind import RawData, tags, tasks +from validmind.vm_models import VMDataset, VMModel @tags("nlp", "text_data", "visualization") @tasks("text_classification", "text_summarization") -def TokenDisparity(dataset, model): +def TokenDisparity( + dataset: VMDataset, model: VMModel +) -> Tuple[pd.DataFrame, go.Figure, RawData]: """ Evaluates the token disparity between reference and generated texts, visualizing the results through histograms and bar charts, alongside compiling a comprehensive table of descriptive statistics for token counts. diff --git a/validmind/tests/model_validation/ToxicityScore.py b/validmind/tests/model_validation/ToxicityScore.py index 37a5a5ce8..62519d522 100644 --- a/validmind/tests/model_validation/ToxicityScore.py +++ b/validmind/tests/model_validation/ToxicityScore.py @@ -2,16 +2,21 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import evaluate import pandas as pd import plotly.graph_objects as go from validmind import RawData, tags, tasks +from validmind.vm_models import VMDataset, VMModel @tags("nlp", "text_data", "visualization") @tasks("text_classification", "text_summarization") -def ToxicityScore(dataset, model): +def ToxicityScore( + dataset: VMDataset, model: VMModel +) -> Tuple[pd.DataFrame, go.Figure, RawData]: """ Assesses the toxicity levels of texts generated by NLP models to identify and mitigate harmful or offensive content. diff --git a/validmind/tests/model_validation/embeddings/ClusterDistribution.py b/validmind/tests/model_validation/embeddings/ClusterDistribution.py index 7c604d527..107a9dc83 100644 --- a/validmind/tests/model_validation/embeddings/ClusterDistribution.py +++ b/validmind/tests/model_validation/embeddings/ClusterDistribution.py @@ -2,7 +2,10 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import plotly.express as px +import plotly.graph_objects as go from sklearn.cluster import KMeans from validmind import RawData, tags, tasks @@ -11,7 +14,9 @@ @tags("llm", "text_data", "embeddings", "visualization") @tasks("feature_extraction") -def ClusterDistribution(model: VMModel, dataset: VMDataset, num_clusters: int = 5): +def ClusterDistribution( + model: VMModel, dataset: VMDataset, num_clusters: int = 5 +) -> Tuple[go.Figure, RawData]: """ Assesses the distribution of text embeddings across clusters produced by a model using KMeans clustering. 
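ClusterDistribution (end of the hunk above) runs KMeans over embeddings and plots how the clusters are populated; a rough sketch of that flow with random stand-in embeddings:

```python
import numpy as np
import plotly.express as px
from sklearn.cluster import KMeans

# Stand-in for embeddings predicted by the model under test
embeddings = np.random.default_rng(0).normal(size=(200, 32))
clusters = KMeans(n_clusters=5, n_init=10, random_state=0).fit_predict(embeddings)

fig = px.histogram(x=clusters, nbins=5, title="Cluster distribution of embeddings")
fig.show()
```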
diff --git a/validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py b/validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py index 27e919d61..c04cb4ed8 100644 --- a/validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +++ b/validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py @@ -3,18 +3,23 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial from itertools import combinations +from typing import List, Tuple import numpy as np import pandas as pd import plotly.express as px +import plotly.graph_objects as go from sklearn.metrics.pairwise import cosine_similarity from validmind import RawData, tags, tasks +from validmind.vm_models import VMDataset, VMModel @tags("visualization", "dimensionality_reduction", "embeddings") @tasks("text_qa", "text_generation", "text_summarization") -def CosineSimilarityComparison(dataset, models): +def CosineSimilarityComparison( + dataset: VMDataset, models: List[VMModel] +) -> Tuple[go.Figure, RawData, pd.DataFrame]: """ Assesses the similarity between embeddings generated by different models using Cosine Similarity, providing both statistical and visual insights. diff --git a/validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py b/validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py index 723a54028..f0e26f1e6 100644 --- a/validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +++ b/validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py @@ -2,7 +2,10 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import plotly.express as px +import plotly.graph_objects as go from sklearn.metrics.pairwise import cosine_similarity from validmind import RawData, tags, tasks @@ -11,7 +14,9 @@ @tags("llm", "text_data", "embeddings", "visualization") @tasks("feature_extraction") -def CosineSimilarityDistribution(dataset: VMDataset, model: VMModel): +def CosineSimilarityDistribution( + dataset: VMDataset, model: VMModel +) -> Tuple[go.Figure, RawData]: """ Assesses the similarity between predicted text embeddings from a model using a Cosine Similarity distribution histogram. diff --git a/validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py b/validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py index ae0e8fe8c..383b5c7ec 100644 --- a/validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +++ b/validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py @@ -2,24 +2,28 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import numpy as np import plotly.express as px +import plotly.graph_objects as go from sklearn.metrics.pairwise import cosine_similarity from validmind import RawData, tags, tasks +from validmind.vm_models import VMDataset, VMModel @tags("visualization", "dimensionality_reduction", "embeddings") @tasks("text_qa", "text_generation", "text_summarization") def CosineSimilarityHeatmap( - dataset, - model, + dataset: VMDataset, + model: VMModel, title="Cosine Similarity Matrix", color="Cosine Similarity", xaxis_title="Index", yaxis_title="Index", color_scale="Blues", -): +) -> Tuple[go.Figure, RawData]: """ Generates an interactive heatmap to visualize the cosine similarities among embeddings derived from a given model. 
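The CosineSimilarityHeatmap defaults (title, axis labels, "Blues" color scale) appear in the hunk above; a minimal sketch of the heatmap itself, using random embeddings as a stand-in for model output:

```python
import numpy as np
import plotly.express as px
from sklearn.metrics.pairwise import cosine_similarity

embeddings = np.random.default_rng(1).normal(size=(10, 64))  # stand-in embeddings
similarity_matrix = cosine_similarity(embeddings)

fig = px.imshow(
    similarity_matrix,
    labels=dict(x="Index", y="Index", color="Cosine Similarity"),
    color_continuous_scale="Blues",
    title="Cosine Similarity Matrix",
)
fig.show()
```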
diff --git a/validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py b/validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py index 91a5ec851..ea551955b 100644 --- a/validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +++ b/validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py @@ -2,8 +2,11 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import numpy as np import plotly.express as px +import plotly.graph_objects as go from validmind import RawData, tags, tasks from validmind.vm_models import VMDataset, VMModel @@ -11,7 +14,9 @@ @tags("llm", "text_data", "embeddings", "visualization") @tasks("feature_extraction") -def DescriptiveAnalytics(dataset: VMDataset, model: VMModel): +def DescriptiveAnalytics( + dataset: VMDataset, model: VMModel +) -> Tuple[go.Figure, go.Figure, go.Figure, RawData]: """ Evaluates statistical properties of text embeddings in an ML model via mean, median, and standard deviation histograms. diff --git a/validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py b/validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py index f2a254f45..89dee1bc6 100644 --- a/validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +++ b/validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py @@ -2,9 +2,10 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial -from typing import Union +from typing import Tuple, Union import plotly.express as px +import plotly.graph_objects as go from sklearn.manifold import TSNE from validmind import RawData, tags, tasks @@ -17,11 +18,11 @@ @tags("llm", "text_data", "embeddings", "visualization") @tasks("feature_extraction") def EmbeddingsVisualization2D( - model: VMModel, dataset: VMDataset, + model: VMModel, cluster_column: Union[str, None] = None, perplexity: int = 30, -): +) -> Tuple[go.Figure, RawData]: """ Visualizes 2D representation of text embeddings generated by a model using t-SNE technique. diff --git a/validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py b/validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py index eaca371c7..2d272990d 100644 --- a/validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +++ b/validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py @@ -3,18 +3,23 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial from itertools import combinations +from typing import List, Tuple import numpy as np import pandas as pd import plotly.express as px +import plotly.graph_objects as go from sklearn.metrics.pairwise import euclidean_distances from validmind import RawData, tags, tasks +from validmind.vm_models import VMDataset, VMModel @tags("visualization", "dimensionality_reduction", "embeddings") @tasks("text_qa", "text_generation", "text_summarization") -def EuclideanDistanceComparison(dataset, models): +def EuclideanDistanceComparison( + dataset: VMDataset, models: List[VMModel] +) -> Tuple[pd.DataFrame, go.Figure, RawData]: """ Assesses and visualizes the dissimilarity between model embeddings using Euclidean distance, providing insights into model behavior and potential redundancy or diversity. 
diff --git a/validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py b/validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py index da68b0764..49352ed2f 100644 --- a/validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +++ b/validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py @@ -2,24 +2,28 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import numpy as np import plotly.express as px +import plotly.graph_objects as go from sklearn.metrics.pairwise import euclidean_distances from validmind import RawData, tags, tasks +from validmind.vm_models import VMDataset, VMModel @tags("visualization", "dimensionality_reduction", "embeddings") @tasks("text_qa", "text_generation", "text_summarization") def EuclideanDistanceHeatmap( - dataset, - model, + dataset: VMDataset, + model: VMModel, title="Euclidean Distance Matrix", color="Euclidean Distance", xaxis_title="Index", yaxis_title="Index", color_scale="Blues", -): +) -> Tuple[go.Figure, RawData]: """ Generates an interactive heatmap to visualize the Euclidean distances among embeddings derived from a given model. diff --git a/validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py b/validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py index a62df5d72..a3cb21813 100644 --- a/validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +++ b/validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py @@ -3,19 +3,24 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial import itertools +from typing import Tuple import numpy as np import pandas as pd import plotly.express as px +import plotly.graph_objects as go from sklearn.decomposition import PCA from sklearn.preprocessing import StandardScaler from validmind import RawData, tags, tasks +from validmind.vm_models import VMDataset, VMModel @tags("visualization", "dimensionality_reduction", "embeddings") @tasks("text_qa", "text_generation", "text_summarization") -def PCAComponentsPairwisePlots(dataset, model, n_components=3): +def PCAComponentsPairwisePlots( + dataset: VMDataset, model: VMModel, n_components: int = 3 +) -> Tuple[go.Figure, RawData]: """ Generates scatter plots for pairwise combinations of principal component analysis (PCA) components of model embeddings. diff --git a/validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py b/validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py index c39e92def..34cc47218 100644 --- a/validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +++ b/validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py @@ -3,7 +3,10 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial import re -from typing import Dict +from typing import Dict, Tuple + +import pandas as pd +import plotly.graph_objects as go from validmind import RawData, tags, tasks from validmind.vm_models import VMDataset, VMModel @@ -18,7 +21,7 @@ def StabilityAnalysisKeyword( model: VMModel, keyword_dict: Dict[str, str], mean_similarity_threshold: float = 0.7, -): +) -> Tuple[go.Figure, pd.DataFrame, RawData]: """ Evaluates robustness of embedding models to keyword swaps in the test dataset. 
diff --git a/validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py b/validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py index 92e250755..8b3103b0d 100644 --- a/validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +++ b/validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py @@ -4,6 +4,10 @@ import random import string +from typing import Tuple + +import pandas as pd +import plotly.graph_objects as go from validmind import RawData, tags, tasks from validmind.vm_models import VMDataset, VMModel @@ -69,7 +73,7 @@ def StabilityAnalysisRandomNoise( model: VMModel, probability: float = 0.02, mean_similarity_threshold: float = 0.7, -): +) -> Tuple[go.Figure, pd.DataFrame, RawData]: """ Assesses the robustness of text embeddings models to random noise introduced via text perturbations. diff --git a/validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py b/validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py index a8f14f081..5b4c822a2 100644 --- a/validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +++ b/validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py @@ -3,8 +3,11 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial import random +from typing import Tuple import nltk +import pandas as pd +import plotly.graph_objects as go from nltk.corpus import wordnet as wn from validmind import RawData, tags, tasks @@ -20,7 +23,7 @@ def StabilityAnalysisSynonyms( model: VMModel, probability: float = 0.02, mean_similarity_threshold: float = 0.7, -): +) -> Tuple[go.Figure, pd.DataFrame, RawData]: """ Evaluates the stability of text embeddings models when words in test data are replaced by their synonyms randomly. diff --git a/validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py b/validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py index 6dce9a58a..877572a11 100644 --- a/validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +++ b/validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py @@ -2,6 +2,10 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + +import pandas as pd +import plotly.graph_objects as go from transformers import MarianMTModel, MarianTokenizer from validmind import RawData, tags, tasks @@ -21,7 +25,7 @@ def StabilityAnalysisTranslation( source_lang: str = "en", target_lang: str = "fr", mean_similarity_threshold: float = 0.7, -): +) -> Tuple[go.Figure, pd.DataFrame, RawData]: """ Evaluates robustness of text embeddings models to noise introduced by translating the original text to another language and back. 
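StabilityAnalysisTranslation (end of the hunk above) perturbs text by translating it to another language and back. A hedged sketch of that back-translation step; the Helsinki-NLP/opus-mt checkpoints are an assumed choice consistent with the MarianMT imports in the diff, not necessarily the ones ValidMind uses.

```python
from transformers import MarianMTModel, MarianTokenizer


def back_translate(texts, source_lang="en", target_lang="fr"):
    """Translate texts to target_lang and back, as a noise-injection perturbation."""
    fwd_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
    bwd_name = f"Helsinki-NLP/opus-mt-{target_lang}-{source_lang}"
    fwd_tok = MarianTokenizer.from_pretrained(fwd_name)
    fwd_model = MarianMTModel.from_pretrained(fwd_name)
    bwd_tok = MarianTokenizer.from_pretrained(bwd_name)
    bwd_model = MarianMTModel.from_pretrained(bwd_name)

    def translate(batch, tokenizer, model):
        inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True)
        outputs = model.generate(**inputs)
        return tokenizer.batch_decode(outputs, skip_special_tokens=True)

    return translate(translate(texts, fwd_tok, fwd_model), bwd_tok, bwd_model)


print(back_translate(["The model performed well on the holdout set."]))
```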
diff --git a/validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py b/validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py index 816004a0b..2e81c2637 100644 --- a/validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +++ b/validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py @@ -3,25 +3,28 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial import itertools +from typing import Tuple import numpy as np import pandas as pd import plotly.express as px +import plotly.graph_objects as go from sklearn.manifold import TSNE from sklearn.preprocessing import StandardScaler from validmind import RawData, tags, tasks +from validmind.vm_models import VMDataset, VMModel @tags("visualization", "dimensionality_reduction", "embeddings") @tasks("text_qa", "text_generation", "text_summarization") def TSNEComponentsPairwisePlots( - dataset, - model, - n_components=2, - perplexity=30, - title="t-SNE", -): + dataset: VMDataset, + model: VMModel, + n_components: int = 2, + perplexity: int = 30, + title: str = "t-SNE", +) -> Tuple[go.Figure, RawData]: """ Creates scatter plots for pairwise combinations of t-SNE components to visualize embeddings and highlight potential clustering structures. diff --git a/validmind/tests/model_validation/ragas/AnswerCorrectness.py b/validmind/tests/model_validation/ragas/AnswerCorrectness.py index 51936ab41..fa96df436 100644 --- a/validmind/tests/model_validation/ragas/AnswerCorrectness.py +++ b/validmind/tests/model_validation/ragas/AnswerCorrectness.py @@ -3,12 +3,15 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial import warnings +from typing import Dict, Tuple import plotly.express as px +import plotly.graph_objects as go from datasets import Dataset from validmind import RawData, tags, tasks from validmind.errors import MissingDependencyError +from validmind.vm_models import VMDataset from .utils import get_ragas_config, get_renamed_columns @@ -30,13 +33,13 @@ @tags("ragas", "llm") @tasks("text_qa", "text_generation", "text_summarization") def AnswerCorrectness( - dataset, - user_input_column="user_input", - response_column="response", - reference_column="reference", + dataset: VMDataset, + user_input_column: str = "user_input", + response_column: str = "response", + reference_column: str = "reference", judge_llm=None, judge_embeddings=None, -): +) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]: """ Evaluates the correctness of answers in a dataset with respect to the provided ground truths and visualizes the results in a histogram. 
diff --git a/validmind/tests/model_validation/ragas/AspectCritic.py b/validmind/tests/model_validation/ragas/AspectCritic.py index 8eb3e921b..4eba617b5 100644 --- a/validmind/tests/model_validation/ragas/AspectCritic.py +++ b/validmind/tests/model_validation/ragas/AspectCritic.py @@ -3,12 +3,15 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial import warnings +from typing import Dict, List, Optional, Tuple import plotly.express as px +import plotly.graph_objects as go from datasets import Dataset from validmind import RawData, tags, tasks from validmind.errors import MissingDependencyError +from validmind.vm_models import VMDataset from .utils import get_ragas_config, get_renamed_columns @@ -39,21 +42,21 @@ @tags("ragas", "llm", "qualitative") @tasks("text_summarization", "text_generation", "text_qa") def AspectCritic( - dataset, - user_input_column="user_input", - response_column="response", - retrieved_contexts_column=None, - aspects: list = [ + dataset: VMDataset, + user_input_column: str = "user_input", + response_column: str = "response", + retrieved_contexts_column: Optional[str] = None, + aspects: List[str] = [ "coherence", "conciseness", "correctness", "harmfulness", "maliciousness", ], - additional_aspects: list = None, + additional_aspects: Optional[List[Tuple[str, str]]] = None, judge_llm=None, judge_embeddings=None, -): +) -> Tuple[Dict[str, list], go.Figure, RawData]: """ Evaluates generations against the following aspects: harmfulness, maliciousness, coherence, correctness, and conciseness. diff --git a/validmind/tests/model_validation/ragas/ContextEntityRecall.py b/validmind/tests/model_validation/ragas/ContextEntityRecall.py index 3f5aa6a35..31c395212 100644 --- a/validmind/tests/model_validation/ragas/ContextEntityRecall.py +++ b/validmind/tests/model_validation/ragas/ContextEntityRecall.py @@ -3,12 +3,15 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial import warnings +from typing import Dict, Tuple import plotly.express as px +import plotly.graph_objects as go from datasets import Dataset from validmind import RawData, tags, tasks from validmind.errors import MissingDependencyError +from validmind.vm_models import VMDataset from .utils import get_ragas_config, get_renamed_columns @@ -30,12 +33,12 @@ @tags("ragas", "llm", "retrieval_performance") @tasks("text_qa", "text_generation", "text_summarization") def ContextEntityRecall( - dataset, + dataset: VMDataset, retrieved_contexts_column: str = "retrieved_contexts", reference_column: str = "reference", judge_llm=None, judge_embeddings=None, -): +) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]: """ Evaluates the context entity recall for dataset entries and visualizes the results. 
diff --git a/validmind/tests/model_validation/ragas/ContextPrecision.py b/validmind/tests/model_validation/ragas/ContextPrecision.py index 9eb455f3e..bb7dae2f3 100644 --- a/validmind/tests/model_validation/ragas/ContextPrecision.py +++ b/validmind/tests/model_validation/ragas/ContextPrecision.py @@ -3,12 +3,15 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial import warnings +from typing import Dict, Tuple import plotly.express as px +import plotly.graph_objects as go from datasets import Dataset from validmind import RawData, tags, tasks from validmind.errors import MissingDependencyError +from validmind.vm_models import VMDataset from .utils import get_ragas_config, get_renamed_columns @@ -30,13 +33,13 @@ @tags("ragas", "llm", "retrieval_performance") @tasks("text_qa", "text_generation", "text_summarization", "text_classification") def ContextPrecision( - dataset, + dataset: VMDataset, user_input_column: str = "user_input", retrieved_contexts_column: str = "retrieved_contexts", reference_column: str = "reference", judge_llm=None, judge_embeddings=None, -): # noqa: B950 +) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]: """ Context Precision is a metric that evaluates whether all of the ground-truth relevant items present in the contexts are ranked higher or not. Ideally all the diff --git a/validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py b/validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py index 5ac58cab9..789218e4e 100644 --- a/validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +++ b/validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py @@ -3,12 +3,15 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial import warnings +from typing import Dict, Tuple import plotly.express as px +import plotly.graph_objects as go from datasets import Dataset from validmind import RawData, tags, tasks from validmind.errors import MissingDependencyError +from validmind.vm_models import VMDataset from .utils import get_ragas_config, get_renamed_columns @@ -30,13 +33,13 @@ @tags("ragas", "llm", "retrieval_performance") @tasks("text_qa", "text_generation", "text_summarization", "text_classification") def ContextPrecisionWithoutReference( - dataset, + dataset: VMDataset, user_input_column: str = "user_input", retrieved_contexts_column: str = "retrieved_contexts", response_column: str = "response", judge_llm=None, judge_embeddings=None, -): # noqa: B950 +) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]: """ Context Precision Without Reference is a metric used to evaluate the relevance of retrieved contexts compared to the expected response for a given user input. 
This diff --git a/validmind/tests/model_validation/ragas/ContextRecall.py b/validmind/tests/model_validation/ragas/ContextRecall.py index 094927037..bdf681c99 100644 --- a/validmind/tests/model_validation/ragas/ContextRecall.py +++ b/validmind/tests/model_validation/ragas/ContextRecall.py @@ -3,12 +3,15 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial import warnings +from typing import Dict, Tuple import plotly.express as px +import plotly.graph_objects as go from datasets import Dataset from validmind import RawData, tags, tasks from validmind.errors import MissingDependencyError +from validmind.vm_models import VMDataset from .utils import get_ragas_config, get_renamed_columns @@ -30,13 +33,13 @@ @tags("ragas", "llm", "retrieval_performance") @tasks("text_qa", "text_generation", "text_summarization", "text_classification") def ContextRecall( - dataset, + dataset: VMDataset, user_input_column: str = "user_input", retrieved_contexts_column: str = "retrieved_contexts", reference_column: str = "reference", judge_llm=None, judge_embeddings=None, -): +) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]: """ Context recall measures the extent to which the retrieved context aligns with the annotated answer, treated as the ground truth. It is computed based on the `ground diff --git a/validmind/tests/model_validation/ragas/Faithfulness.py b/validmind/tests/model_validation/ragas/Faithfulness.py index 6b83f26c8..f8fe77f50 100644 --- a/validmind/tests/model_validation/ragas/Faithfulness.py +++ b/validmind/tests/model_validation/ragas/Faithfulness.py @@ -3,12 +3,15 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial import warnings +from typing import Dict, Tuple import plotly.express as px +import plotly.graph_objects as go from datasets import Dataset from validmind import RawData, tags, tasks from validmind.errors import MissingDependencyError +from validmind.vm_models import VMDataset from .utils import get_ragas_config, get_renamed_columns @@ -30,13 +33,13 @@ @tags("ragas", "llm", "rag_performance") @tasks("text_qa", "text_generation", "text_summarization") def Faithfulness( - dataset, - user_input_column="user_input", - response_column="response", - retrieved_contexts_column="retrieved_contexts", + dataset: VMDataset, + user_input_column: str = "user_input", + response_column: str = "response", + retrieved_contexts_column: str = "retrieved_contexts", judge_llm=None, judge_embeddings=None, -): # noqa +) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]: """ Evaluates the faithfulness of the generated answers with respect to retrieved contexts. 
diff --git a/validmind/tests/model_validation/ragas/NoiseSensitivity.py b/validmind/tests/model_validation/ragas/NoiseSensitivity.py index 9a5d6e218..a6dd510d9 100644 --- a/validmind/tests/model_validation/ragas/NoiseSensitivity.py +++ b/validmind/tests/model_validation/ragas/NoiseSensitivity.py @@ -3,12 +3,15 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial import warnings +from typing import Dict, Tuple import plotly.express as px +import plotly.graph_objects as go from datasets import Dataset from validmind import RawData, tags, tasks from validmind.errors import MissingDependencyError +from validmind.vm_models import VMDataset from .utils import get_ragas_config, get_renamed_columns @@ -32,15 +35,15 @@ @tags("ragas", "llm", "rag_performance") @tasks("text_qa", "text_generation", "text_summarization") def NoiseSensitivity( - dataset, - response_column="response", - retrieved_contexts_column="retrieved_contexts", - reference_column="reference", - focus="relevant", - user_input_column="user_input", + dataset: VMDataset, + response_column: str = "response", + retrieved_contexts_column: str = "retrieved_contexts", + reference_column: str = "reference", + focus: str = "relevant", + user_input_column: str = "user_input", judge_llm=None, judge_embeddings=None, -): +) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]: """ Assesses the sensitivity of a Large Language Model (LLM) to noise in retrieved context by measuring how often it generates incorrect responses. diff --git a/validmind/tests/model_validation/ragas/ResponseRelevancy.py b/validmind/tests/model_validation/ragas/ResponseRelevancy.py index 63d633355..6f9a9276c 100644 --- a/validmind/tests/model_validation/ragas/ResponseRelevancy.py +++ b/validmind/tests/model_validation/ragas/ResponseRelevancy.py @@ -3,12 +3,15 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial import warnings +from typing import Dict, Tuple import plotly.express as px +import plotly.graph_objects as go from datasets import Dataset from validmind import RawData, tags, tasks from validmind.errors import MissingDependencyError +from validmind.vm_models import VMDataset from .utils import get_ragas_config, get_renamed_columns @@ -30,13 +33,13 @@ @tags("ragas", "llm", "rag_performance") @tasks("text_qa", "text_generation", "text_summarization") def ResponseRelevancy( - dataset, - user_input_column="user_input", - retrieved_contexts_column=None, - response_column="response", + dataset: VMDataset, + user_input_column: str = "user_input", + retrieved_contexts_column: str = None, + response_column: str = "response", judge_llm=None, judge_embeddings=None, -): +) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]: """ Assesses how pertinent the generated answer is to the given prompt. 
diff --git a/validmind/tests/model_validation/ragas/SemanticSimilarity.py b/validmind/tests/model_validation/ragas/SemanticSimilarity.py index c963dec5a..81ffe9588 100644 --- a/validmind/tests/model_validation/ragas/SemanticSimilarity.py +++ b/validmind/tests/model_validation/ragas/SemanticSimilarity.py @@ -3,12 +3,15 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial import warnings +from typing import Dict, Tuple import plotly.express as px +import plotly.graph_objects as go from datasets import Dataset from validmind import RawData, tags, tasks from validmind.errors import MissingDependencyError +from validmind.vm_models import VMDataset from .utils import get_ragas_config, get_renamed_columns @@ -30,12 +33,12 @@ @tags("ragas", "llm") @tasks("text_qa", "text_generation", "text_summarization") def SemanticSimilarity( - dataset, - response_column="response", - reference_column="reference", + dataset: VMDataset, + response_column: str = "response", + reference_column: str = "reference", judge_llm=None, judge_embeddings=None, -): +) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]: """ Calculates the semantic similarity between generated responses and ground truths diff --git a/validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py b/validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py index 5792ffdef..cc5838d1f 100644 --- a/validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +++ b/validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict, List, Tuple + from sklearn.metrics import adjusted_mutual_info_score from validmind import RawData, tags, tasks @@ -10,7 +12,9 @@ @tags("sklearn", "model_performance", "clustering") @tasks("clustering") -def AdjustedMutualInformation(model: VMModel, dataset: VMDataset): +def AdjustedMutualInformation( + model: VMModel, dataset: VMDataset +) -> Tuple[List[Dict[str, float]], RawData]: """ Evaluates clustering model performance by measuring mutual information between true and predicted labels, adjusting for chance. diff --git a/validmind/tests/model_validation/sklearn/AdjustedRandIndex.py b/validmind/tests/model_validation/sklearn/AdjustedRandIndex.py index 7d529d8d9..572a17687 100644 --- a/validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +++ b/validmind/tests/model_validation/sklearn/AdjustedRandIndex.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict, List, Tuple + from sklearn.metrics import adjusted_rand_score from validmind import RawData, tags, tasks @@ -10,7 +12,9 @@ @tags("sklearn", "model_performance", "clustering") @tasks("clustering") -def AdjustedRandIndex(model: VMModel, dataset: VMDataset): +def AdjustedRandIndex( + model: VMModel, dataset: VMDataset +) -> Tuple[List[Dict[str, float]], RawData]: """ Measures the similarity between two data clusters using the Adjusted Rand Index (ARI) metric in clustering machine learning models. 
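AdjustedMutualInformation and AdjustedRandIndex (hunks above) wrap the corresponding scikit-learn metrics; a minimal sketch with toy labels showing why these scores are invariant to label permutation:

```python
from sklearn.metrics import adjusted_mutual_info_score, adjusted_rand_score

y_true = [0, 0, 1, 1, 2, 2]
y_pred = [1, 1, 0, 0, 2, 2]  # same grouping, permuted cluster labels

print(adjusted_rand_score(y_true, y_pred))         # 1.0 - label permutation does not matter
print(adjusted_mutual_info_score(y_true, y_pred))  # 1.0
```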
diff --git a/validmind/tests/model_validation/sklearn/CalibrationCurve.py b/validmind/tests/model_validation/sklearn/CalibrationCurve.py index 90bfd5bf8..52fbda934 100644 --- a/validmind/tests/model_validation/sklearn/CalibrationCurve.py +++ b/validmind/tests/model_validation/sklearn/CalibrationCurve.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import plotly.graph_objects as go from sklearn.calibration import calibration_curve @@ -12,7 +14,9 @@ @tags("sklearn", "model_performance", "classification") @tasks("classification") -def CalibrationCurve(model: VMModel, dataset: VMDataset, n_bins: int = 10): +def CalibrationCurve( + model: VMModel, dataset: VMDataset, n_bins: int = 10 +) -> Tuple[go.Figure, RawData]: """ Evaluates the calibration of probability estimates by comparing predicted probabilities against observed frequencies. diff --git a/validmind/tests/model_validation/sklearn/ClassifierPerformance.py b/validmind/tests/model_validation/sklearn/ClassifierPerformance.py index a5415d67a..6a4af3ca3 100644 --- a/validmind/tests/model_validation/sklearn/ClassifierPerformance.py +++ b/validmind/tests/model_validation/sklearn/ClassifierPerformance.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict, List + import numpy as np from sklearn.metrics import classification_report, roc_auc_score from sklearn.preprocessing import LabelBinarizer @@ -20,7 +22,9 @@ def multiclass_roc_auc_score(y_test, y_pred, average="macro"): "sklearn", "binary_classification", "multiclass_classification", "model_performance" ) @tasks("classification", "text_classification") -def ClassifierPerformance(dataset: VMDataset, model: VMModel, average: str = "macro"): +def ClassifierPerformance( + dataset: VMDataset, model: VMModel, average: str = "macro" +) -> Dict[str, List[Dict[str, float]]]: """ Evaluates performance of binary or multiclass classification models using precision, recall, F1-Score, accuracy, and ROC AUC scores. diff --git a/validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py b/validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py index 6f393eaef..22e3e618d 100644 --- a/validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +++ b/validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict, List, Tuple + import numpy as np from sklearn.metrics.pairwise import cosine_similarity @@ -12,7 +14,9 @@ @tags("sklearn", "model_performance", "clustering") @tasks("clustering") -def ClusterCosineSimilarity(model: VMModel, dataset: VMDataset): +def ClusterCosineSimilarity( + model: VMModel, dataset: VMDataset +) -> Tuple[List[Dict[str, float]], RawData]: """ Measures the intra-cluster similarity of a clustering model using cosine similarity. diff --git a/validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py b/validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py index ccb737be0..6c8c74dee 100644 --- a/validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +++ b/validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. 
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict, List, Tuple + from sklearn.metrics import ( adjusted_mutual_info_score, adjusted_rand_score, @@ -69,7 +71,9 @@ @tags("sklearn", "model_performance", "clustering") @tasks("clustering") -def ClusterPerformanceMetrics(model: VMModel, dataset: VMDataset): +def ClusterPerformanceMetrics( + model: VMModel, dataset: VMDataset +) -> Tuple[List[Dict[str, float]], RawData]: """ Evaluates the performance of clustering machine learning models using multiple established metrics. diff --git a/validmind/tests/model_validation/sklearn/CompletenessScore.py b/validmind/tests/model_validation/sklearn/CompletenessScore.py index ebf990ee4..190582e3a 100644 --- a/validmind/tests/model_validation/sklearn/CompletenessScore.py +++ b/validmind/tests/model_validation/sklearn/CompletenessScore.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict, List, Tuple + from sklearn.metrics import completeness_score from validmind import RawData, tags, tasks @@ -10,7 +12,9 @@ @tags("sklearn", "model_performance", "clustering") @tasks("clustering") -def CompletenessScore(model: VMModel, dataset: VMDataset): +def CompletenessScore( + model: VMModel, dataset: VMDataset +) -> Tuple[List[Dict[str, float]], RawData]: """ Evaluates a clustering model's capacity to categorize instances from a single class into the same cluster. diff --git a/validmind/tests/model_validation/sklearn/ConfusionMatrix.py b/validmind/tests/model_validation/sklearn/ConfusionMatrix.py index 81017c87b..281cb0108 100644 --- a/validmind/tests/model_validation/sklearn/ConfusionMatrix.py +++ b/validmind/tests/model_validation/sklearn/ConfusionMatrix.py @@ -3,8 +3,11 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import numpy as np import plotly.figure_factory as ff +import plotly.graph_objects as go from sklearn.metrics import confusion_matrix from validmind import RawData, tags, tasks @@ -23,7 +26,7 @@ def ConfusionMatrix( dataset: VMDataset, model: VMModel, threshold: float = 0.5, -): +) -> Tuple[go.Figure, RawData]: """ Evaluates and visually represents the classification ML model's predictive performance using a Confusion Matrix heatmap. diff --git a/validmind/tests/model_validation/sklearn/FeatureImportance.py b/validmind/tests/model_validation/sklearn/FeatureImportance.py index 8f3ed77ca..4dff302f4 100644 --- a/validmind/tests/model_validation/sklearn/FeatureImportance.py +++ b/validmind/tests/model_validation/sklearn/FeatureImportance.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import pandas as pd from sklearn.inspection import permutation_importance @@ -11,7 +13,9 @@ @tags("model_explainability", "sklearn") @tasks("regression", "time_series_forecasting") -def FeatureImportance(dataset: VMDataset, model: VMModel, num_features: int = 3): +def FeatureImportance( + dataset: VMDataset, model: VMModel, num_features: int = 3 +) -> Tuple[pd.DataFrame, RawData]: """ Compute feature importance scores for a given model and generate a summary table with the top important features. 
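ClassifierPerformance above now advertises `Dict[str, List[Dict[str, float]]]`, that is, several named tables where each table is a list of row dicts. The following self-contained sketch builds that shape from a plain scikit-learn classification report; it only illustrates the data structure, not the test's implementation.

# Illustrative only: the data shape behind Dict[str, List[Dict[str, float]]],
# built from a plain scikit-learn classification report on synthetic data.
from typing import Dict, List

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report


def named_tables() -> Dict[str, List[Dict[str, float]]]:
    X, y = make_classification(n_samples=200, random_state=0)
    preds = LogisticRegression(max_iter=1000).fit(X, y).predict(X)
    report = classification_report(y, preds, output_dict=True)
    per_class = [
        {"Class": float(label), **scores}
        for label, scores in report.items()
        if label in {"0", "1"}  # skip the macro/weighted average rows
    ]
    return {
        "Precision, Recall and F1 per Class": per_class,
        "Overall Accuracy": [{"Accuracy": report["accuracy"]}],
    }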
diff --git a/validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py b/validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py index cf314fe83..485c3f028 100644 --- a/validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +++ b/validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict, List, Tuple + from sklearn import metrics from validmind import RawData, tags, tasks @@ -10,7 +12,9 @@ @tags("sklearn", "model_performance") @tasks("clustering") -def FowlkesMallowsScore(dataset: VMDataset, model: VMModel): +def FowlkesMallowsScore( + dataset: VMDataset, model: VMModel +) -> Tuple[List[Dict[str, float]], RawData]: """ Evaluates the similarity between predicted and actual cluster assignments in a model using the Fowlkes-Mallows score. diff --git a/validmind/tests/model_validation/sklearn/HomogeneityScore.py b/validmind/tests/model_validation/sklearn/HomogeneityScore.py index 739d6b086..31442cf68 100644 --- a/validmind/tests/model_validation/sklearn/HomogeneityScore.py +++ b/validmind/tests/model_validation/sklearn/HomogeneityScore.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict, List, Tuple + from sklearn import metrics from validmind import RawData, tags, tasks @@ -10,7 +12,9 @@ @tags("sklearn", "model_performance") @tasks("clustering") -def HomogeneityScore(dataset: VMDataset, model: VMModel): +def HomogeneityScore( + dataset: VMDataset, model: VMModel +) -> Tuple[List[Dict[str, float]], RawData]: """ Assesses clustering homogeneity by comparing true and predicted labels, scoring from 0 (heterogeneous) to 1 (homogeneous). diff --git a/validmind/tests/model_validation/sklearn/HyperParametersTuning.py b/validmind/tests/model_validation/sklearn/HyperParametersTuning.py index c12d12b29..0b93decd9 100644 --- a/validmind/tests/model_validation/sklearn/HyperParametersTuning.py +++ b/validmind/tests/model_validation/sklearn/HyperParametersTuning.py @@ -2,7 +2,7 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial -from typing import Dict, List, Union +from typing import Dict, List, Tuple, Union from sklearn.metrics import make_scorer, recall_score from sklearn.model_selection import GridSearchCV @@ -11,8 +11,6 @@ from validmind.vm_models import VMDataset, VMModel -@tags("sklearn", "model_performance") -@tasks("classification", "clustering") def custom_recall(y_true, y_pred_proba, threshold=0.5): y_pred = (y_pred_proba >= threshold).astype(int) return recall_score(y_true, y_pred) @@ -65,7 +63,7 @@ def HyperParametersTuning( scoring: Union[str, List, Dict] = None, thresholds: Union[float, List[float]] = None, fit_params: dict = None, -): +) -> Tuple[List[Dict[str, float]], RawData]: """ Performs exhaustive grid search over specified parameter ranges to find optimal model configurations across different metrics and decision thresholds. 
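HyperParametersTuning above now returns `Tuple[List[Dict[str, float]], RawData]`. A self-contained sketch of flattening a GridSearchCV run into that list-of-dicts table follows; the column names and RawData keyword are illustrative assumptions.

# Sketch: GridSearchCV results as a list-of-dicts table plus RawData,
# matching the annotation above. The RawData keyword name is an assumption.
from typing import Dict, List, Tuple

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

from validmind import RawData


def grid_search_table() -> Tuple[List[Dict[str, float]], RawData]:
    X, y = make_classification(n_samples=200, random_state=0)
    grid = GridSearchCV(
        LogisticRegression(max_iter=1000),
        param_grid={"C": [0.1, 1.0, 10.0]},
        scoring="recall",
        cv=3,
    ).fit(X, y)
    table = [
        {"C": params["C"], "Mean Recall": score}
        for params, score in zip(
            grid.cv_results_["params"], grid.cv_results_["mean_test_score"]
        )
    ]
    return table, RawData(cv_results=grid.cv_results_)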
diff --git a/validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py b/validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py index 485f93b34..6d7496451 100644 --- a/validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +++ b/validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py @@ -2,7 +2,7 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial -from typing import List, Union +from typing import List, Optional, Tuple import numpy as np import plotly.graph_objects as go @@ -19,8 +19,8 @@ @tags("sklearn", "model_performance", "kmeans") @tasks("clustering") def KMeansClustersOptimization( - model: VMModel, dataset: VMDataset, n_clusters: Union[List[int], None] = None -): + model: VMModel, dataset: VMDataset, n_clusters: Optional[List[int]] = None +) -> Tuple[go.Figure, RawData]: """ Optimizes the number of clusters in K-means models using Elbow and Silhouette methods. diff --git a/validmind/tests/model_validation/sklearn/MinimumAccuracy.py b/validmind/tests/model_validation/sklearn/MinimumAccuracy.py index 7e470cfc1..16a87542e 100644 --- a/validmind/tests/model_validation/sklearn/MinimumAccuracy.py +++ b/validmind/tests/model_validation/sklearn/MinimumAccuracy.py @@ -1,6 +1,8 @@ # Copyright © 2023-2024 ValidMind Inc. All rights reserved. # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict, List, Tuple + from sklearn.metrics import accuracy_score from validmind import RawData @@ -12,7 +14,9 @@ "sklearn", "binary_classification", "multiclass_classification", "model_performance" ) @tasks("classification", "text_classification") -def MinimumAccuracy(dataset: VMDataset, model: VMModel, min_threshold: float = 0.7): +def MinimumAccuracy( + dataset: VMDataset, model: VMModel, min_threshold: float = 0.7 +) -> Tuple[List[Dict[str, float]], bool, RawData]: """ Checks if the model's prediction accuracy meets or surpasses a specified threshold. diff --git a/validmind/tests/model_validation/sklearn/MinimumF1Score.py b/validmind/tests/model_validation/sklearn/MinimumF1Score.py index f3894d70c..1494fdaa8 100644 --- a/validmind/tests/model_validation/sklearn/MinimumF1Score.py +++ b/validmind/tests/model_validation/sklearn/MinimumF1Score.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict, List, Tuple + import numpy as np from sklearn.metrics import f1_score @@ -14,7 +16,9 @@ "sklearn", "binary_classification", "multiclass_classification", "model_performance" ) @tasks("classification", "text_classification") -def MinimumF1Score(dataset: VMDataset, model: VMModel, min_threshold: float = 0.5): +def MinimumF1Score( + dataset: VMDataset, model: VMModel, min_threshold: float = 0.5 +) -> Tuple[List[Dict[str, float]], bool, RawData]: """ Assesses if the model's F1 score on the validation set meets a predefined minimum threshold, ensuring balanced performance between precision and recall. 
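MinimumAccuracy and MinimumF1Score above now declare `Tuple[List[Dict[str, float]], bool, RawData]`, where the boolean is the pass/fail outcome of the threshold check. Below is a hedged sketch of a custom test in the same style; `dataset.y`, `dataset.y_pred(model)`, and the RawData keyword are assumptions.

# Sketch of a threshold-style test mirroring the annotations above.
# dataset.y, dataset.y_pred(model), and the RawData keyword are assumptions.
from typing import Dict, List, Tuple

from sklearn.metrics import balanced_accuracy_score

from validmind import RawData, tags, tasks
from validmind.vm_models import VMDataset, VMModel


@tags("sklearn", "model_performance")
@tasks("classification")
def MinimumBalancedAccuracy(
    dataset: VMDataset, model: VMModel, min_threshold: float = 0.7
) -> Tuple[List[Dict[str, float]], bool, RawData]:
    """Hypothetical check: balanced accuracy must meet a minimum threshold."""
    score = balanced_accuracy_score(dataset.y, dataset.y_pred(model))
    passed = bool(score >= min_threshold)
    table = [{"Balanced Accuracy": score, "Threshold": min_threshold}]
    return table, passed, RawData(balanced_accuracy=score)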
diff --git a/validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py b/validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py index 61b945f10..f37a50f17 100644 --- a/validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +++ b/validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict, List, Tuple + import numpy as np from sklearn.metrics import roc_auc_score from sklearn.preprocessing import LabelBinarizer @@ -14,7 +16,9 @@ "sklearn", "binary_classification", "multiclass_classification", "model_performance" ) @tasks("classification", "text_classification") -def MinimumROCAUCScore(dataset: VMDataset, model: VMModel, min_threshold: float = 0.5): +def MinimumROCAUCScore( + dataset: VMDataset, model: VMModel, min_threshold: float = 0.5 +) -> Tuple[List[Dict[str, float]], bool, RawData]: """ Validates model by checking if the ROC AUC score meets or surpasses a specified threshold. diff --git a/validmind/tests/model_validation/sklearn/ModelParameters.py b/validmind/tests/model_validation/sklearn/ModelParameters.py index f24d83dd5..789361381 100644 --- a/validmind/tests/model_validation/sklearn/ModelParameters.py +++ b/validmind/tests/model_validation/sklearn/ModelParameters.py @@ -2,14 +2,19 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import List, Optional + import pandas as pd from validmind import tags, tasks +from validmind.vm_models import VMModel @tags("model_training", "metadata") @tasks("classification", "regression") -def ModelParameters(model, model_params=None): +def ModelParameters( + model: VMModel, model_params: Optional[List[str]] = None +) -> pd.DataFrame: """ Extracts and displays model parameters in a structured format for transparency and reproducibility. diff --git a/validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py b/validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py index a0f9002bb..69d524a6e 100644 --- a/validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +++ b/validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict, List + import numpy as np from sklearn.metrics import classification_report @@ -19,7 +21,9 @@ "model_comparison", ) @tasks("classification", "text_classification") -def ModelsPerformanceComparison(dataset: VMDataset, models: list[VMModel]): +def ModelsPerformanceComparison( + dataset: VMDataset, models: list[VMModel] +) -> Dict[str, List[Dict[str, float]]]: """ Evaluates and compares the performance of multiple Machine Learning models using various metrics like accuracy, precision, recall, and F1 score. diff --git a/validmind/tests/model_validation/sklearn/OverfitDiagnosis.py b/validmind/tests/model_validation/sklearn/OverfitDiagnosis.py index 9994efd82..c7097b047 100644 --- a/validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +++ b/validmind/tests/model_validation/sklearn/OverfitDiagnosis.py @@ -2,11 +2,12 @@ # See the LICENSE file in the root of this repository for details. 
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial -from typing import List +from typing import Dict, List, Tuple import matplotlib.pyplot as plt import numpy as np import pandas as pd +import plotly.graph_objects as go import seaborn as sns from sklearn import metrics @@ -178,7 +179,7 @@ def OverfitDiagnosis( datasets: List[VMDataset], metric: str = None, cut_off_threshold: float = DEFAULT_THRESHOLD, -): +) -> Tuple[Dict[str, List[Dict[str, float]]], go.Figure, RawData]: """ Assesses potential overfitting in a model's predictions, identifying regions where performance between training and testing sets deviates significantly. diff --git a/validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py b/validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py index 3f1d0d196..87ef2daa1 100644 --- a/validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +++ b/validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py @@ -2,7 +2,7 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial -from typing import Union +from typing import Optional, Tuple import plotly.graph_objects as go from sklearn.inspection import permutation_importance @@ -26,9 +26,9 @@ def PermutationFeatureImportance( model: VMModel, dataset: VMDataset, - fontsize: Union[int, None] = None, - figure_height: Union[int, None] = None, -): + fontsize: Optional[int] = None, + figure_height: Optional[int] = None, +) -> Tuple[go.Figure, RawData]: """ Assesses the significance of each feature in a model by evaluating the impact on model performance when feature values are randomly rearranged. diff --git a/validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py b/validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py index 3c3ac8ee6..45791bf34 100644 --- a/validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +++ b/validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py @@ -2,7 +2,7 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial -from typing import List +from typing import Dict, List, Tuple import numpy as np import pandas as pd @@ -82,7 +82,7 @@ def calculate_psi(score_initial, score_new, num_bins=10, mode="fixed"): @tasks("classification", "text_classification") def PopulationStabilityIndex( datasets: List[VMDataset], model: VMModel, num_bins: int = 10, mode: str = "fixed" -): +) -> Tuple[Dict[str, List[Dict[str, float]]], go.Figure, RawData]: """ Assesses the Population Stability Index (PSI) to quantify the stability of an ML model's predictions across different datasets. diff --git a/validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py b/validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py index b521a333a..d40167c92 100644 --- a/validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +++ b/validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. 
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import numpy as np import plotly.graph_objects as go from sklearn.metrics import precision_recall_curve @@ -14,7 +16,9 @@ @tags("sklearn", "binary_classification", "model_performance", "visualization") @tasks("classification", "text_classification") -def PrecisionRecallCurve(model: VMModel, dataset: VMDataset): +def PrecisionRecallCurve( + model: VMModel, dataset: VMDataset +) -> Tuple[go.Figure, RawData]: """ Evaluates the precision-recall trade-off for binary classification models and visualizes the Precision-Recall curve. diff --git a/validmind/tests/model_validation/sklearn/ROCCurve.py b/validmind/tests/model_validation/sklearn/ROCCurve.py index 9f3161931..47b25df03 100644 --- a/validmind/tests/model_validation/sklearn/ROCCurve.py +++ b/validmind/tests/model_validation/sklearn/ROCCurve.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import numpy as np import plotly.graph_objects as go from sklearn.metrics import roc_auc_score, roc_curve @@ -19,7 +21,7 @@ "visualization", ) @tasks("classification", "text_classification") -def ROCCurve(model: VMModel, dataset: VMDataset): +def ROCCurve(model: VMModel, dataset: VMDataset) -> Tuple[go.Figure, RawData]: """ Evaluates binary classification model performance by generating and plotting the Receiver Operating Characteristic (ROC) curve and calculating the Area Under Curve (AUC) score. diff --git a/validmind/tests/model_validation/sklearn/RegressionErrors.py b/validmind/tests/model_validation/sklearn/RegressionErrors.py index f10ce3a48..a3610243a 100644 --- a/validmind/tests/model_validation/sklearn/RegressionErrors.py +++ b/validmind/tests/model_validation/sklearn/RegressionErrors.py @@ -2,16 +2,21 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import numpy as np import pandas as pd from sklearn import metrics from validmind import RawData, tags, tasks +from validmind.vm_models import VMDataset, VMModel @tags("sklearn", "model_performance") @tasks("regression", "classification") -def RegressionErrors(model, dataset): +def RegressionErrors( + model: VMModel, dataset: VMDataset +) -> Tuple[pd.DataFrame, RawData]: """ Assesses the performance and error distribution of a regression model using various error metrics. diff --git a/validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py b/validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py index 3a88c1405..be7dd8b74 100644 --- a/validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +++ b/validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py @@ -2,19 +2,24 @@ # See the LICENSE file in the root of this repository for details. 
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import List + import numpy as np import pandas as pd from sklearn import metrics from validmind import tags, tasks from validmind.logging import get_logger +from validmind.vm_models import VMDataset, VMModel logger = get_logger(__name__) @tags("model_performance", "sklearn") @tasks("regression", "time_series_forecasting") -def RegressionErrorsComparison(datasets, models): +def RegressionErrorsComparison( + datasets: List[VMDataset], models: List[VMModel] +) -> pd.DataFrame: """ Assesses multiple regression error metrics to compare model performance across different datasets, emphasizing systematic overestimation or underestimation and large percentage errors. diff --git a/validmind/tests/model_validation/sklearn/RegressionPerformance.py b/validmind/tests/model_validation/sklearn/RegressionPerformance.py index 2c1eed0e4..67fea3fee 100644 --- a/validmind/tests/model_validation/sklearn/RegressionPerformance.py +++ b/validmind/tests/model_validation/sklearn/RegressionPerformance.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict, List, Tuple + import numpy as np from sklearn.metrics import mean_absolute_error, mean_squared_error @@ -14,7 +16,9 @@ @tags("sklearn", "model_performance") @tasks("regression") -def RegressionPerformance(model: VMModel, dataset: VMDataset): +def RegressionPerformance( + model: VMModel, dataset: VMDataset +) -> Tuple[List[Dict[str, float]], RawData]: """ Evaluates the performance of a regression model using five different metrics: MAE, MSE, RMSE, MAPE, and MBD. diff --git a/validmind/tests/model_validation/sklearn/RegressionR2Square.py b/validmind/tests/model_validation/sklearn/RegressionR2Square.py index 302184e6d..62fb65a9b 100644 --- a/validmind/tests/model_validation/sklearn/RegressionR2Square.py +++ b/validmind/tests/model_validation/sklearn/RegressionR2Square.py @@ -2,16 +2,21 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import pandas as pd from sklearn import metrics from validmind import RawData, tags, tasks from validmind.tests.model_validation.statsmodels.statsutils import adj_r2_score +from validmind.vm_models import VMDataset, VMModel @tags("sklearn", "model_performance") @tasks("regression") -def RegressionR2Square(dataset, model): +def RegressionR2Square( + dataset: VMDataset, model: VMModel +) -> Tuple[pd.DataFrame, RawData]: """ Assesses the overall goodness-of-fit of a regression model by evaluating R-squared (R2) and Adjusted R-squared (Adj R2) scores to determine the model's explanatory power over the dependent variable. diff --git a/validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py b/validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py index 5245bd8e2..03679c47a 100644 --- a/validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +++ b/validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py @@ -2,16 +2,21 @@ # See the LICENSE file in the root of this repository for details. 
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import List + import pandas as pd from sklearn import metrics from validmind import tags, tasks from validmind.tests.model_validation.statsmodels.statsutils import adj_r2_score +from validmind.vm_models import VMDataset, VMModel @tags("model_performance", "sklearn") @tasks("regression", "time_series_forecasting") -def RegressionR2SquareComparison(datasets, models): +def RegressionR2SquareComparison( + datasets: List[VMDataset], models: List[VMModel] +) -> pd.DataFrame: """ Compares R-Squared and Adjusted R-Squared values for different regression models across multiple datasets to assess model performance and relevance of features. diff --git a/validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py b/validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py index 604e74815..b688d68a1 100644 --- a/validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +++ b/validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py @@ -12,9 +12,9 @@ import seaborn as sns from sklearn import metrics +from validmind import tags, tasks from validmind.errors import MissingOrInvalidModelPredictFnError from validmind.logging import get_logger -from validmind.tests import tags, tasks from validmind.vm_models import VMDataset, VMModel logger = get_logger(__name__) @@ -221,7 +221,7 @@ def RobustnessDiagnosis( metric: str = None, scaling_factor_std_dev_list: List[float] = DEFAULT_STD_DEV_LIST, performance_decay_threshold: float = DEFAULT_DECAY_THRESHOLD, -): +) -> Tuple[pd.DataFrame, go.Figure, bool]: """ Assesses the robustness of a machine learning model by evaluating performance decay under noisy conditions. diff --git a/validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py b/validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py index 6c2d6c08b..452c1ed48 100644 --- a/validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +++ b/validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import pandas as pd import plotly.graph_objects as go @@ -13,7 +15,7 @@ @tasks("classification") def ScoreProbabilityAlignment( model: VMModel, dataset: VMDataset, score_column: str = "score", n_bins: int = 10 -): +) -> Tuple[pd.DataFrame, go.Figure, RawData]: """ Analyzes the alignment between credit scores and predicted probabilities. diff --git a/validmind/tests/model_validation/sklearn/SilhouettePlot.py b/validmind/tests/model_validation/sklearn/SilhouettePlot.py index 2247a33ac..55273f0f1 100644 --- a/validmind/tests/model_validation/sklearn/SilhouettePlot.py +++ b/validmind/tests/model_validation/sklearn/SilhouettePlot.py @@ -2,8 +2,11 @@ # See the LICENSE file in the root of this repository for details. 
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict, Tuple + import matplotlib.pyplot as plt import numpy as np +import plotly.graph_objects as go from sklearn.metrics import silhouette_samples, silhouette_score from validmind import RawData, tags, tasks @@ -12,7 +15,9 @@ @tags("sklearn", "model_performance") @tasks("clustering") -def SilhouettePlot(model: VMModel, dataset: VMDataset): +def SilhouettePlot( + model: VMModel, dataset: VMDataset +) -> Tuple[Dict[str, float], go.Figure, RawData]: """ Calculates and visualizes Silhouette Score, assessing the degree of data point suitability to its cluster in ML models. diff --git a/validmind/tests/model_validation/sklearn/TrainingTestDegradation.py b/validmind/tests/model_validation/sklearn/TrainingTestDegradation.py index 1e023ea09..02eeec3fc 100644 --- a/validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +++ b/validmind/tests/model_validation/sklearn/TrainingTestDegradation.py @@ -2,7 +2,7 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial -from typing import List +from typing import Dict, List, Tuple from numpy import unique from sklearn.metrics import classification_report @@ -22,7 +22,7 @@ @tasks("classification", "text_classification") def TrainingTestDegradation( datasets: List[VMDataset], model: VMModel, max_threshold: float = 0.10 -): +) -> Tuple[List[Dict[str, float]], bool, RawData]: """ Tests if model performance degradation between training and test datasets exceeds a predefined threshold. diff --git a/validmind/tests/model_validation/sklearn/VMeasure.py b/validmind/tests/model_validation/sklearn/VMeasure.py index 9153ed392..b51f8d656 100644 --- a/validmind/tests/model_validation/sklearn/VMeasure.py +++ b/validmind/tests/model_validation/sklearn/VMeasure.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict, List, Tuple + from sklearn import metrics from validmind import RawData, tags, tasks @@ -10,7 +12,9 @@ @tags("sklearn", "model_performance") @tasks("clustering") -def VMeasure(dataset: VMDataset, model: VMModel): +def VMeasure( + dataset: VMDataset, model: VMModel +) -> Tuple[List[Dict[str, float]], RawData]: """ Evaluates homogeneity and completeness of a clustering model using the V Measure Score. diff --git a/validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py b/validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py index 591dccedb..6dc8a6180 100644 --- a/validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +++ b/validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py @@ -2,10 +2,11 @@ # See the LICENSE file in the root of this repository for details. 
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial -from typing import Callable, Dict, List, Tuple, Union +from typing import Callable, Dict, List, Optional, Tuple import matplotlib.pyplot as plt import pandas as pd +import plotly.graph_objects as go import seaborn as sns from sklearn import metrics @@ -157,10 +158,10 @@ def _plot_weak_spots( def WeakspotsDiagnosis( datasets: List[VMDataset], model: VMModel, - features_columns: Union[List[str], None] = None, - metrics: Union[Dict[str, Callable], None] = None, - thresholds: Union[Dict[str, float], None] = None, -): + features_columns: Optional[List[str]] = None, + metrics: Optional[Dict[str, Callable]] = None, + thresholds: Optional[Dict[str, float]] = None, +) -> Tuple[pd.DataFrame, go.Figure, bool]: """ Identifies and visualizes weak spots in a machine learning model's performance across various sections of the feature space. diff --git a/validmind/tests/model_validation/statsmodels/AutoARIMA.py b/validmind/tests/model_validation/statsmodels/AutoARIMA.py index b74626303..e63c70b87 100644 --- a/validmind/tests/model_validation/statsmodels/AutoARIMA.py +++ b/validmind/tests/model_validation/statsmodels/AutoARIMA.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict, List + from statsmodels.tsa.arima.model import ARIMA from statsmodels.tsa.stattools import adfuller @@ -14,7 +16,7 @@ @tags("time_series_data", "forecasting", "model_selection", "statsmodels") @tasks("regression") -def AutoARIMA(model: VMModel, dataset: VMDataset): +def AutoARIMA(model: VMModel, dataset: VMDataset) -> List[Dict[str, float]]: """ Evaluates ARIMA models for time-series forecasting, ranking them using Bayesian and Akaike Information Criteria. diff --git a/validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py b/validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py index 733787529..bd5f1e4f4 100644 --- a/validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +++ b/validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py @@ -2,16 +2,21 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import numpy as np import plotly.graph_objects as go from matplotlib import cm from validmind import RawData, tags, tasks +from validmind.vm_models import VMDataset, VMModel @tags("visualization", "credit_risk") @tasks("classification") -def CumulativePredictionProbabilities(dataset, model, title="Cumulative Probabilities"): +def CumulativePredictionProbabilities( + dataset: VMDataset, model: VMModel, title: str = "Cumulative Probabilities" +) -> Tuple[go.Figure, RawData]: """ Visualizes cumulative probabilities of positive and negative classes for both training and testing in classification models. diff --git a/validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py b/validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py index 02147456b..a2de47609 100644 --- a/validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +++ b/validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py @@ -2,15 +2,20 @@ # See the LICENSE file in the root of this repository for details. 
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import List, Tuple + import pandas as pd from statsmodels.stats.stattools import durbin_watson from validmind import RawData, tags, tasks +from validmind.vm_models import VMDataset, VMModel @tasks("regression") @tags("time_series_data", "forecasting", "statistical_test", "statsmodels") -def DurbinWatsonTest(dataset, model, threshold=[1.5, 2.5]): +def DurbinWatsonTest( + dataset: VMDataset, model: VMModel, threshold: List[float] = [1.5, 2.5] +) -> Tuple[pd.DataFrame, RawData]: """ Assesses autocorrelation in time series data features using the Durbin-Watson statistic. diff --git a/validmind/tests/model_validation/statsmodels/GINITable.py b/validmind/tests/model_validation/statsmodels/GINITable.py index 39a8815c3..54ab86a47 100644 --- a/validmind/tests/model_validation/statsmodels/GINITable.py +++ b/validmind/tests/model_validation/statsmodels/GINITable.py @@ -2,16 +2,19 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import numpy as np import pandas as pd from sklearn.metrics import roc_auc_score, roc_curve from validmind import RawData, tags, tasks +from validmind.vm_models import VMDataset, VMModel @tags("model_performance") @tasks("classification") -def GINITable(dataset, model): +def GINITable(dataset: VMDataset, model: VMModel) -> Tuple[pd.DataFrame, RawData]: """ Evaluates classification model performance using AUC, GINI, and KS metrics for training and test datasets. diff --git a/validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py b/validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py index 61f76390b..46d94c82d 100644 --- a/validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +++ b/validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict, List, Tuple + from statsmodels.stats.diagnostic import kstest_normal from validmind import RawData, tags, tasks @@ -11,7 +13,9 @@ @tags("tabular_data", "data_distribution", "statistical_test", "statsmodels") @tasks("classification", "regression") -def KolmogorovSmirnov(model: VMModel, dataset: VMDataset, dist: str = "norm"): +def KolmogorovSmirnov( + model: VMModel, dataset: VMDataset, dist: str = "norm" +) -> Tuple[List[Dict[str, float]], RawData]: """ Assesses whether each feature in the dataset aligns with a normal distribution using the Kolmogorov-Smirnov test. diff --git a/validmind/tests/model_validation/statsmodels/Lilliefors.py b/validmind/tests/model_validation/statsmodels/Lilliefors.py index e51307d71..b7c3744b5 100644 --- a/validmind/tests/model_validation/statsmodels/Lilliefors.py +++ b/validmind/tests/model_validation/statsmodels/Lilliefors.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. 
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict, List + from statsmodels.stats.diagnostic import lilliefors from validmind import tags, tasks @@ -10,7 +12,7 @@ @tags("tabular_data", "data_distribution", "statistical_test", "statsmodels") @tasks("classification", "regression") -def Lilliefors(dataset: VMDataset): +def Lilliefors(dataset: VMDataset) -> List[Dict[str, float]]: """ Assesses the normality of feature distributions in an ML model's training dataset using the Lilliefors test. diff --git a/validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py b/validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py index 05887165d..820f7dfa5 100644 --- a/validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +++ b/validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py @@ -2,18 +2,22 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple import plotly.graph_objects as go from matplotlib import cm from validmind import RawData, tags, tasks +from validmind.vm_models import VMDataset, VMModel @tags("visualization", "credit_risk") @tasks("classification") def PredictionProbabilitiesHistogram( - dataset, model, title="Histogram of Predictive Probabilities" -): + dataset: VMDataset, + model: VMModel, + title: str = "Histogram of Predictive Probabilities", +) -> Tuple[go.Figure, RawData]: """ Assesses the predictive probability distribution for binary classification to evaluate model performance and potential overfitting or bias. diff --git a/validmind/tests/model_validation/statsmodels/RegressionCoeffs.py b/validmind/tests/model_validation/statsmodels/RegressionCoeffs.py index aee3cbe90..8aa4a25c8 100644 --- a/validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +++ b/validmind/tests/model_validation/statsmodels/RegressionCoeffs.py @@ -3,17 +3,20 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import pandas as pd import plotly.graph_objects as go from scipy import stats from validmind import RawData, tags, tasks from validmind.errors import SkipTestError +from validmind.vm_models import VMModel @tags("tabular_data", "visualization", "model_training") @tasks("regression") -def RegressionCoeffs(model): +def RegressionCoeffs(model: VMModel) -> Tuple[go.Figure, RawData, pd.DataFrame]: """ Assesses the significance and uncertainty of predictor variables in a regression model through visualization of coefficients and their 95% confidence intervals. diff --git a/validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py b/validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py index 3d017d1c6..28b326712 100644 --- a/validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +++ b/validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py @@ -2,7 +2,10 @@ # See the LICENSE file in the root of this repository for details. 
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import matplotlib.pyplot as plt +import plotly.graph_objects as go import seaborn as sns from validmind import RawData, tags, tasks @@ -16,8 +19,10 @@ @tags("statistical_test", "model_interpretation", "visualization", "feature_importance") @tasks("regression") def RegressionFeatureSignificance( - model: VMModel, fontsize: int = 10, p_threshold: float = 0.05 -): + model: VMModel, + fontsize: int = 10, + p_threshold: float = 0.05, +) -> Tuple[go.Figure, RawData]: """ Assesses and visualizes the statistical significance of features in a regression model. diff --git a/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py b/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py index 95bd0fdd4..1290447f7 100644 --- a/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +++ b/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py @@ -2,10 +2,11 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial -from typing import Union +from typing import Optional, Tuple import matplotlib.pyplot as plt import pandas as pd +import plotly.graph_objects as go from validmind import RawData, tags, tasks from validmind.logging import get_logger @@ -19,9 +20,9 @@ def RegressionModelForecastPlot( model: VMModel, dataset: VMDataset, - start_date: Union[str, None] = None, - end_date: Union[str, None] = None, -): + start_date: Optional[str] = None, + end_date: Optional[str] = None, +) -> Tuple[go.Figure, RawData]: """ Generates plots to visually compare the forecasted outcomes of a regression model against actual observed values over a specified date range. diff --git a/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py b/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py index 467ea1440..24afb4a3e 100644 --- a/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +++ b/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py @@ -2,9 +2,12 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import matplotlib.pyplot as plt import numpy as np import pandas as pd +import plotly.graph_objects as go from validmind import RawData, tags, tasks from validmind.vm_models import VMDataset, VMModel @@ -23,7 +26,7 @@ def integrate_diff(series_diff, start_value): def RegressionModelForecastPlotLevels( model: VMModel, dataset: VMDataset, -): +) -> Tuple[go.Figure, RawData]: """ Assesses the alignment between forecasted and observed values in regression models through visual plots diff --git a/validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py b/validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py index d8f5c120d..cd16e6f55 100644 --- a/validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +++ b/validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py @@ -2,10 +2,11 @@ # See the LICENSE file in the root of this repository for details. 
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial -from typing import List, Union +from typing import List, Tuple, Union import matplotlib.pyplot as plt import numpy as np +import plotly.graph_objects as go from validmind import RawData, tags, tasks from validmind.logging import get_logger @@ -29,7 +30,7 @@ def RegressionModelSensitivityPlot( model: VMModel, shocks: List[float] = [0.1], transformation: Union[str, None] = None, -): +) -> Tuple[go.Figure, RawData]: """ Assesses the sensitivity of a regression model to changes in independent variables by applying shocks and visualizing the impact. diff --git a/validmind/tests/model_validation/statsmodels/RegressionModelSummary.py b/validmind/tests/model_validation/statsmodels/RegressionModelSummary.py index 88d0b9162..2d40d06f6 100644 --- a/validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +++ b/validmind/tests/model_validation/statsmodels/RegressionModelSummary.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict, List, Tuple + from sklearn.metrics import mean_squared_error, r2_score from validmind import RawData, tags, tasks @@ -12,7 +14,9 @@ @tags("model_performance", "regression") @tasks("regression") -def RegressionModelSummary(dataset: VMDataset, model: VMModel): +def RegressionModelSummary( + dataset: VMDataset, model: VMModel +) -> Tuple[List[Dict[str, float]], RawData]: """ Evaluates regression model performance using metrics including R-Squared, Adjusted R-Squared, MSE, and RMSE. diff --git a/validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py b/validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py index 108cd45cf..27b66ba5e 100644 --- a/validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +++ b/validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py @@ -2,6 +2,8 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Tuple + import numpy as np import pandas as pd import plotly.graph_objects as go @@ -19,7 +21,7 @@ @tasks("regression") def RegressionPermutationFeatureImportance( dataset: VMDataset, model: VMModel, fontsize: int = 12, figure_height: int = 500 -): +) -> Tuple[go.Figure, RawData]: """ Assesses the significance of each feature in a model by evaluating the impact on model performance when feature values are randomly rearranged. 
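RegressionPermutationFeatureImportance above, like several other plotting tests in this change, now declares `Tuple[go.Figure, RawData]`. The sketch below produces such a pair from scikit-learn's permutation importance and a Plotly bar chart; the RawData keyword is an assumption and the data is synthetic.

# Sketch: a (go.Figure, RawData) pair in the style of the annotated tests.
# The RawData keyword name is an assumption; the data is synthetic.
from typing import Tuple

import plotly.graph_objects as go
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import permutation_importance

from validmind import RawData


def permutation_importance_figure() -> Tuple[go.Figure, RawData]:
    X, y = make_regression(n_samples=200, n_features=5, random_state=0)
    estimator = RandomForestRegressor(random_state=0).fit(X, y)
    result = permutation_importance(estimator, X, y, n_repeats=5, random_state=0)
    names = [f"feature_{i}" for i in range(X.shape[1])]
    fig = go.Figure(go.Bar(x=result.importances_mean, y=names, orientation="h"))
    fig.update_layout(title="Permutation Feature Importance", height=500)
    return fig, RawData(importances_mean=result.importances_mean.tolist())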
diff --git a/validmind/tests/model_validation/statsmodels/ScorecardHistogram.py b/validmind/tests/model_validation/statsmodels/ScorecardHistogram.py index 29678e5af..4e08b321a 100644 --- a/validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +++ b/validmind/tests/model_validation/statsmodels/ScorecardHistogram.py @@ -6,11 +6,16 @@ from matplotlib import cm from validmind import tags, tasks +from validmind.vm_models import VMDataset @tags("visualization", "credit_risk", "logistic_regression") @tasks("classification") -def ScorecardHistogram(dataset, title="Histogram of Scores", score_column="score"): +def ScorecardHistogram( + dataset: VMDataset, + title: str = "Histogram of Scores", + score_column: str = "score", +) -> go.Figure: """ The Scorecard Histogram test evaluates the distribution of credit scores between default and non-default instances, providing critical insights into the performance and generalizability of credit-risk models. diff --git a/validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py b/validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py index f29848137..7e2482c04 100644 --- a/validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +++ b/validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py @@ -2,7 +2,7 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial -from typing import List +from typing import Dict, List, Tuple import numpy as np import pandas as pd @@ -26,7 +26,7 @@ def CalibrationCurveDrift( model: VMModel, n_bins: int = 10, drift_pct_threshold: float = 20, -): +) -> Tuple[go.Figure, Dict[str, pd.DataFrame], bool, RawData]: """ Evaluates changes in probability calibration between reference and monitoring datasets. diff --git a/validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py b/validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py index 9e9e62cf4..ee48eedaf 100644 --- a/validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +++ b/validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py @@ -2,7 +2,7 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial -from typing import List +from typing import Dict, List, Tuple import numpy as np import pandas as pd @@ -38,7 +38,7 @@ def calculate_ks_statistic(y_true, y_prob): @tasks("classification", "text_classification") def ClassDiscriminationDrift( datasets: List[VMDataset], model: VMModel, drift_pct_threshold=20 -): +) -> Tuple[Dict[str, pd.DataFrame], bool]: """ Compares classification discrimination metrics between reference and monitoring datasets. diff --git a/validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py b/validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py index fa59efbf7..042738911 100644 --- a/validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +++ b/validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py @@ -2,7 +2,7 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial -from typing import List +from typing import Dict, List, Tuple import pandas as pd import plotly.graph_objs as go @@ -18,7 +18,7 @@ def ClassImbalanceDrift( datasets: List[VMDataset], drift_pct_threshold: float = 5.0, title: str = "Class Distribution Drift", -): +) -> Tuple[go.Figure, Dict[str, pd.DataFrame], bool]: """ Evaluates drift in class distribution between reference and monitoring datasets. 
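The ongoing-monitoring tests above share a pattern: take a reference and a monitoring dataset, compare a metric, and return named tables plus a pass/fail flag driven by a drift percentage threshold. The sketch below reproduces that pattern with plain pandas and scikit-learn on toy labels; it illustrates the return shape, not the library's implementation.

# Sketch of the reference-vs-monitoring drift pattern used by the tests above.
# Operates on plain arrays; table names and the drift formula are illustrative.
from typing import Dict, Tuple

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score


def accuracy_drift(
    y_ref, y_ref_pred, y_mon, y_mon_pred, drift_pct_threshold: float = 20.0
) -> Tuple[Dict[str, pd.DataFrame], bool]:
    ref_acc = accuracy_score(y_ref, y_ref_pred)
    mon_acc = accuracy_score(y_mon, y_mon_pred)
    drift_pct = 100.0 * abs(mon_acc - ref_acc) / ref_acc
    passed = bool(drift_pct < drift_pct_threshold)
    tables = {
        "Accuracy Drift": pd.DataFrame(
            [{"Reference": ref_acc, "Monitoring": mon_acc, "Drift (%)": drift_pct}]
        )
    }
    return tables, passed


# Toy usage with made-up labels
tables, passed = accuracy_drift(
    y_ref=np.array([0, 1, 1, 0]),
    y_ref_pred=np.array([0, 1, 0, 0]),
    y_mon=np.array([1, 1, 0, 0]),
    y_mon_pred=np.array([1, 0, 0, 0]),
)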
diff --git a/validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py b/validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py index 85b43a576..37ec3e670 100644 --- a/validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +++ b/validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py @@ -2,7 +2,7 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial -from typing import List +from typing import Dict, List, Tuple import numpy as np import pandas as pd @@ -18,7 +18,7 @@ @tasks("classification", "text_classification") def ClassificationAccuracyDrift( datasets: List[VMDataset], model: VMModel, drift_pct_threshold=20 -): +) -> Tuple[Dict[str, pd.DataFrame], bool, RawData]: """ Compares classification accuracy metrics between reference and monitoring datasets. diff --git a/validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py b/validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py index 37a1a2ac0..c905555e9 100644 --- a/validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +++ b/validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py @@ -2,7 +2,7 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial -from typing import List +from typing import Dict, List, Tuple import numpy as np import pandas as pd @@ -18,7 +18,7 @@ @tasks("classification", "text_classification") def ConfusionMatrixDrift( datasets: List[VMDataset], model: VMModel, drift_pct_threshold=20 -): +) -> Tuple[Dict[str, pd.DataFrame], bool, RawData]: """ Compares confusion matrix metrics between reference and monitoring datasets. diff --git a/validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py b/validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py index d4143c5d7..1b03a0b81 100644 --- a/validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +++ b/validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py @@ -2,7 +2,7 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial -from typing import List +from typing import List, Tuple import numpy as np import plotly.graph_objects as go @@ -17,7 +17,7 @@ def CumulativePredictionProbabilitiesDrift( datasets: List[VMDataset], model: VMModel, -): +) -> Tuple[go.Figure, RawData]: """ Compares cumulative prediction probability distributions between reference and monitoring datasets. diff --git a/validmind/tests/ongoing_monitoring/FeatureDrift.py b/validmind/tests/ongoing_monitoring/FeatureDrift.py index 52547e2f7..9df0cf17f 100644 --- a/validmind/tests/ongoing_monitoring/FeatureDrift.py +++ b/validmind/tests/ongoing_monitoring/FeatureDrift.py @@ -2,11 +2,14 @@ # See the LICENSE file in the root of this repository for details. 
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict, List, Tuple + import numpy as np import pandas as pd import plotly.graph_objects as go from validmind import RawData, tags, tasks +from validmind.vm_models import VMDataset def calculate_psi_score(actual, expected): @@ -92,11 +95,11 @@ def create_distribution_plot(feature_name, reference_dist, monitoring_dist, bins @tags("visualization") @tasks("monitoring") def FeatureDrift( - datasets, + datasets: List[VMDataset], bins=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], feature_columns=None, psi_threshold=0.2, -): +) -> Tuple[Dict[str, pd.DataFrame], go.Figure, bool, RawData]: """ Evaluates changes in feature distribution over time to identify potential model drift. diff --git a/validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py b/validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py index 10c075c78..c3c79169d 100644 --- a/validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +++ b/validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py @@ -3,14 +3,19 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import List, Tuple + import matplotlib.pyplot as plt from validmind import RawData, tags, tasks +from validmind.vm_models import VMDataset, VMModel @tags("visualization") @tasks("monitoring") -def PredictionAcrossEachFeature(datasets, model): +def PredictionAcrossEachFeature( + datasets: List[VMDataset], model: VMModel +) -> Tuple[plt.Figure, RawData]: """ Assesses differences in model predictions across individual features between reference and monitoring datasets through visual analysis. diff --git a/validmind/tests/ongoing_monitoring/PredictionCorrelation.py b/validmind/tests/ongoing_monitoring/PredictionCorrelation.py index ddbc14503..937bb92ab 100644 --- a/validmind/tests/ongoing_monitoring/PredictionCorrelation.py +++ b/validmind/tests/ongoing_monitoring/PredictionCorrelation.py @@ -2,15 +2,22 @@ # See the LICENSE file in the root of this repository for details. # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +from typing import Dict, List, Tuple + import pandas as pd import plotly.graph_objects as go from validmind import RawData, tags, tasks +from validmind.vm_models import VMDataset, VMModel @tags("visualization") @tasks("monitoring") -def PredictionCorrelation(datasets, model, drift_pct_threshold=20): +def PredictionCorrelation( + datasets: List[VMDataset], + model: VMModel, + drift_pct_threshold: float = 20, +) -> Tuple[Dict[str, pd.DataFrame], go.Figure, bool, RawData]: """ Assesses correlation changes between model predictions from reference and monitoring datasets to detect potential target drift. diff --git a/validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py b/validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py index 150e544be..e3b56f41e 100644 --- a/validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +++ b/validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py @@ -2,7 +2,7 @@ # See the LICENSE file in the root of this repository for details. 
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from typing import List
+from typing import Dict, List, Tuple
 
 import numpy as np
 import pandas as pd
@@ -21,7 +21,7 @@ def PredictionProbabilitiesHistogramDrift(
     datasets: List[VMDataset],
     model: VMModel,
     title="Prediction Probabilities Histogram Drift",
     drift_pct_threshold: float = 20.0,
-):
+) -> Tuple[go.Figure, Dict[str, pd.DataFrame], bool, RawData]:
     """
     Compares prediction probability distributions between reference and monitoring datasets.
diff --git a/validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py b/validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py
index 9d24dfc91..c40c71f93 100644
--- a/validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py
+++ b/validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py
@@ -2,15 +2,20 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import List, Tuple
+
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
 
 from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("visualization")
 @tasks("monitoring")
-def PredictionQuantilesAcrossFeatures(datasets, model):
+def PredictionQuantilesAcrossFeatures(
+    datasets: List[VMDataset], model: VMModel
+) -> Tuple[go.Figure, ...]:
     """
     Assesses differences in model prediction distributions across individual features between reference and monitoring datasets through quantile analysis.
diff --git a/validmind/tests/ongoing_monitoring/ROCCurveDrift.py b/validmind/tests/ongoing_monitoring/ROCCurveDrift.py
index b2cc2e335..d76cb9e65 100644
--- a/validmind/tests/ongoing_monitoring/ROCCurveDrift.py
+++ b/validmind/tests/ongoing_monitoring/ROCCurveDrift.py
@@ -2,7 +2,7 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from typing import List
+from typing import List, Tuple
 
 import numpy as np
 import plotly.graph_objects as go
@@ -20,7 +20,9 @@
     "visualization",
 )
 @tasks("classification", "text_classification")
-def ROCCurveDrift(datasets: List[VMDataset], model: VMModel):
+def ROCCurveDrift(
+    datasets: List[VMDataset], model: VMModel
+) -> Tuple[go.Figure, go.Figure, RawData]:
     """
     Compares ROC curves between reference and monitoring datasets.
diff --git a/validmind/tests/ongoing_monitoring/ScoreBandsDrift.py b/validmind/tests/ongoing_monitoring/ScoreBandsDrift.py
index b122e26bd..23874934f 100644
--- a/validmind/tests/ongoing_monitoring/ScoreBandsDrift.py
+++ b/validmind/tests/ongoing_monitoring/ScoreBandsDrift.py
@@ -2,7 +2,7 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from typing import List
+from typing import Dict, List, Tuple
 
 import numpy as np
 import pandas as pd
@@ -19,7 +19,7 @@ def ScoreBandsDrift(
     datasets: List[VMDataset],
     model: VMModel,
     score_column: str = "score",
     score_bands: list = None,
     drift_threshold: float = 20.0,
-):
+) -> Tuple[Dict[str, pd.DataFrame], bool, RawData]:
     """
     Analyzes drift in population distribution and default rates across score bands.
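
Note on the annotations introduced above: the ongoing-monitoring drift tests now advertise composite return values (named result tables, a pass/fail drift flag, figures, and a RawData object) instead of an unannotated return. A minimal sketch of what a caller can now rely on, assuming `reference_ds`, `monitoring_ds` (VMDataset) and `vm_model` (VMModel) are already-initialized objects; these names are illustrative and not part of the patch:

# Sketch only -- illustrative names; assumes initialized VMDataset/VMModel inputs.
# datasets[0] is assumed to be the reference set and datasets[1] the monitoring set.
from validmind.tests.ongoing_monitoring.ClassificationAccuracyDrift import (
    ClassificationAccuracyDrift,
)

tables, passed, raw_data = ClassificationAccuracyDrift(
    datasets=[reference_ds, monitoring_ds],
    model=vm_model,
    drift_pct_threshold=20,
)
# Per the new annotation: `tables` is a Dict[str, pd.DataFrame],
# `passed` is a bool drift flag, and `raw_data` is a RawData object.
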
diff --git a/validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py b/validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py
index 09aa43491..ded42cf64 100644
--- a/validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py
+++ b/validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py
@@ -2,7 +2,7 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from typing import List
+from typing import Dict, List, Tuple
 
 import numpy as np
 import pandas as pd
@@ -21,7 +21,7 @@ def ScorecardHistogramDrift(
     score_column: str = "score",
     title: str = "Scorecard Histogram Drift",
     drift_pct_threshold: float = 20.0,
-):
+) -> Tuple[go.Figure, Dict[str, pd.DataFrame], bool]:
     """
     Compares score distributions between reference and monitoring datasets for each class.
diff --git a/validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py b/validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py
index d3eba152b..55beee0aa 100644
--- a/validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py
+++ b/validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py
@@ -2,17 +2,24 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Dict, List, Tuple
+
 import pandas as pd
 import plotly.figure_factory as ff
 import plotly.graph_objects as go
 from scipy.stats import kurtosis, skew
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("visualization")
 @tasks("monitoring")
-def TargetPredictionDistributionPlot(datasets, model, drift_pct_threshold=20):
+def TargetPredictionDistributionPlot(
+    datasets: List[VMDataset],
+    model: VMModel,
+    drift_pct_threshold: float = 20,
+) -> Tuple[Dict[str, pd.DataFrame], go.Figure, bool, RawData]:
     """
     Assesses differences in prediction distributions between a reference dataset and a monitoring dataset to identify potential data drift.
diff --git a/validmind/tests/prompt_validation/Bias.py b/validmind/tests/prompt_validation/Bias.py
index 275894ae3..80d5ed401 100644
--- a/validmind/tests/prompt_validation/Bias.py
+++ b/validmind/tests/prompt_validation/Bias.py
@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Any, Dict, List, Tuple
+
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingRequiredTestInputError
@@ -45,7 +47,9 @@
 @tags("llm", "few_shot")
 @tasks("text_classification", "text_summarization")
-def Bias(model, min_threshold=7, judge_llm=None):
+def Bias(
+    model, min_threshold=7, judge_llm=None
+) -> Tuple[List[Dict[str, Any]], bool, RawData]:
     """
     Assesses potential bias in a Large Language Model by analyzing the distribution and order of exemplars in the prompt.
diff --git a/validmind/tests/prompt_validation/Clarity.py b/validmind/tests/prompt_validation/Clarity.py
index 63acfd5bf..a55f8819a 100644
--- a/validmind/tests/prompt_validation/Clarity.py
+++ b/validmind/tests/prompt_validation/Clarity.py
@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Any, Dict, List, Tuple
+
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingRequiredTestInputError
@@ -46,7 +48,9 @@
 @tags("llm", "zero_shot", "few_shot")
 @tasks("text_classification", "text_summarization")
-def Clarity(model, min_threshold=7, judge_llm=None):
+def Clarity(
+    model, min_threshold=7, judge_llm=None
+) -> Tuple[List[Dict[str, Any]], bool, RawData]:
     """
     Evaluates and scores the clarity of prompts in a Large Language Model based on specified guidelines.
diff --git a/validmind/tests/prompt_validation/Conciseness.py b/validmind/tests/prompt_validation/Conciseness.py
index cc26d1769..7822375d9 100644
--- a/validmind/tests/prompt_validation/Conciseness.py
+++ b/validmind/tests/prompt_validation/Conciseness.py
@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Any, Dict, List, Tuple
+
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingRequiredTestInputError
@@ -54,7 +56,9 @@
 @tags("llm", "zero_shot", "few_shot")
 @tasks("text_classification", "text_summarization")
-def Conciseness(model, min_threshold=7, judge_llm=None):
+def Conciseness(
+    model, min_threshold=7, judge_llm=None
+) -> Tuple[List[Dict[str, Any]], bool, RawData]:
     """
     Analyzes and grades the conciseness of prompts provided to a Large Language Model.
diff --git a/validmind/tests/prompt_validation/Delimitation.py b/validmind/tests/prompt_validation/Delimitation.py
index 87ae8e260..a702e57a6 100644
--- a/validmind/tests/prompt_validation/Delimitation.py
+++ b/validmind/tests/prompt_validation/Delimitation.py
@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Any, Dict, List, Tuple
+
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingRequiredTestInputError
@@ -39,7 +41,9 @@
 @tags("llm", "zero_shot", "few_shot")
 @tasks("text_classification", "text_summarization")
-def Delimitation(model, min_threshold=7, judge_llm=None):
+def Delimitation(
+    model, min_threshold=7, judge_llm=None
+) -> Tuple[List[Dict[str, Any]], bool, RawData]:
     """
     Evaluates the proper use of delimiters in prompts provided to Large Language Models.
diff --git a/validmind/tests/prompt_validation/NegativeInstruction.py b/validmind/tests/prompt_validation/NegativeInstruction.py
index 64c4a2338..8b66f3fa6 100644
--- a/validmind/tests/prompt_validation/NegativeInstruction.py
+++ b/validmind/tests/prompt_validation/NegativeInstruction.py
@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Any, Dict, List, Tuple
+
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingRequiredTestInputError
@@ -52,7 +54,9 @@
 @tags("llm", "zero_shot", "few_shot")
 @tasks("text_classification", "text_summarization")
-def NegativeInstruction(model, min_threshold=7, judge_llm=None):
+def NegativeInstruction(
+    model, min_threshold=7, judge_llm=None
+) -> Tuple[List[Dict[str, Any]], bool, RawData]:
     """
     Evaluates and grades the use of affirmative, proactive language over negative instructions in LLM prompts.
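
The prompt-validation tests above (Bias, Clarity, Conciseness, Delimitation, NegativeInstruction) all gain the same return annotation. A minimal sketch of unpacking one of them, assuming `prompt_model` is an already-initialized VMModel wrapping the prompt under review and `judge` is an optional judge LLM; both names are illustrative, not part of the patch:

# Sketch only -- illustrative names; signature per the patch (judge_llm defaults to None).
from validmind.tests.prompt_validation.Bias import Bias

results, passed, raw_data = Bias(model=prompt_model, min_threshold=7, judge_llm=judge)
# Per the annotation: `results` is a List[Dict[str, Any]] of scoring details,
# `passed` is the bool pass/fail flag, and `raw_data` is a RawData object.
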
diff --git a/validmind/tests/prompt_validation/Robustness.py b/validmind/tests/prompt_validation/Robustness.py
index d1e306f5d..09533c5a3 100644
--- a/validmind/tests/prompt_validation/Robustness.py
+++ b/validmind/tests/prompt_validation/Robustness.py
@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Any, Dict, List, Tuple
+
 import pandas as pd
 
 from validmind import RawData, tags, tasks
@@ -56,7 +58,9 @@
 @tags("llm", "zero_shot", "few_shot")
 @tasks("text_classification", "text_summarization")
-def Robustness(model, dataset, num_tests=10, judge_llm=None):
+def Robustness(
+    model, dataset, num_tests=10, judge_llm=None
+) -> Tuple[List[Dict[str, Any]], bool, RawData]:
     """
     Assesses the robustness of prompts provided to a Large Language Model under varying conditions and contexts. This test
     specifically measures the model's ability to generate correct classifications with the given prompt even when the
diff --git a/validmind/tests/prompt_validation/Specificity.py b/validmind/tests/prompt_validation/Specificity.py
index 10cf4ee5a..54d54b197 100644
--- a/validmind/tests/prompt_validation/Specificity.py
+++ b/validmind/tests/prompt_validation/Specificity.py
@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Any, Dict, List, Tuple
+
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingRequiredTestInputError
@@ -52,7 +54,9 @@
 @tags("llm", "zero_shot", "few_shot")
 @tasks("text_classification", "text_summarization")
-def Specificity(model, min_threshold=7, judge_llm=None):
+def Specificity(
+    model, min_threshold=7, judge_llm=None
+) -> Tuple[List[Dict[str, Any]], bool, RawData]:
     """
     Evaluates and scores the specificity of prompts provided to a Large Language Model (LLM), based on clarity, detail, and relevance.
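
Robustness differs from the other prompt-validation tests in that it also consumes a dataset and a number of test runs. A sketch under the same assumptions (illustrative `prompt_model`, `eval_ds`, and `judge` objects, not defined by the patch):

# Sketch only -- illustrative names; signature taken from the patch above.
from validmind.tests.prompt_validation.Robustness import Robustness

results, passed, raw_data = Robustness(
    model=prompt_model,
    dataset=eval_ds,
    num_tests=10,
    judge_llm=judge,
)
# Same Tuple[List[Dict[str, Any]], bool, RawData] shape as the other prompt tests.
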
diff --git a/validmind/unit_metrics/classification/Accuracy.py b/validmind/unit_metrics/classification/Accuracy.py
index 5bd3f6d95..a341c99f7 100644
--- a/validmind/unit_metrics/classification/Accuracy.py
+++ b/validmind/unit_metrics/classification/Accuracy.py
@@ -5,10 +5,11 @@
 from sklearn.metrics import accuracy_score
 
 from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tasks("classification")
 @tags("classification")
-def Accuracy(dataset, model):
+def Accuracy(dataset: VMDataset, model: VMModel) -> float:
     """Calculates the accuracy of a model"""
     return accuracy_score(dataset.y, dataset.y_pred(model))
diff --git a/validmind/unit_metrics/classification/F1.py b/validmind/unit_metrics/classification/F1.py
index 62bda0d30..ea302571a 100644
--- a/validmind/unit_metrics/classification/F1.py
+++ b/validmind/unit_metrics/classification/F1.py
@@ -5,10 +5,11 @@
 from sklearn.metrics import f1_score
 
 from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tasks("classification")
 @tags("classification")
-def F1(model, dataset, **kwargs):
+def F1(model: VMModel, dataset: VMDataset, **kwargs) -> float:
     """Calculates the F1 score for a classification model."""
     return f1_score(dataset.y, dataset.y_pred(model), **kwargs)
diff --git a/validmind/unit_metrics/classification/Precision.py b/validmind/unit_metrics/classification/Precision.py
index fa35504ce..3523d080d 100644
--- a/validmind/unit_metrics/classification/Precision.py
+++ b/validmind/unit_metrics/classification/Precision.py
@@ -5,10 +5,11 @@
 from sklearn.metrics import precision_score
 
 from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tasks("classification")
 @tags("classification")
-def Precision(model, dataset, **kwargs):
+def Precision(model: VMModel, dataset: VMDataset, **kwargs) -> float:
     """Calculates the precision for a classification model."""
     return precision_score(dataset.y, dataset.y_pred(model), **kwargs)
diff --git a/validmind/unit_metrics/classification/ROC_AUC.py b/validmind/unit_metrics/classification/ROC_AUC.py
index d362369e2..1abdb07b5 100644
--- a/validmind/unit_metrics/classification/ROC_AUC.py
+++ b/validmind/unit_metrics/classification/ROC_AUC.py
@@ -7,11 +7,12 @@
 from sklearn.preprocessing import LabelBinarizer
 
 from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tasks("classification")
 @tags("classification")
-def ROC_AUC(model, dataset, **kwargs):
+def ROC_AUC(model: VMModel, dataset: VMDataset, **kwargs) -> float:
     """Calculates the ROC AUC for a classification model."""
 
     y_true = dataset.y
diff --git a/validmind/unit_metrics/classification/Recall.py b/validmind/unit_metrics/classification/Recall.py
index a0668142f..6f88e4e05 100644
--- a/validmind/unit_metrics/classification/Recall.py
+++ b/validmind/unit_metrics/classification/Recall.py
@@ -5,10 +5,11 @@
 from sklearn.metrics import recall_score
 
 from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tasks("classification")
 @tags("classification")
-def Recall(model, dataset, **kwargs):
+def Recall(model: VMModel, dataset: VMDataset, **kwargs) -> float:
     """Calculates the recall for a classification model."""
     return recall_score(dataset.y, dataset.y_pred(model), **kwargs)
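
The classification unit metrics are now annotated as plain float-returning callables over a dataset/model pair, with extra keyword arguments flowing through to the underlying sklearn scorer. A sketch of calling two of them directly, assuming `test_ds` (VMDataset) and `clf_model` (VMModel) are illustrative, already-initialized objects:

# Sketch only -- illustrative names; keyword arguments are forwarded to sklearn.
from validmind.unit_metrics.classification.Accuracy import Accuracy
from validmind.unit_metrics.classification.F1 import F1

acc = Accuracy(test_ds, clf_model)  # accuracy_score(dataset.y, dataset.y_pred(model))
f1_macro = F1(clf_model, dataset=test_ds, average="macro")  # average= goes to f1_score
print(f"accuracy={acc:.3f}, macro F1={f1_macro:.3f}")
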
diff --git a/validmind/unit_metrics/regression/AdjustedRSquaredScore.py b/validmind/unit_metrics/regression/AdjustedRSquaredScore.py
index b4003e132..ef0507254 100644
--- a/validmind/unit_metrics/regression/AdjustedRSquaredScore.py
+++ b/validmind/unit_metrics/regression/AdjustedRSquaredScore.py
@@ -5,11 +5,12 @@
 from sklearn.metrics import r2_score as _r2_score
 
 from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("regression")
 @tasks("regression")
-def AdjustedRSquaredScore(model, dataset):
+def AdjustedRSquaredScore(model: VMModel, dataset: VMDataset) -> float:
     """Calculates the adjusted R-squared score for a regression model."""
     r2_score = _r2_score(
         dataset.y,
diff --git a/validmind/unit_metrics/regression/GiniCoefficient.py b/validmind/unit_metrics/regression/GiniCoefficient.py
index eee6afd88..a40a58c22 100644
--- a/validmind/unit_metrics/regression/GiniCoefficient.py
+++ b/validmind/unit_metrics/regression/GiniCoefficient.py
@@ -5,11 +5,12 @@
 import numpy as np
 
 from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("regression")
 @tasks("regression")
-def GiniCoefficient(dataset, model):
+def GiniCoefficient(dataset: VMDataset, model: VMModel) -> float:
     """Calculates the Gini coefficient for a regression model."""
     y_true = dataset.y
     y_pred = dataset.y_pred(model)
diff --git a/validmind/unit_metrics/regression/HuberLoss.py b/validmind/unit_metrics/regression/HuberLoss.py
index 511d2e650..8db2d2864 100644
--- a/validmind/unit_metrics/regression/HuberLoss.py
+++ b/validmind/unit_metrics/regression/HuberLoss.py
@@ -5,11 +5,12 @@
 import numpy as np
 
 from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("regression")
 @tasks("regression")
-def HuberLoss(model, dataset):
+def HuberLoss(model: VMModel, dataset: VMDataset) -> float:
     """Calculates the Huber loss for a regression model."""
     y_true = dataset.y
     y_pred = dataset.y_pred(model)
diff --git a/validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py b/validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py
index c51a6171f..817ae4f72 100644
--- a/validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py
+++ b/validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py
@@ -5,11 +5,12 @@
 import numpy as np
 
 from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("regression")
 @tasks("regression")
-def KolmogorovSmirnovStatistic(dataset, model):
+def KolmogorovSmirnovStatistic(dataset: VMDataset, model: VMModel) -> float:
     """Calculates the Kolmogorov-Smirnov statistic for a regression model."""
     y_true = dataset.y.flatten()
     y_pred = dataset.y_pred(model)
diff --git a/validmind/unit_metrics/regression/MeanAbsoluteError.py b/validmind/unit_metrics/regression/MeanAbsoluteError.py
index fc29282ed..94aac7972 100644
--- a/validmind/unit_metrics/regression/MeanAbsoluteError.py
+++ b/validmind/unit_metrics/regression/MeanAbsoluteError.py
@@ -5,10 +5,11 @@
 from sklearn.metrics import mean_absolute_error as _mean_absolute_error
 
 from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("regression")
 @tasks("regression")
-def MeanAbsoluteError(model, dataset, **kwargs):
+def MeanAbsoluteError(model: VMModel, dataset: VMDataset, **kwargs) -> float:
     """Calculates the mean absolute error for a regression model."""
     return _mean_absolute_error(dataset.y, dataset.y_pred(model), **kwargs)
diff --git a/validmind/unit_metrics/regression/MeanAbsolutePercentageError.py b/validmind/unit_metrics/regression/MeanAbsolutePercentageError.py
index 2e0b1b700..e6703c3ab 100644
--- a/validmind/unit_metrics/regression/MeanAbsolutePercentageError.py
+++ b/validmind/unit_metrics/regression/MeanAbsolutePercentageError.py
@@ -5,11 +5,12 @@
 import numpy as np
 
 from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("regression")
 @tasks("regression")
-def MeanAbsolutePercentageError(model, dataset):
+def MeanAbsolutePercentageError(model: VMModel, dataset: VMDataset) -> float:
     """Calculates the mean absolute percentage error for a regression model."""
     y_true = dataset.y
     y_pred = dataset.y_pred(model)
diff --git a/validmind/unit_metrics/regression/MeanBiasDeviation.py b/validmind/unit_metrics/regression/MeanBiasDeviation.py
index 0ae616bc7..446e9b620 100644
--- a/validmind/unit_metrics/regression/MeanBiasDeviation.py
+++ b/validmind/unit_metrics/regression/MeanBiasDeviation.py
@@ -5,10 +5,11 @@
 import numpy as np
 
 from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("regression")
 @tasks("regression")
-def MeanBiasDeviation(model, dataset):
+def MeanBiasDeviation(model: VMModel, dataset: VMDataset) -> float:
     """Calculates the mean bias deviation for a regression model."""
     return np.mean(dataset.y - dataset.y_pred(model))
diff --git a/validmind/unit_metrics/regression/MeanSquaredError.py b/validmind/unit_metrics/regression/MeanSquaredError.py
index 2d49e38a2..b4943b95a 100644
--- a/validmind/unit_metrics/regression/MeanSquaredError.py
+++ b/validmind/unit_metrics/regression/MeanSquaredError.py
@@ -5,10 +5,11 @@
 from sklearn.metrics import mean_squared_error
 
 from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("regression")
 @tasks("regression")
-def MeanSquaredError(model, dataset, **kwargs):
+def MeanSquaredError(model: VMModel, dataset: VMDataset, **kwargs) -> float:
     """Calculates the mean squared error for a regression model."""
     return mean_squared_error(dataset.y, dataset.y_pred(model), **kwargs)
diff --git a/validmind/unit_metrics/regression/QuantileLoss.py b/validmind/unit_metrics/regression/QuantileLoss.py
index 35967fcd8..0c2b86826 100644
--- a/validmind/unit_metrics/regression/QuantileLoss.py
+++ b/validmind/unit_metrics/regression/QuantileLoss.py
@@ -9,7 +9,7 @@
 @tags("regression")
 @tasks("regression")
-def QuantileLoss(model, dataset, quantile=0.5):
+def QuantileLoss(model, dataset, quantile=0.5) -> float:
     """Calculates the quantile loss for a regression model."""
 
     error = dataset.y - dataset.y_pred(model)
diff --git a/validmind/unit_metrics/regression/RSquaredScore.py b/validmind/unit_metrics/regression/RSquaredScore.py
index 85df74433..1d53212ae 100644
--- a/validmind/unit_metrics/regression/RSquaredScore.py
+++ b/validmind/unit_metrics/regression/RSquaredScore.py
@@ -5,10 +5,11 @@
 from sklearn.metrics import r2_score
 
 from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("regression")
 @tasks("regression")
-def RSquaredScore(model, dataset):
+def RSquaredScore(model: VMModel, dataset: VMDataset) -> float:
     """Calculates the R-squared score for a regression model."""
     return r2_score(dataset.y, dataset.y_pred(model))
diff --git a/validmind/unit_metrics/regression/RootMeanSquaredError.py b/validmind/unit_metrics/regression/RootMeanSquaredError.py
index 146a36da0..d387139b6 100644
--- a/validmind/unit_metrics/regression/RootMeanSquaredError.py
+++ b/validmind/unit_metrics/regression/RootMeanSquaredError.py
@@ -6,11 +6,12 @@
 from sklearn.metrics import mean_squared_error
 
 from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("regression")
 @tasks("regression")
-def RootMeanSquaredError(model, dataset, **kwargs):
+def RootMeanSquaredError(model: VMModel, dataset: VMDataset, **kwargs) -> float:
     """Calculates the root mean squared error for a regression model."""
     return np.sqrt(
         mean_squared_error(