From 4515d5dece28516b358ba1ca34c9f6185342c1ec Mon Sep 17 00:00:00 2001 From: Anil Sorathiya Date: Tue, 27 Jan 2026 15:12:50 +0000 Subject: [PATCH 1/3] add langgraph installation in the notebook --- .../agents/banking_test_dataset.py | 32 ------------------- .../langgraph_agent_simple_banking_demo.ipynb | 27 +++++++++++----- validmind/tests/load.py | 8 ++--- 3 files changed, 23 insertions(+), 44 deletions(-) diff --git a/notebooks/code_samples/agents/banking_test_dataset.py b/notebooks/code_samples/agents/banking_test_dataset.py index bd2793169..895b1e97f 100644 --- a/notebooks/code_samples/agents/banking_test_dataset.py +++ b/notebooks/code_samples/agents/banking_test_dataset.py @@ -12,14 +12,6 @@ "session_id": str(uuid.uuid4()), "category": "credit_risk" }, - { - "input": "Evaluate credit risk for a business loan of $250,000 with monthly revenue of $85,000 and existing debt of $45,000 and credit score of 650", - "expected_tools": ["credit_risk_analyzer"], - "possible_outputs": ["MEDIUM RISK", "HIGH RISK", "business loan", "debt service coverage ratio", "1.8", "annual revenue", "$1,020,000", "risk score", "650"], - "expected_output": "MEDIUM RISK", # Example, adjust as needed - "session_id": str(uuid.uuid4()), - "category": "credit_risk" - }, { "input": "Check account balance for checking account 12345", "expected_tools": ["customer_account_manager"], @@ -45,29 +37,5 @@ "expected_output": "High-Yield Savings Account (2.5% APY)", # Example, adjust as needed "session_id": str(uuid.uuid4()), "category": "account_management" - }, - { - "input": "Investigate suspicious transactions totaling $75,000 across multiple accounts in the last week", - "expected_tools": ["fraud_detection_system"], - "possible_outputs": ["Require additional verification", "Implement 24-hour delay for verification"], - "expected_output": "Require additional verification", # Example, adjust as needed - "session_id": str(uuid.uuid4()), - "category": "fraud_detection" - }, - { - "input": "Assess credit risk for a $1,000,000 commercial real estate loan with $500,000 annual business income", - "expected_tools": ["credit_risk_analyzer"], - "possible_outputs": ["HIGH RISK", "VERY HIGH RISK", "loan-to-value", "66.7%", "debt service coverage", "2.0"], - "expected_output": "HIGH RISK", # Example, adjust as needed - "session_id": str(uuid.uuid4()), - "category": "credit_risk" - }, - { - "input": "Update customer contact information and address for account holder 22334", - "expected_tools": ["customer_account_manager"], - "possible_outputs": ["not found in system", "Customer ID 22334 not found in system.", "not found"], - "expected_output": "Customer ID 22334 not found in system.", # Example, adjust as needed - "session_id": str(uuid.uuid4()), - "category": "account_management" } ]) diff --git a/notebooks/code_samples/agents/langgraph_agent_simple_banking_demo.ipynb b/notebooks/code_samples/agents/langgraph_agent_simple_banking_demo.ipynb index 9afebb2e6..3530cd954 100644 --- a/notebooks/code_samples/agents/langgraph_agent_simple_banking_demo.ipynb +++ b/notebooks/code_samples/agents/langgraph_agent_simple_banking_demo.ipynb @@ -158,7 +158,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install -q \"validmind[llm]\" " + "%pip install -q \"validmind[llm]\" \"langgraph==0.3.21\"" ] }, { @@ -457,6 +457,9 @@ " - Be professional and thorough in your analysis\n", "\n", " Choose and use tools wisely to provide the most helpful banking assistance.\n", + " Describe the response in user friendly manner with details describing the tool output. \n", + " Provide the response in at least 500 words.\n", + " Generate a concise execution plan for the banking request.\n", " \"\"\"\n", "# Initialize the main LLM for banking responses\n", "main_llm = ChatOpenAI(\n", @@ -736,7 +739,7 @@ "\n", "vm_test_dataset = vm.init_dataset(\n", " input_id=\"banking_test_dataset\",\n", - " dataset=banking_test_dataset.sample(2),\n", + " dataset=banking_test_dataset,\n", " text_column=\"input\",\n", " target_column=\"possible_outputs\",\n", ")\n", @@ -768,7 +771,7 @@ "\n", "print(\"Banking Agent Predictions Generated Successfully!\")\n", "print(f\"Predictions assigned to {len(vm_test_dataset._df)} test cases\")\n", - "vm_test_dataset._df.head()" + "vm_test_dataset._df" ] }, { @@ -840,6 +843,15 @@ "result.log()" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vm_test_dataset.df.head(5)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1108,8 +1120,7 @@ " agent_output_column = \"banking_agent_model_output\",\n", "\n", ")\n", - "vm_test_dataset._df.head()\n", - "\n" + "vm_test_dataset._df.head()" ] }, { @@ -1162,7 +1173,7 @@ " \"ylabel\": \"Score\",\n", " \"figsize\": (8, 6)\n", " }\n", - ").log()\n" + ").log()" ] }, { @@ -1479,9 +1490,9 @@ ], "metadata": { "kernelspec": { - "display_name": "ValidMind (Poetry)", + "display_name": "validmind-1QuffXMV-py3.11", "language": "python", - "name": "validmind" + "name": "python3" }, "language_info": { "codemirror_mode": { diff --git a/validmind/tests/load.py b/validmind/tests/load.py index 4dc97d11a..9a9f13c53 100644 --- a/validmind/tests/load.py +++ b/validmind/tests/load.py @@ -127,7 +127,9 @@ def _inspect_signature( return inputs, params -def _get_test_function_from_provider(test_id: str, namespace: str) -> Callable[..., Any]: +def _get_test_function_from_provider( + test_id: str, namespace: str +) -> Callable[..., Any]: """Load a test function from the appropriate provider or scorer store. Args: @@ -146,9 +148,7 @@ def _get_test_function_from_provider(test_id: str, namespace: str) -> Callable[. return custom_scorer if not test_provider_store.has_test_provider(namespace): - raise LoadTestError( - f"No test provider found for namespace: {namespace}" - ) + raise LoadTestError(f"No test provider found for namespace: {namespace}") provider = test_provider_store.get_test_provider(namespace) From e56504f8f79f38f778ee462d3836a935fec7d0c8 Mon Sep 17 00:00:00 2001 From: Anil Sorathiya Date: Tue, 27 Jan 2026 16:54:33 +0000 Subject: [PATCH 2/3] remove toxicity test from notebook --- .../langgraph_agent_simple_banking_demo.ipynb | 25 ------------------- 1 file changed, 25 deletions(-) diff --git a/notebooks/code_samples/agents/langgraph_agent_simple_banking_demo.ipynb b/notebooks/code_samples/agents/langgraph_agent_simple_banking_demo.ipynb index 3530cd954..5d5f99771 100644 --- a/notebooks/code_samples/agents/langgraph_agent_simple_banking_demo.ipynb +++ b/notebooks/code_samples/agents/langgraph_agent_simple_banking_demo.ipynb @@ -1394,31 +1394,6 @@ ").log()" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Toxicity\n", - "\n", - "Let's ensure responses are professional and appropriate for banking contexts." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run_test(\n", - " \"validmind.data_validation.nlp.Toxicity\",\n", - " inputs={\n", - " \"dataset\": vm_test_dataset,\n", - " },\n", - ").log()" - ] - }, { "cell_type": "markdown", "metadata": {}, From c6d19bc30d5a523abd4e53977ac64582caaab935 Mon Sep 17 00:00:00 2001 From: Anil Sorathiya Date: Wed, 28 Jan 2026 11:46:55 +0000 Subject: [PATCH 3/3] remove stepefficiency description from notebook --- .../agents/langgraph_agent_simple_banking_demo.ipynb | 1 - notebooks/code_sharing/deepeval_integration_demo.ipynb | 1 - 2 files changed, 2 deletions(-) diff --git a/notebooks/code_samples/agents/langgraph_agent_simple_banking_demo.ipynb b/notebooks/code_samples/agents/langgraph_agent_simple_banking_demo.ipynb index 5d5f99771..46a3aa9e8 100644 --- a/notebooks/code_samples/agents/langgraph_agent_simple_banking_demo.ipynb +++ b/notebooks/code_samples/agents/langgraph_agent_simple_banking_demo.ipynb @@ -1006,7 +1006,6 @@ "3. **Execution Layer** – Measures end-to-end performance:\n", "\n", " * *TaskCompletionMetric* – whether the agent successfully completes the intended task\n", - " * *StepEfficiencyMetric* – whether the agent avoids unnecessary or redundant steps\n", "\n", "Together, these metrics enable granular diagnosis of agent behavior, help pinpoint where failures occur (reasoning, action, or execution), and support both development benchmarking and production monitoring." ] diff --git a/notebooks/code_sharing/deepeval_integration_demo.ipynb b/notebooks/code_sharing/deepeval_integration_demo.ipynb index d7fd31f85..ac8d0bafc 100644 --- a/notebooks/code_sharing/deepeval_integration_demo.ipynb +++ b/notebooks/code_sharing/deepeval_integration_demo.ipynb @@ -847,7 +847,6 @@ "3. **Execution Layer** – Measures end-to-end performance:\n", "\n", " * *TaskCompletionMetric* – whether the agent successfully completes the intended task\n", - " * *StepEfficiencyMetric* – whether the agent avoids unnecessary or redundant steps\n", "\n", "Together, these metrics enable granular diagnosis of agent behavior, help pinpoint where failures occur (reasoning, action, or execution), and support both development benchmarking and production monitoring." ]