README.md (6 changes: 3 additions & 3 deletions)

```diff
@@ -11,18 +11,18 @@ Agent Evaluation is a generative AI-powered framework for testing virtual agents
 
 Internally, Agent Evaluation implements an LLM agent (evaluator) that will orchestrate conversations with your own agent (target) and evaluate the responses during the conversation.
 
-## Key features
+## Key features
 
 - Built-in support for popular AWS services including [Amazon Bedrock](https://aws.amazon.com/bedrock/), [Amazon Q Business](https://aws.amazon.com/q/business/), and [Amazon SageMaker](https://aws.amazon.com/sagemaker/). You can also [bring your own agent](https://awslabs.github.io/agent-evaluation/targets/custom_targets/) to test using Agent Evaluation.
 - Orchestrate concurrent, multi-turn conversations with your agent while evaluating its responses.
 - Define [hooks](https://awslabs.github.io/agent-evaluation/hooks/) to perform additional tasks such as integration testing.
 - Can be incorporated into CI/CD pipelines to expedite the time to delivery while maintaining the stability of agents in production environments.
 
-## 📚 Documentation
+## Documentation
 
 To get started, please visit the full documentation [here](https://awslabs.github.io/agent-evaluation/). To contribute, please refer to [CONTRIBUTING.md](./CONTRIBUTING.md)
 
-## 👏 Contributors
+## Contributors
 
 Shout out to these awesome contributors:
 
```
src/agenteval/evaluators/canonical/evaluator.py (19 changes: 8 additions & 11 deletions)

```diff
@@ -13,9 +13,11 @@
 )
 from agenteval.test import TestResult
 
+from pathlib import Path
+
 logger = logging.getLogger(__name__)
 
-_PROMPT_TEMPLATE_ROOT = "evaluators/canonical"
+_PROMPT_TEMPLATE_ROOT = Path("evaluators/canonical")
 _SYSTEM_PROMPT_DIR = "system"
 _RUNTIME_PROMPT_DIR = "runtime"
 _PROMPT_TEMPLATE_NAMES = [
@@ -65,18 +67,13 @@ def __init__(
         """Initialize the evaluator."""
         super().__init__(**kwargs)
 
+        system_path = _PROMPT_TEMPLATE_ROOT / _SYSTEM_PROMPT_DIR
+        prompt_path = _PROMPT_TEMPLATE_ROOT / _RUNTIME_PROMPT_DIR
+
         self._prompt_template_map = {
             name: {
-                "system": jinja_env.get_template(
-                    os.path.join(
-                        _PROMPT_TEMPLATE_ROOT, _SYSTEM_PROMPT_DIR, f"{name}.jinja"
-                    )
-                ),
-                "prompt": jinja_env.get_template(
-                    os.path.join(
-                        _PROMPT_TEMPLATE_ROOT, _RUNTIME_PROMPT_DIR, f"{name}.jinja"
-                    )
-                ),
+                "system": jinja_env.get_template(Path.as_posix(system_path / f"{name}.jinja")),
+                "prompt": jinja_env.get_template(Path.as_posix(prompt_path / f"{name}.jinja")),
             }
             for name in _PROMPT_TEMPLATE_NAMES
         }
```
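The move from `os.path.join` to `pathlib` here is not just cosmetic: Jinja2 template names are always `/`-separated identifiers, not OS paths, so on Windows `os.path.join` produces back-slashed names that a loader will not match. `Path.as_posix(system_path / ...)` is the unbound spelling of `(system_path / ...).as_posix()`, which guarantees forward slashes. A minimal runnable sketch, using a `DictLoader` as a stand-in for the project's real `jinja_env` (whose loader setup is not shown in this diff) and a hypothetical template name:

```python
from pathlib import Path

from jinja2 import DictLoader, Environment

root = Path("evaluators/canonical")

# Jinja2 template names use '/' on every OS; as_posix() guarantees that,
# whereas str(root / ...) would yield back-slashes on Windows.
# "planning" is a hypothetical template name for illustration.
name = (root / "system" / "planning.jinja").as_posix()

# DictLoader stands in for the real loader so the sketch runs anywhere.
jinja_env = Environment(loader=DictLoader({name: "Hello {{ who }}!"}))

print(jinja_env.get_template(name).render(who="evaluator"))  # Hello evaluator!
```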
src/agenteval/evaluators/evaluator_factory.py (2 changes: 1 addition & 1 deletion)

```diff
@@ -17,7 +17,7 @@
 from agenteval.test import Test
 
 _EVALUATOR_METHOD_MAP = {
-    "canoncial": CanonicalEvaluator,
+    "canonical": CanonicalEvaluator,
 }
 
 _DEFAULT_MODEL_CONFIG_MAP = {
```
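This one-character fix is load-bearing: evaluator classes are resolved by dictionary lookup, so with the misspelled key the documented `canonical` evaluator name could never resolve. A hypothetical illustration (the factory's actual lookup code lies outside this hunk, and the stub class stands in for the real import):

```python
class CanonicalEvaluator:  # stub for illustration only
    pass

# Before the fix: the key itself is misspelled.
_EVALUATOR_METHOD_MAP = {"canoncial": CanonicalEvaluator}
print("canonical" in _EVALUATOR_METHOD_MAP)  # False -> KeyError on lookup

# After the fix: the documented evaluator name resolves.
_EVALUATOR_METHOD_MAP = {"canonical": CanonicalEvaluator}
print(_EVALUATOR_METHOD_MAP["canonical"])  # <class '__main__.CanonicalEvaluator'>
```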
src/agenteval/evaluators/model_config/__init__.py (7 changes: 7 additions & 0 deletions, new file)

```diff
@@ -0,0 +1,7 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+from .bedrock_model_config import ModelProvider, BedrockModelConfig
+from .preconfigured_model_configs import DEFAULT_CLAUDE_3_5_MODEL_CONFIG, DEFAULT_CLAUDE_3_MODEL_CONFIG, DEFAULT_CLAUDE_HAIKU_3_5_US_MODEL_CONFIG, DEFAULT_CLAUDE_US_3_7_MODEL_CONFIG, DEFAULT_LLAMA_3_3_70B_US_MODEL_CONFIG
+
+__all__ = ["ModelProvider", "BedrockModelConfig", "DEFAULT_CLAUDE_3_5_MODEL_CONFIG", "DEFAULT_CLAUDE_3_MODEL_CONFIG", "DEFAULT_CLAUDE_HAIKU_3_5_US_MODEL_CONFIG", "DEFAULT_CLAUDE_US_3_7_MODEL_CONFIG", "DEFAULT_LLAMA_3_3_70B_US_MODEL_CONFIG"]
```
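Re-exporting through the package `__init__` lets callers import from `agenteval.evaluators.model_config` directly instead of reaching into the submodules, and `__all__` pins the public surface for `from ... import *`. For example:

```python
# Enabled by the re-exports above; without them these names would have to be
# imported from .bedrock_model_config and .preconfigured_model_configs.
from agenteval.evaluators.model_config import (
    DEFAULT_CLAUDE_3_5_MODEL_CONFIG,
    BedrockModelConfig,
    ModelProvider,
)
```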
src/agenteval/summary.py (9 changes: 6 additions & 3 deletions)

```diff
@@ -7,6 +7,8 @@
 from agenteval.metrics import calculate_pass_rate_metric
 from agenteval.test import Test, TestResult
 
+from pathlib import Path
+
 _TEMPLATE_ROOT = "summary"
 _TEMPLATE_FILE_NAME = "agenteval_summary.md.jinja"
 
@@ -36,8 +38,9 @@ def create_markdown_summary(
     Returns:
         None
     """
-    template = jinja_env.get_template(os.path.join(_TEMPLATE_ROOT, _TEMPLATE_FILE_NAME))
-    summary_path = os.path.join(work_dir, os.path.splitext(_TEMPLATE_FILE_NAME)[0])
+    template_path = Path(_TEMPLATE_ROOT) / _TEMPLATE_FILE_NAME
+    template = jinja_env.get_template(Path.as_posix(template_path))
+    summary_path = Path(Path(work_dir) / Path(_TEMPLATE_FILE_NAME).stem).absolute()
 
     metrics = {"pass_rate": calculate_pass_rate_metric(pass_count, num_tests)}
 
@@ -49,5 +52,5 @@
 
 
 def _write_summary(path: str, summary: str):
-    with open(path, "w+") as f:
+    with open(path, "w+", encoding='utf-8') as f:
         f.write(summary)
```
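The `pathlib` rewrite preserves the old filename logic, since `Path.stem` strips only the final suffix, exactly as `os.path.splitext(...)[0]` did; the visible behavioral changes are that the summary path is now absolutized against the working directory and the file is written as UTF-8 regardless of the platform's locale default encoding. A quick sanity check of the equivalence:

```python
import os
from pathlib import Path

name = "agenteval_summary.md.jinja"

# .stem removes only the last suffix, matching os.path.splitext:
assert os.path.splitext(name)[0] == Path(name).stem == "agenteval_summary.md"

# New in this change: the path is resolved to an absolute location.
# ("work_dir" here is an illustrative value, not the function's argument.)
print((Path("work_dir") / Path(name).stem).absolute())
```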