From bb29d4c9d93e7aaa1c63144ccc6d578e011671ba Mon Sep 17 00:00:00 2001
From: Adonai Vera <adonai.vera@gmail.com>
Date: Thu, 21 Nov 2024 13:15:54 -0500
Subject: [PATCH 1/3] Added anthropic and gpt4all

---
 README.md                             | 73 ++++++++++++++++++++-----
 requirements.txt                      |  2 +-
 simulation_engine/example-settings.py | 47 +++++++++++++---
 simulation_engine/gpt_structure.py    | 79 +++++++++++++++++++++++++--
 4 files changed, 172 insertions(+), 29 deletions(-)

diff --git a/README.md b/README.md
index 4eac2e9e..c5018e9e 100644
--- a/README.md
+++ b/README.md
@@ -64,19 +64,50 @@ Create a `settings.py` file in the `simulation_engine` folder (where `example-se
 ```python
 from pathlib import Path
 
-OPENAI_API_KEY = "YOUR_API_KEY"
-KEY_OWNER = "YOUR_NAME"
-
-DEBUG = False
-
-MAX_CHUNK_SIZE = 4
-
-LLM_VERS = "gpt-4o-mini"
-
-BASE_DIR = f"{Path(__file__).resolve().parent.parent}"
-
-POPULATIONS_DIR = f"{BASE_DIR}/agent_bank/populations"
-LLM_PROMPT_DIR = f"{BASE_DIR}/simulation_engine/prompt_template"
+# API Keys
+OPENAI_API_KEY = "API_KEY"  # Replace with your actual OpenAI API key
+ANTHROPIC_API_KEY = "API_KEY"  # Replace with your actual Anthropic API key
+
+# Owner Information
+KEY_OWNER = "NAME"  # Replace with the name of the key owner
+
+# Debugging Configuration
+DEBUG = False  # Set to True for enabling debug logs
+
+# Configuration for Chunk Size
+MAX_CHUNK_SIZE = 4  # Maximum size of data chunks to process
+
+# LLM Configuration
+LLM_VERS = "claude-3-5-sonnet-20241022"  
+# Options: 
+# - "gpt-4o-mini" (OpenAI GPT model)
+# - "claude-3-5-sonnet-20241022" (Anthropic Claude model)
+# - "gpt4all" (open-source model run through GPT4All; choose it with LLM_MODEL below)
+
+# GPT4All Model Settings
+LLM_MODEL = "MODEL GPT4ALL"  
+# Options: 
+# - "orca-mini-3b-gguf2-q4_0.gguf" (3 Billion Parameters, 4GB RAM)
+# - "Meta-Llama-3-8B-Instruct.Q4_0.gguf" (8 Billion Parameters, 8GB RAM)
+# - "Nous-Hermes-2-Mistral-7B-DPO.Q4_0.gguf" (7 Billion Parameters, 8GB RAM)
+
+# Notes:
+# - Choose the model based on your hardware capabilities and task requirements.
+# - Ensure you have sufficient RAM to load the selected model.
+# - See the GPT4All documentation for details: https://docs.gpt4all.io/gpt4all_python/home.html
+
+# Base Directory
+BASE_DIR = Path(__file__).resolve().parent.parent
+
+# Directory Configurations
+# - Populations Directory: Used for managing agent populations
+# - Prompt Template Directory: Contains LLM prompt templates
+POPULATIONS_DIR = BASE_DIR / "agent_bank" / "populations"
+LLM_PROMPT_DIR = BASE_DIR / "simulation_engine" / "prompt_template"
+
+# Note:
+# - Ensure `POPULATIONS_DIR` and `LLM_PROMPT_DIR` exist in your project structure.
+# - Adjust the paths as needed for your specific setup.
 ```
 
 Replace `"YOUR_API_KEY"` with your actual OpenAI API key and `"YOUR_NAME"` with your name.
@@ -219,6 +250,22 @@ print(response["responses"])
 
 Due to participant privacy concerns, the full agent bank containing over 1,000 generative agents based on real interviews is not publicly available at the moment. However, we plan to make aggregated responses on fixed tasks accessible for general research use in the coming months. Researchers interested in accessing individual responses on open tasks can request restricted access by contacting the authors and following a review process that ensures ethical considerations are met.
 
+## Running a local model with GPT4All
+GPT4All supports a wide range of open-source models optimized for diverse use cases, including general language understanding, code generation, and specialized tasks. Below are some commonly used models:
+
+| Model Name                                 | Filesize | RAM Required | Parameters | Quantization | Developer           | License            |
+|-------------------------------------------|----------|--------------|------------|--------------|---------------------|--------------------|
+| **Meta-Llama-3-8B-Instruct.Q4_0.gguf**    | 4.66 GB  | 8 GB         | 8 Billion  | q4_0         | Meta                | [Llama 3 License](https://llama.meta.com/llama3/license/) |
+| **Nous-Hermes-2-Mistral-7B-DPO.Q4_0.gguf**| 4.11 GB  | 8 GB         | 7 Billion  | q4_0         | Mistral & Nous Research | [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0) |
+| **Phi-3-mini-4k-instruct.Q4_0.gguf**      | 2.18 GB  | 4 GB         | 3.8 Billion| q4_0         | Microsoft           | [MIT](https://opensource.org/licenses/MIT) |
+| **orca-mini-3b-gguf2-q4_0.gguf**          | 1.98 GB  | 4 GB         | 3 Billion  | q4_0         | Microsoft           | [CC-BY-NC-SA-4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) |
+| **gpt4all-13b-snoozy-q4_0.gguf**          | 7.37 GB  | 16 GB        | 13 Billion | q4_0         | Nomic AI            | [GPL](https://www.gnu.org/licenses/gpl-3.0.html) |
+
+
+For the complete list of models and detailed documentation on installation, configuration, and usage, visit the official GPT4All Python library documentation:
+📖 [GPT4All Documentation](https://docs.gpt4all.io/gpt4all_python/home.html)
+
+
 ## Contributing
 
 We welcome contributions to enhance the functionality and usability of this project. If you are interested in contributing, please follow these steps:
diff --git a/requirements.txt b/requirements.txt
index 295c5509..07d5f7a9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,4 +12,4 @@ pydantic==2.9.2
 pydantic_core==2.23.4
 sniffio==1.3.1
 tqdm==4.67.0
-typing_extensions==4.12.2
+typing_extensions==4.12.2
\ No newline at end of file
diff --git a/simulation_engine/example-settings.py b/simulation_engine/example-settings.py
index bf6db3a2..2bb3ea96 100644
--- a/simulation_engine/example-settings.py
+++ b/simulation_engine/example-settings.py
@@ -1,17 +1,46 @@
 from pathlib import Path
 
-OPENAI_API_KEY = "API_KEY"
-KEY_OWNER = "NAME"
+# API Keys
+OPENAI_API_KEY = "API_KEY"  # Replace with your actual OpenAI API key
+ANTHROPIC_API_KEY = "API_KEY"  # Replace with your actual Anthropic API key
 
+# Owner Information
+KEY_OWNER = "NAME"  # Replace with the name of the key owner
 
-DEBUG = False
+# Debugging Configuration
+DEBUG = False  # Set to True for enabling debug logs
 
-MAX_CHUNK_SIZE = 4
+# Configuration for Chunk Size
+MAX_CHUNK_SIZE = 4  # Maximum size of data chunks to process
 
-LLM_VERS = "gpt-4o-mini"
+# LLM Configuration
+LLM_VERS = "claude-3-5-sonnet-20241022"  
+# Options: 
+# - "gpt-4o-mini" (OpenAI GPT model)
+# - "claude-3-5-sonnet-20241022" (Anthropic Claude model)
+# - "gpt4all" (open-source model run through GPT4All; choose it with LLM_MODEL below)
 
-BASE_DIR = f"{Path(__file__).resolve().parent.parent}"
+# GPT4All Model Settings
+LLM_MODEL = "MODEL GPT4ALL"  
+# Options: 
+# - "orca-mini-3b-gguf2-q4_0.gguf" (3 Billion Parameters, 4GB RAM)
+# - "Meta-Llama-3-8B-Instruct.Q4_0.gguf" (8 Billion Parameters, 8GB RAM)
+# - "Nous-Hermes-2-Mistral-7B-DPO.Q4_0.gguf" (7 Billion Parameters, 8GB RAM)
 
-## To do: Are the following needed in the new structure? Ideally Populations_Dir is for the user to define.
-POPULATIONS_DIR = f"{BASE_DIR}/agent_bank/populations" 
-LLM_PROMPT_DIR = f"{BASE_DIR}/simulation_engine/prompt_template"
\ No newline at end of file
+# Notes:
+# - Choose the model based on your hardware capabilities and task requirements.
+# - Ensure you have sufficient RAM to load the selected model.
+# - See the GPT4All documentation for details: https://docs.gpt4all.io/gpt4all_python/home.html
+
+# Base Directory
+BASE_DIR = Path(__file__).resolve().parent.parent
+
+# Directory Configurations
+# - Populations Directory: Used for managing agent populations
+# - Prompt Template Directory: Contains LLM prompt templates
+POPULATIONS_DIR = BASE_DIR / "agent_bank" / "populations"
+LLM_PROMPT_DIR = BASE_DIR / "simulation_engine" / "prompt_template"
+
+# Note:
+# - Ensure `POPULATIONS_DIR` and `LLM_PROMPT_DIR` exist in your project structure.
+# - Adjust the paths as needed for your specific setup.
diff --git a/simulation_engine/gpt_structure.py b/simulation_engine/gpt_structure.py
index 24c7f9ba..6d42d8da 100644
--- a/simulation_engine/gpt_structure.py
+++ b/simulation_engine/gpt_structure.py
@@ -5,9 +5,40 @@
 
 from simulation_engine.settings import *
 
+# Conditional import and client setup for the selected backend (GPT4All or Anthropic)
+gpt4all_instance = None
+if LLM_VERS == "gpt4all":
+  try:
+    from gpt4all import GPT4All, Embed4All
+  except ImportError:
+    raise ImportError(
+      "The 'gpt4all' library is not installed. Please install it with 'pip install gpt4all' to use GPT4All models."
+    )
+  try:
+    gpt4all_instance = GPT4All(LLM_MODEL, n_ctx=28672)
+    gpt4all_embeddings = Embed4All("nomic-embed-text-v1.5.f16.gguf")
+  except Exception as e:
+    raise RuntimeError(
+      f"Failed to initialize GPT4All with the model '{LLM_MODEL}'. "
+      "Ensure the model file exists and is correctly configured."
+    ) from e
+elif LLM_VERS.startswith("claude"):
+  try:
+    import anthropic
+  except ImportError:
+    raise ImportError(
+      "The 'anthropic' library is not installed. Please install it with 'pip install anthropic' to use anthropic models."
+    )
+  try:
+    anthropic_client = anthropic.Client(api_key=ANTHROPIC_API_KEY)
+  except Exception as e:
+    raise RuntimeError(
+      f"Failed to initialize anthropic with the model '{LLM_VERS}'. "
+      "Ensure the model file exists and is correctly configured."
+    ) from e
+    
 openai.api_key = OPENAI_API_KEY
 
-
 # ============================================================================
 # #######################[SECTION 1: HELPER FUNCTIONS] #######################
 # ============================================================================
@@ -53,7 +84,7 @@ def generate_prompt(prompt_input: Union[str, List[str]],
 def gpt_request(prompt: str, 
                 model: str = "gpt-4o", 
                 max_tokens: int = 1500) -> str:
-  """Make a request to OpenAI's GPT model."""
+  """Make a request to OpenAI, GPT4All, or Anthropic based on LLM_VERS."""
   if model == "o1-preview": 
     try:
       client = openai.OpenAI(api_key=OPENAI_API_KEY)
@@ -64,7 +95,34 @@ def gpt_request(prompt: str,
       return response.choices[0].message.content
     except Exception as e:
       return f"GENERATION ERROR: {str(e)}"
+  if LLM_VERS == "gpt4all":
+    try:
+      response = gpt4all_instance.generate(
+        prompt=prompt, 
+        max_tokens=max_tokens,
+        temp=0.7
+      )
+
+      return response
+    except Exception as e:
+      raise ImportError(
+        f"GENERATION ERROR GPT4ALL: {str(e)}"
+      )
+  elif LLM_VERS.startswith("claude"):
+    try:
+      response = anthropic_client.messages.create(
+        model=LLM_VERS, 
+        messages=[{"role": "user", "content": prompt}],
+        max_tokens=max_tokens,
+        temperature=0.7,
+      )
 
+      return response.content[0].text
+    except Exception as e:
+      raise ImportError(
+        f"GENERATION ERROR ANTTHROPIC: {str(e)}"
+      )
+    
   try:
     client = openai.OpenAI(api_key=OPENAI_API_KEY)
     response = client.chat.completions.create(
@@ -75,8 +133,10 @@ def gpt_request(prompt: str,
     )
     return response.choices[0].message.content
   except Exception as e:
-    return f"GENERATION ERROR: {str(e)}"
-
+    raise ImportError(
+      f"GENERATION ERROR OPENAI: {str(e)}"
+    )
+  
 
 def gpt4_vision(messages: List[dict], max_tokens: int = 1500) -> str:
   """Make a request to OpenAI's GPT-4 Vision model."""
@@ -159,8 +219,15 @@ def get_text_embedding(text: str,
     raise ValueError("Input text must be a non-empty string.")
 
   text = text.replace("\n", " ").strip()
-  response = openai.embeddings.create(
-    input=[text], model=model).data[0].embedding
+
+  if LLM_VERS == "gpt4allx":
+    # Temporal solution to get the same embedding twice
+    response = list(gpt4all_embeddings.embed(text=[text], dimensionality=768)[0]) + list(gpt4all_embeddings.embed(text=[text], dimensionality=768)[0]) 
+
+  else:
+    response = openai.embeddings.create(
+      input=[text], model=model).data[0].embedding
+    
   return response
 
 

From 7ec572ef3e3d5250aae9088cd7a72b026e554c50 Mon Sep 17 00:00:00 2001
From: Adonai Vera <adonai.vera@gmail.com>
Date: Thu, 21 Nov 2024 13:34:16 -0500
Subject: [PATCH 2/3] Fixed the readme to add more information about models
 with gpt4all

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index c5018e9e..67457c6a 100644
--- a/README.md
+++ b/README.md
@@ -264,7 +264,7 @@ GPT4All supports a wide range of open-source models optimized for diverse use ca
 
 For the complete list of models and detailed documentation on installation, configuration, and usage, visit the official GPT4All Python library documentation:
 📖 [GPT4All Documentation](https://docs.gpt4all.io/gpt4all_python/home.html)
-
+📖 [GPT4All model metadata (models3.json)](https://raw.githubusercontent.com/nomic-ai/gpt4all/main/gpt4all-chat/metadata/models3.json)
 
 ## Contributing
 

From d463735ea134ead24bab27d050a6685f456bfab7 Mon Sep 17 00:00:00 2001
From: Adonai Vera <adonai.vera@gmail.com>
Date: Fri, 22 Nov 2024 14:49:06 -0500
Subject: [PATCH 3/3] fix typo x

---
 simulation_engine/gpt_structure.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/simulation_engine/gpt_structure.py b/simulation_engine/gpt_structure.py
index 6d42d8da..c37459b4 100644
--- a/simulation_engine/gpt_structure.py
+++ b/simulation_engine/gpt_structure.py
@@ -220,7 +220,7 @@ def get_text_embedding(text: str,
 
   text = text.replace("\n", " ").strip()
 
-  if LLM_VERS == "gpt4allx":
+  if LLM_VERS == "gpt4all":
     # Temporal solution to get the same embedding twice
     response = list(gpt4all_embeddings.embed(text=[text], dimensionality=768)[0]) + list(gpt4all_embeddings.embed(text=[text], dimensionality=768)[0])