73 changes: 60 additions & 13 deletions README.md
@@ -64,19 +64,50 @@ Create a `settings.py` file in the `simulation_engine` folder (where `example-settings.py` is)
```python
from pathlib import Path

OPENAI_API_KEY = "YOUR_API_KEY"
KEY_OWNER = "YOUR_NAME"

DEBUG = False

MAX_CHUNK_SIZE = 4

LLM_VERS = "gpt-4o-mini"

BASE_DIR = f"{Path(__file__).resolve().parent.parent}"

POPULATIONS_DIR = f"{BASE_DIR}/agent_bank/populations"
LLM_PROMPT_DIR = f"{BASE_DIR}/simulation_engine/prompt_template"
# API Keys
OPENAI_API_KEY = "API_KEY" # Replace with your actual OpenAI API key
ANTHROPIC_API_KEY = "API_KEY" # Replace with your actual Anthropic API key

# Owner Information
KEY_OWNER = "NAME" # Replace with the name of the key owner

# Debugging Configuration
DEBUG = False  # Set to True to enable debug logs

# Configuration for Chunk Size
MAX_CHUNK_SIZE = 4 # Maximum size of data chunks to process

# LLM Configuration
LLM_VERS = "claude-3-5-sonnet-20241022"
# Options:
# - "gpt-4o-mini" (OpenAI GPT model)
# - "claude-3-5-sonnet-20241022" (Anthropic Claude model)
# - "gpt4all" (Open-source GPT model)

# GPT4All Model Settings
LLM_MODEL = "MODEL GPT4ALL"
# Options:
# - "orca-mini-3b-gguf2-q4_0.gguf" (3 Billion Parameters, 4GB RAM)
# - "Meta-Llama-3-8B-Instruct.Q4_0.gguf" (8 Billion Parameters, 8GB RAM)
# - "Nous-Hermes-2-Mistral-7B-DPO.Q4_0.gguf" (7 Billion Parameters, 8GB RAM)

# Notes:
# - Choose the model based on your hardware capabilities and task requirements.
# - Ensure you have sufficient RAM to load the selected model.
# - See the GPT4All documentation for details: https://docs.gpt4all.io/gpt4all_python/home.html

# Base Directory
BASE_DIR = Path(__file__).resolve().parent.parent

# Directory Configurations
# - Populations Directory: Used for managing agent populations
# - Prompt Template Directory: Contains LLM prompt templates
POPULATIONS_DIR = BASE_DIR / "agent_bank" / "populations"
LLM_PROMPT_DIR = BASE_DIR / "simulation_engine" / "prompt_template"

# Note:
# - Ensure `POPULATIONS_DIR` and `LLM_PROMPT_DIR` exist in your project structure.
# - Adjust the paths as needed for your specific setup.
```

Replace `"YOUR_API_KEY"` with your actual OpenAI API key and `"YOUR_NAME"` with your name.
@@ -219,6 +250,22 @@ print(response["responses"])

Due to participant privacy concerns, the full agent bank containing over 1,000 generative agents based on real interviews is not publicly available at the moment. However, we plan to make aggregated responses on fixed tasks accessible for general research use in the coming months. Researchers interested in accessing individual responses on open tasks can request restricted access by contacting the authors and following a review process that ensures ethical considerations are met.

## Test-running a local model with GPT4All
GPT4All supports a wide range of open-source models optimized for diverse use cases, including general language understanding, code generation, and specialized tasks. Below are some commonly used models:

| Model Name | File Size | RAM Required | Parameters | Quantization | Developer | License |
|-------------------------------------------|----------|--------------|------------|--------------|---------------------|--------------------|
| **Meta-Llama-3-8B-Instruct.Q4_0.gguf** | 4.66 GB | 8 GB | 8 Billion | q4_0 | Meta | [Llama 3 License](https://llama.meta.com/llama3/license/) |
| **Nous-Hermes-2-Mistral-7B-DPO.Q4_0.gguf**| 4.11 GB | 8 GB | 7 Billion | q4_0 | Mistral & Nous Research | [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0) |
| **Phi-3-mini-4k-instruct.Q4_0.gguf** | 2.18 GB | 4 GB | 3.8 Billion| q4_0 | Microsoft | [MIT](https://opensource.org/licenses/MIT) |
| **orca-mini-3b-gguf2-q4_0.gguf** | 1.98 GB | 4 GB | 3 Billion | q4_0 | Microsoft | [CC-BY-NC-SA-4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) |
| **gpt4all-13b-snoozy-q4_0.gguf** | 7.37 GB | 16 GB | 13 Billion | q4_0 | Nomic AI | [GPL](https://www.gnu.org/licenses/gpl-3.0.html) |


For the complete list of models and detailed documentation on installation, configuration, and usage, see the official GPT4All Python library documentation:
📖 [GPT4All Documentation](https://docs.gpt4all.io/gpt4all_python/home.html)
📖 [GPT4All model metadata (`models3.json`)](https://raw.githubusercontent.com/nomic-ai/gpt4all/main/gpt4all-chat/metadata/models3.json)
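
To smoke-test one of these models outside the simulation engine, a minimal sketch using the `gpt4all` Python package (the model file is downloaded automatically on first use; pick a filename from the table above that fits your hardware):

```python
from gpt4all import GPT4All

# Downloads orca-mini (~2 GB) on first run; needs roughly 4 GB of RAM.
model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
with model.chat_session():
    print(model.generate("Introduce yourself in one sentence.", max_tokens=64))
```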

## Contributing

We welcome contributions to enhance the functionality and usability of this project. If you are interested in contributing, please follow these steps:
2 changes: 1 addition & 1 deletion requirements.txt
@@ -12,4 +12,4 @@ pydantic==2.9.2
pydantic_core==2.23.4
sniffio==1.3.1
tqdm==4.67.0
typing_extensions==4.12.2
typing_extensions==4.12.2
47 changes: 38 additions & 9 deletions simulation_engine/example-settings.py
@@ -1,17 +1,46 @@
from pathlib import Path

OPENAI_API_KEY = "API_KEY"
KEY_OWNER = "NAME"
# API Keys
OPENAI_API_KEY = "API_KEY" # Replace with your actual OpenAI API key
ANTHROPIC_API_KEY = "API_KEY" # Replace with your actual Anthropic API key

# Owner Information
KEY_OWNER = "NAME" # Replace with the name of the key owner

DEBUG = False
# Debugging Configuration
DEBUG = False  # Set to True to enable debug logs

MAX_CHUNK_SIZE = 4
# Configuration for Chunk Size
MAX_CHUNK_SIZE = 4 # Maximum size of data chunks to process

LLM_VERS = "gpt-4o-mini"
# LLM Configuration
LLM_VERS = "claude-3-5-sonnet-20241022"
# Options:
# - "gpt-4o-mini" (OpenAI GPT model)
# - "claude-3-5-sonnet-20241022" (Anthropic Claude model)
# - "gpt4all" (Open-source GPT model)

BASE_DIR = f"{Path(__file__).resolve().parent.parent}"
# GPT4All Model Settings
LLM_MODEL = "MODEL GPT4ALL"
# Options:
# - "orca-mini-3b-gguf2-q4_0.gguf" (3 Billion Parameters, 4GB RAM)
# - "Meta-Llama-3-8B-Instruct.Q4_0.gguf" (8 Billion Parameters, 8GB RAM)
# - "Nous-Hermes-2-Mistral-7B-DPO.Q4_0.gguf" (7 Billion Parameters, 8GB RAM)

## To do: Are the following needed in the new structure? Ideally Populations_Dir is for the user to define.
POPULATIONS_DIR = f"{BASE_DIR}/agent_bank/populations"
LLM_PROMPT_DIR = f"{BASE_DIR}/simulation_engine/prompt_template"
# Notes:
# - Choose the model based on your hardware capabilities and task requirements.
# - Ensure you have sufficient RAM to load the selected model.
# - See the GPT4All documentation for details: https://docs.gpt4all.io/gpt4all_python/home.html

# Base Directory
BASE_DIR = Path(__file__).resolve().parent.parent

# Directory Configurations
# - Populations Directory: Used for managing agent populations
# - Prompt Template Directory: Contains LLM prompt templates
POPULATIONS_DIR = BASE_DIR / "agent_bank" / "populations"
LLM_PROMPT_DIR = BASE_DIR / "simulation_engine" / "prompt_template"

# Note:
# - Ensure `POPULATIONS_DIR` and `LLM_PROMPT_DIR` exist in your project structure.
# - Adjust the paths as needed for your specific setup.
79 changes: 73 additions & 6 deletions simulation_engine/gpt_structure.py
@@ -5,9 +5,40 @@

from simulation_engine.settings import *

# Conditional imports for the optional LLM backends
gpt4all_instance = None
if LLM_VERS == "gpt4all":
    try:
        from gpt4all import GPT4All, Embed4All
    except ImportError:
        raise ImportError(
            "The 'gpt4all' library is not installed. Please install it with "
            "'pip install gpt4all' to use GPT4All models."
        )
    try:
        gpt4all_instance = GPT4All(LLM_MODEL, n_ctx=28672)
        gpt4all_embeddings = Embed4All("nomic-embed-text-v1.5.f16.gguf")
    except Exception as e:
        raise RuntimeError(
            f"Failed to initialize GPT4All with the model '{LLM_MODEL}'. "
            "Ensure the model file exists and is correctly configured."
        ) from e
elif LLM_VERS.startswith("claude"):
    try:
        import anthropic
    except ImportError:
        raise ImportError(
            "The 'anthropic' library is not installed. Please install it with "
            "'pip install anthropic' to use Anthropic models."
        )
    try:
        anthropic_client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
    except Exception as e:
        raise RuntimeError(
            f"Failed to initialize the Anthropic client for model '{LLM_VERS}'. "
            "Ensure your API key is valid."
        ) from e

openai.api_key = OPENAI_API_KEY
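# OpenAI is the fallback generation backend and the default embedding backend
# (see get_text_embedding), so its key is set unconditionally.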


# ============================================================================
# #######################[SECTION 1: HELPER FUNCTIONS] #######################
# ============================================================================
@@ -53,7 +84,7 @@ def generate_prompt(prompt_input: Union[str, List[str]],
def gpt_request(prompt: str,
model: str = "gpt-4o",
max_tokens: int = 1500) -> str:
"""Make a request to OpenAI's GPT model."""
"""Make a request to OpenAI or GPT4All based on LLM_VERS."""
if model == "o1-preview":
try:
client = openai.OpenAI(api_key=OPENAI_API_KEY)
@@ -64,7 +95,34 @@ def gpt_request(prompt: str,
return response.choices[0].message.content
except Exception as e:
return f"GENERATION ERROR: {str(e)}"
if LLM_VERS == "gpt4all":
try:
response = gpt4all_instance.generate(
prompt=prompt,
max_tokens=max_tokens,
temp=0.7
)

return response
except Exception as e:
raise ImportError(
f"GENERATION ERROR GPT4ALL: {str(e)}"
)
    elif LLM_VERS.startswith("claude"):
        try:
            response = anthropic_client.messages.create(
                model=LLM_VERS,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=max_tokens,
                temperature=0.7,
            )
            return response.content[0].text
        except Exception as e:
            raise RuntimeError(
                f"GENERATION ERROR ANTHROPIC: {str(e)}"
            ) from e

try:
client = openai.OpenAI(api_key=OPENAI_API_KEY)
response = client.chat.completions.create(
@@ -75,8 +133,10 @@ def gpt_request(prompt: str,
)
return response.choices[0].message.content
except Exception as e:
return f"GENERATION ERROR: {str(e)}"

        raise RuntimeError(
            f"GENERATION ERROR OPENAI: {str(e)}"
        ) from e


def gpt4_vision(messages: List[dict], max_tokens: int = 1500) -> str:
"""Make a request to OpenAI's GPT-4 Vision model."""
@@ -159,8 +219,15 @@ def get_text_embedding(text: str,
raise ValueError("Input text must be a non-empty string.")

text = text.replace("\n", " ").strip()
response = openai.embeddings.create(
input=[text], model=model).data[0].embedding

if LLM_VERS == "gpt4all":
# Temporal solution to get the same embedding twice
response = list(gpt4all_embeddings.embed(text=[text], dimensionality=768)[0]) + list(gpt4all_embeddings.embed(text=[text], dimensionality=768)[0])

else:
response = openai.embeddings.create(
input=[text], model=model).data[0].embedding

return response

