diff --git a/README.md b/README.md
index 4eac2e9e..67457c6a 100644
--- a/README.md
+++ b/README.md
@@ -64,19 +64,50 @@
 Create a `settings.py` file in the `simulation_engine` folder (where `example-settings.py` is located):
 
 ```python
 from pathlib import Path
 
-OPENAI_API_KEY = "YOUR_API_KEY"
-KEY_OWNER = "YOUR_NAME"
-
-DEBUG = False
-
-MAX_CHUNK_SIZE = 4
-
-LLM_VERS = "gpt-4o-mini"
-
-BASE_DIR = f"{Path(__file__).resolve().parent.parent}"
-
-POPULATIONS_DIR = f"{BASE_DIR}/agent_bank/populations"
-LLM_PROMPT_DIR = f"{BASE_DIR}/simulation_engine/prompt_template"
+# API Keys
+OPENAI_API_KEY = "API_KEY"  # Replace with your actual OpenAI API key
+ANTHROPIC_API_KEY = "API_KEY"  # Replace with your actual Anthropic API key
+
+# Owner Information
+KEY_OWNER = "NAME"  # Replace with the name of the key owner
+
+# Debugging Configuration
+DEBUG = False  # Set to True to enable debug logs
+
+# Configuration for Chunk Size
+MAX_CHUNK_SIZE = 4  # Maximum size of data chunks to process
+
+# LLM Configuration
+LLM_VERS = "claude-3-5-sonnet-20241022"
+# Options:
+# - "gpt-4o-mini" (OpenAI GPT model)
+# - "claude-3-5-sonnet-20241022" (Anthropic Claude model)
+# - "gpt4all" (local open-source models via GPT4All)
+
+# GPT4All Model Settings
+LLM_MODEL = "MODEL GPT4ALL"  # Replace with a GGUF filename below (used when LLM_VERS = "gpt4all")
+# Options:
+# - "orca-mini-3b-gguf2-q4_0.gguf" (3 Billion Parameters, 4GB RAM)
+# - "Meta-Llama-3-8B-Instruct.Q4_0.gguf" (8 Billion Parameters, 8GB RAM)
+# - "Nous-Hermes-2-Mistral-7B-DPO.Q4_0.gguf" (7 Billion Parameters, 8GB RAM)
+
+# Notes:
+# - Choose the model based on your hardware capabilities and task requirements.
+# - Ensure you have sufficient RAM to load the selected model.
+# - Visit 📖 [GPT4All Documentation](https://docs.gpt4all.io/gpt4all_python/home.html) for detailed information.
+
+# Base Directory
+BASE_DIR = Path(__file__).resolve().parent.parent
+
+# Directory Configurations
+# - Populations Directory: Used for managing agent populations
+# - Prompt Template Directory: Contains LLM prompt templates
+POPULATIONS_DIR = BASE_DIR / "agent_bank" / "populations"
+LLM_PROMPT_DIR = BASE_DIR / "simulation_engine" / "prompt_template"
+
+# Note:
+# - Ensure `POPULATIONS_DIR` and `LLM_PROMPT_DIR` exist in your project structure.
+# - Adjust the paths as needed for your specific setup.
 ```
 
-Replace `"YOUR_API_KEY"` with your actual OpenAI API key and `"YOUR_NAME"` with your name.
+Replace each `"API_KEY"` placeholder with your actual OpenAI or Anthropic API key, and `"NAME"` with your name.
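As a quick sanity check after creating `settings.py`, a snippet along these lines should work (a minimal sketch; it assumes you run it from the repository root, and `simulation_engine.settings` is the same module that `gpt_structure.py` imports):

```python
# Sanity-check the new settings: print the selected backend and verify that
# the configured directories exist (they are pathlib.Path objects now).
from simulation_engine.settings import LLM_VERS, POPULATIONS_DIR, LLM_PROMPT_DIR

print(f"LLM backend: {LLM_VERS}")
for path in (POPULATIONS_DIR, LLM_PROMPT_DIR):
    print(f"{path}: {'found' if path.exists() else 'MISSING'}")
```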
@@ -219,6 +250,22 @@
 print(response["responses"])
 ```
 
 Due to participant privacy concerns, the full agent bank containing over 1,000 generative agents based on real interviews is not publicly available at the moment. However, we plan to make aggregated responses on fixed tasks accessible for general research use in the coming months. Researchers interested in accessing individual responses on open tasks can request restricted access by contacting the authors and following a review process that ensures ethical considerations are met.
 
+## Test-running a local model with GPT4All
+GPT4All supports a wide range of open-source models optimized for diverse use cases, including general language understanding, code generation, and specialized tasks. Below are some commonly used models:
+
+| Model Name | Filesize | RAM Required | Parameters | Quantization | Developer | License |
+|-------------------------------------------|----------|--------------|------------|--------------|---------------------|--------------------|
+| **Meta-Llama-3-8B-Instruct.Q4_0.gguf** | 4.66 GB | 8 GB | 8 Billion | q4_0 | Meta | [Llama 3 License](https://llama.meta.com/llama3/license/) |
+| **Nous-Hermes-2-Mistral-7B-DPO.Q4_0.gguf**| 4.11 GB | 8 GB | 7 Billion | q4_0 | Mistral & Nous Research | [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0) |
+| **Phi-3-mini-4k-instruct.Q4_0.gguf** | 2.18 GB | 4 GB | 3.8 Billion| q4_0 | Microsoft | [MIT](https://opensource.org/licenses/MIT) |
+| **orca-mini-3b-gguf2-q4_0.gguf** | 1.98 GB | 4 GB | 3 Billion | q4_0 | Microsoft | [CC-BY-NC-SA-4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) |
+| **gpt4all-13b-snoozy-q4_0.gguf** | 7.37 GB | 16 GB | 13 Billion | q4_0 | Nomic AI | [GPL](https://www.gnu.org/licenses/gpl-3.0.html) |
+
+For the complete list of models and detailed documentation on installation, configuration, and usage, visit the official GPT4All Python library documentation:
+📖 [GPT4All Documentation](https://docs.gpt4all.io/gpt4all_python/home.html)
+📖 [GPT4All model catalog (models3.json)](https://raw.githubusercontent.com/nomic-ai/gpt4all/main/gpt4all-chat/metadata/models3.json)
+
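To try a local model outside the simulation engine first, a short standalone test like the following should suffice (a minimal sketch; `orca-mini-3b-gguf2-q4_0.gguf` is simply the smallest entry in the table above, and GPT4All downloads the file on first use):

```python
# Standalone GPT4All smoke test: downloads orca-mini-3b (~2 GB) on first
# run, then generates a short completion entirely on the local machine.
from gpt4all import GPT4All

model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")  # needs ~4 GB RAM (see table)
with model.chat_session():
    print(model.generate("Name three uses of a local LLM.", max_tokens=128))
```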
 ## Contributing
 
 We welcome contributions to enhance the functionality and usability of this project. If you are interested in contributing, please follow these steps:

diff --git a/requirements.txt b/requirements.txt
index 295c5509..07d5f7a9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,4 +12,4 @@
 pydantic==2.9.2
 pydantic_core==2.23.4
 sniffio==1.3.1
 tqdm==4.67.0
-typing_extensions==4.12.2
+typing_extensions==4.12.2
\ No newline at end of file

diff --git a/simulation_engine/example-settings.py b/simulation_engine/example-settings.py
index bf6db3a2..2bb3ea96 100644
--- a/simulation_engine/example-settings.py
+++ b/simulation_engine/example-settings.py
@@ -1,17 +1,46 @@
 from pathlib import Path
 
-OPENAI_API_KEY = "API_KEY"
-KEY_OWNER = "NAME"
+# API Keys
+OPENAI_API_KEY = "API_KEY"  # Replace with your actual OpenAI API key
+ANTHROPIC_API_KEY = "API_KEY"  # Replace with your actual Anthropic API key
 
+# Owner Information
+KEY_OWNER = "NAME"  # Replace with the name of the key owner
 
-DEBUG = False
+# Debugging Configuration
+DEBUG = False  # Set to True to enable debug logs
 
-MAX_CHUNK_SIZE = 4
+# Configuration for Chunk Size
+MAX_CHUNK_SIZE = 4  # Maximum size of data chunks to process
 
-LLM_VERS = "gpt-4o-mini"
+# LLM Configuration
+LLM_VERS = "claude-3-5-sonnet-20241022"
+# Options:
+# - "gpt-4o-mini" (OpenAI GPT model)
+# - "claude-3-5-sonnet-20241022" (Anthropic Claude model)
+# - "gpt4all" (local open-source models via GPT4All)
 
-BASE_DIR = f"{Path(__file__).resolve().parent.parent}"
+# GPT4All Model Settings
+LLM_MODEL = "MODEL GPT4ALL"  # Replace with a GGUF filename below (used when LLM_VERS = "gpt4all")
+# Options:
+# - "orca-mini-3b-gguf2-q4_0.gguf" (3 Billion Parameters, 4GB RAM)
+# - "Meta-Llama-3-8B-Instruct.Q4_0.gguf" (8 Billion Parameters, 8GB RAM)
+# - "Nous-Hermes-2-Mistral-7B-DPO.Q4_0.gguf" (7 Billion Parameters, 8GB RAM)
 
-## To do: Are the following needed in the new structure? Ideally Populations_Dir is for the user to define.
-POPULATIONS_DIR = f"{BASE_DIR}/agent_bank/populations"
-LLM_PROMPT_DIR = f"{BASE_DIR}/simulation_engine/prompt_template"
\ No newline at end of file
+# Notes:
+# - Choose the model based on your hardware capabilities and task requirements.
+# - Ensure you have sufficient RAM to load the selected model.
+# - Visit 📖 [GPT4All Documentation](https://docs.gpt4all.io/gpt4all_python/home.html) for detailed information.
+
+# Base Directory
+BASE_DIR = Path(__file__).resolve().parent.parent
+
+# Directory Configurations
+# - Populations Directory: Used for managing agent populations
+# - Prompt Template Directory: Contains LLM prompt templates
+POPULATIONS_DIR = BASE_DIR / "agent_bank" / "populations"
+LLM_PROMPT_DIR = BASE_DIR / "simulation_engine" / "prompt_template"
+
+# Note:
+# - Ensure `POPULATIONS_DIR` and `LLM_PROMPT_DIR` exist in your project structure.
+# - Adjust the paths as needed for your specific setup.

diff --git a/simulation_engine/gpt_structure.py b/simulation_engine/gpt_structure.py
index 24c7f9ba..c37459b4 100644
--- a/simulation_engine/gpt_structure.py
+++ b/simulation_engine/gpt_structure.py
@@ -5,9 +5,40 @@
 from simulation_engine.settings import *
 
+# Conditional backend setup based on LLM_VERS
+gpt4all_instance = None
+if LLM_VERS == "gpt4all":
+  try:
+    from gpt4all import GPT4All, Embed4All
+  except ImportError:
+    raise ImportError(
+      "The 'gpt4all' library is not installed. Please install it with 'pip install gpt4all' to use GPT4All models."
+    )
+  try:
+    gpt4all_instance = GPT4All(LLM_MODEL, n_ctx=28672)
+    gpt4all_embeddings = Embed4All("nomic-embed-text-v1.5.f16.gguf")
+  except Exception as e:
+    raise RuntimeError(
+      f"Failed to initialize GPT4All with the model '{LLM_MODEL}'. "
+      "Ensure the model file exists and is correctly configured."
+    ) from e
+elif LLM_VERS.startswith("claude"):
+  try:
+    import anthropic
+  except ImportError:
+    raise ImportError(
+      "The 'anthropic' library is not installed. Please install it with 'pip install anthropic' to use Anthropic models."
+    )
+  try:
+    anthropic_client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
+  except Exception as e:
+    raise RuntimeError(
+      f"Failed to initialize the Anthropic client for '{LLM_VERS}'. "
+      "Ensure ANTHROPIC_API_KEY is set correctly in settings.py."
+    ) from e
+
 openai.api_key = OPENAI_API_KEY
-
 # ============================================================================
 # #######################[SECTION 1: HELPER FUNCTIONS] #######################
 # ============================================================================
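Because callers go through `gpt_request` (updated below) and dispatch happens on `LLM_VERS`, one smoke test exercises whichever backend is configured (a sketch; it assumes a valid API key or an already-downloaded local model):

```python
# Backend-agnostic smoke test: gpt_request routes to OpenAI, Anthropic,
# or GPT4All depending on the LLM_VERS value in settings.py.
from simulation_engine.gpt_structure import gpt_request

print(gpt_request("Reply with the single word: ready", max_tokens=10))
```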
@@ -53,7 +84,7 @@ def generate_prompt(prompt_input: Union[str, List[str]],
 def gpt_request(prompt: str,
                 model: str = "gpt-4o",
                 max_tokens: int = 1500) -> str:
-  """Make a request to OpenAI's GPT model."""
+  """Make a request to OpenAI, Anthropic, or GPT4All based on LLM_VERS."""
   if model == "o1-preview":
     try:
       client = openai.OpenAI(api_key=OPENAI_API_KEY)
@@ -64,7 +95,34 @@
       return response.choices[0].message.content
     except Exception as e:
       return f"GENERATION ERROR: {str(e)}"
+  if LLM_VERS == "gpt4all":
+    try:
+      response = gpt4all_instance.generate(
+        prompt=prompt,
+        max_tokens=max_tokens,
+        temp=0.7
+      )
+      return response
+    except Exception as e:
+      raise RuntimeError(
+        f"GENERATION ERROR GPT4ALL: {str(e)}"
+      )
+  elif LLM_VERS.startswith("claude"):
+    try:
+      response = anthropic_client.messages.create(
+        model=LLM_VERS,
+        messages=[{"role": "user", "content": prompt}],
+        max_tokens=max_tokens,
+        temperature=0.7,
+      )
+      return response.content[0].text
+    except Exception as e:
+      raise RuntimeError(
+        f"GENERATION ERROR ANTHROPIC: {str(e)}"
+      )
+
   try:
     client = openai.OpenAI(api_key=OPENAI_API_KEY)
     response = client.chat.completions.create(
@@ -75,8 +133,10 @@
     )
     return response.choices[0].message.content
   except Exception as e:
-    return f"GENERATION ERROR: {str(e)}"
-
+    raise RuntimeError(
+      f"GENERATION ERROR OPENAI: {str(e)}"
+    )
+
 def gpt4_vision(messages: List[dict], max_tokens: int = 1500) -> str:
   """Make a request to OpenAI's GPT-4 Vision model."""
@@ -159,8 +219,15 @@ def get_text_embedding(text: str,
     raise ValueError("Input text must be a non-empty string.")
   text = text.replace("\n", " ").strip()
-  response = openai.embeddings.create(
-      input=[text], model=model).data[0].embedding
+
+  if LLM_VERS == "gpt4all":
+    # Temporary workaround: Embed4All returns 768-dim vectors, so the vector
+    # is concatenated with itself (2 x 768 = 1536) to match the dimensionality
+    # of the OpenAI embeddings used elsewhere.
+    embedding = list(gpt4all_embeddings.embed(text=[text], dimensionality=768)[0])
+    response = embedding + embedding
+  else:
+    response = openai.embeddings.create(
+        input=[text], model=model).data[0].embedding
+
   return response
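To confirm the embedding workaround above, a quick length check helps (a sketch; it assumes `LLM_VERS = "gpt4all"` in `settings.py`, so the Nomic embedding model is loaded):

```python
# Verify the GPT4All embedding path: the duplicated 768-dim vector should
# match the 1536 dimensions of the OpenAI embeddings used elsewhere.
from simulation_engine.gpt_structure import get_text_embedding

vec = get_text_embedding("generative agents")
print(len(vec))  # expected: 1536 (2 x 768) when LLM_VERS == "gpt4all"
```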