diff --git a/express-server/src/routes/create.ts b/express-server/src/routes/create.ts index e4d497ae..5797e8db 100644 --- a/express-server/src/routes/create.ts +++ b/express-server/src/routes/create.ts @@ -255,7 +255,7 @@ const buildPipelineJob = ( cruxes: updatedConfig.cruxesEnabled, }, llm: { - model: "gpt-4o-mini", // ! Change when we allow different models + model: "gpt-5-mini", // ! Change when we allow different models }, }, data: updatedConfig.data, diff --git a/pyserver/config.py b/pyserver/config.py index 8ac24b10..88eff8db 100644 --- a/pyserver/config.py +++ b/pyserver/config.py @@ -8,7 +8,7 @@ DRY_RUN = False # cheapest for testing -MODEL = "gpt-4o-mini" # prod default: "gpt-4-turbo-preview" +MODEL = "gpt-5-mini" # prod default: "gpt-4-turbo-preview" COST_BY_MODEL = { # GPT-4o mini: Input is $0.150 / 1M tokens, Output is $0.600 / 1M tokens @@ -17,6 +17,8 @@ # GPT-4o : Input is $2.50 / 1M tokens, Output is $10.00/1M tokens # or: input is $0.0025/1K tokens, output is $0.01/1K tokens "gpt-4o": {"in_per_1K": 0.0025, "out_per_1K": 0.01}, + # GPT-5-mini: Input is $0.250 / 1M tokens, Output is $2.00/1M tokens + "gpt-5-mini": {"in_per_1K": 0.00025, "out_per_1K": 0.002}, } # for web-app mode, require at least 3 words in order to extract meaningful claims diff --git a/pyserver/main.py b/pyserver/main.py index 217de7d6..5a1affe6 100644 --- a/pyserver/main.py +++ b/pyserver/main.py @@ -499,8 +499,8 @@ def comments_to_tree( {"role": "system", "content": req.llm.system_prompt}, {"role": "user", "content": full_prompt}, ], - temperature=0.0, response_format={"type": "json_object"}, + reasoning_effort="minimal", ) try: tree = json.loads(response.choices[0].message.content) @@ -634,7 +634,6 @@ def comment_to_claims(llm: dict, comment: str, tree: dict, api_key: str, comment {"role": "system", "content": llm.system_prompt}, {"role": "user", "content": full_prompt}, ], - temperature=0.0, response_format=ClaimsSchema, ) @@ -652,7 +651,6 @@ def comment_to_claims(llm: dict,
comment: str, tree: dict, api_key: str, comment llm_metadata = { "model": llm.model_name, "prompt_version": "v1.0", # Update when prompt changes - "temperature": getattr(llm, 'temperature', None), } audit_logger.log_claims_extraction_empty( comment_id, @@ -684,7 +682,6 @@ def comment_to_claims(llm: dict, comment: str, tree: dict, api_key: str, comment llm_metadata = { "model": llm.model_name, "prompt_version": "v1.0", # Update when prompt changes - "temperature": getattr(llm, 'temperature', None), } # For now, claim_ids will be the same as comment_id since we don't have unique claim IDs yet @@ -1146,8 +1143,8 @@ def dedup_claims(claims: list, llm: LLMConfig, api_key: str, topic_name: str = " {"role": "system", "content": llm.system_prompt}, {"role": "user", "content": full_prompt}, ], - temperature=0.0, response_format={"type": "json_object"}, + reasoning_effort="minimal", ) try: deduped_claims = response.choices[0].message.content @@ -1723,8 +1720,8 @@ def generate_topic_summaries( {"role": "system", "content": llm_config["system_prompt"]}, {"role": "user", "content": full_prompt}, ], - temperature=0.0, response_format={"type": "json_object"}, + reasoning_effort="minimal" ) try: @@ -1888,8 +1885,9 @@ def cruxes_for_topic( {"role": "system", "content": llm.system_prompt}, {"role": "user", "content": full_prompt}, ], - temperature=0.0, response_format={"type": "json_object"}, + reasoning_effort="minimal", + ) crux = response.choices[0].message.content try: diff --git a/pyserver/test_pipeline.py b/pyserver/test_pipeline.py index 1ce80119..847fc3ee 100644 --- a/pyserver/test_pipeline.py +++ b/pyserver/test_pipeline.py @@ -53,7 +53,7 @@ # NOTE: gpt-4o-mini is cheaper/better for basic tests, but it fails on some very basic deduplication API_KEY = os.getenv('OPENAI_API_KEY') base_llm = { - "model_name" : "gpt-4o-mini", + "model_name" : "gpt-5-mini", "system_prompt": config.SYSTEM_PROMPT, "api_key" : API_KEY } diff --git a/pyserver/utils.py b/pyserver/utils.py index 
51c941fd..49d446c9 100644 --- a/pyserver/utils.py +++ b/pyserver/utils.py @@ -21,6 +21,7 @@ def token_cost(model_name:str, tok_in:int, tok_out:int): if model_name not in config.COST_BY_MODEL: print("model undefined!") return -1 + return 0.001 * (tok_in * config.COST_BY_MODEL[model_name]["in_per_1K"] + tok_out * config.COST_BY_MODEL[model_name]["out_per_1K"]) def cute_print(json_obj):