diff --git a/express-server/src/routes/create.ts b/express-server/src/routes/create.ts index e4d497ae..5797e8db 100644 --- a/express-server/src/routes/create.ts +++ b/express-server/src/routes/create.ts @@ -255,7 +255,7 @@ const buildPipelineJob = ( cruxes: updatedConfig.cruxesEnabled, }, llm: { - model: "gpt-4o-mini", // ! Change when we allow different models + model: "gpt-5-mini", // ! Change when we allow different models }, }, data: updatedConfig.data, diff --git a/pyserver/config.py b/pyserver/config.py index 8ac24b10..88eff8db 100644 --- a/pyserver/config.py +++ b/pyserver/config.py @@ -8,7 +8,7 @@ DRY_RUN = False # cheapest for testing -MODEL = "gpt-4o-mini" # prod default: "gpt-4-turbo-preview" +MODEL = "gpt-5-mini" # prod default: "gpt-4-turbo-preview" COST_BY_MODEL = { # GPT-4o mini: Input is $0.150 / 1M tokens, Output is $0.600 / 1M tokens @@ -17,6 +17,8 @@ # GPT-4o : Input is $2.50 / 1M tokens, Output is $10.00/1M tokens # or: input is $0.0025/1K tokens, output is $0.01/1K tokens "gpt-4o": {"in_per_1K": 0.0025, "out_per_1K": 0.01}, + # GPT-5-mini: Input is $0.250 / 1M tokens, Output is $2.00/1M tokens + "gpt-5-mini": {"in_per_1K": 0.00025, "out_per_1K": 0.002}, } # for web-app mode, require at least 3 words in order to extract meaningful claims diff --git a/pyserver/main.py b/pyserver/main.py index 217de7d6..5a1affe6 100644 --- a/pyserver/main.py +++ b/pyserver/main.py @@ -499,8 +499,8 @@ def comments_to_tree( {"role": "system", "content": req.llm.system_prompt}, {"role": "user", "content": full_prompt}, ], - temperature=0.0, response_format={"type": "json_object"}, + reasoning_effort="minimal", ) try: tree = json.loads(response.choices[0].message.content) @@ -634,7 +634,6 @@ def comment_to_claims(llm: dict, comment: str, tree: dict, api_key: str, comment {"role": "system", "content": llm.system_prompt}, {"role": "user", "content": full_prompt}, ], - temperature=0.0, response_format=ClaimsSchema, ) @@ -652,7 +651,6 @@ def comment_to_claims(llm: dict,
comment: str, tree: dict, api_key: str, comment llm_metadata = { "model": llm.model_name, "prompt_version": "v1.0", # Update when prompt changes - "temperature": getattr(llm, 'temperature', None), } audit_logger.log_claims_extraction_empty( comment_id, @@ -684,7 +682,6 @@ def comment_to_claims(llm: dict, comment: str, tree: dict, api_key: str, comment llm_metadata = { "model": llm.model_name, "prompt_version": "v1.0", # Update when prompt changes - "temperature": getattr(llm, 'temperature', None), } # For now, claim_ids will be the same as comment_id since we don't have unique claim IDs yet @@ -1146,8 +1143,8 @@ def dedup_claims(claims: list, llm: LLMConfig, api_key: str, topic_name: str = " {"role": "system", "content": llm.system_prompt}, {"role": "user", "content": full_prompt}, ], - temperature=0.0, response_format={"type": "json_object"}, + reasoning_effort="minimal", ) try: deduped_claims = response.choices[0].message.content @@ -1723,8 +1720,8 @@ def generate_topic_summaries( {"role": "system", "content": llm_config["system_prompt"]}, {"role": "user", "content": full_prompt}, ], - temperature=0.0, response_format={"type": "json_object"}, + reasoning_effort="minimal" ) try: @@ -1888,8 +1885,9 @@ def cruxes_for_topic( {"role": "system", "content": llm.system_prompt}, {"role": "user", "content": full_prompt}, ], - temperature=0.0, response_format={"type": "json_object"}, + reasoning_effort="minimal", + ) crux = response.choices[0].message.content try: diff --git a/pyserver/test_pipeline.py b/pyserver/test_pipeline.py index 1ce80119..847fc3ee 100644 --- a/pyserver/test_pipeline.py +++ b/pyserver/test_pipeline.py @@ -53,7 +53,7 @@ # NOTE: gpt-4o-mini is cheaper/better for basic tests, but it fails on some very basic deduplication API_KEY = os.getenv('OPENAI_API_KEY') base_llm = { - "model_name" : "gpt-4o-mini", + "model_name" : "gpt-5-mini", "system_prompt": config.SYSTEM_PROMPT, "api_key" : API_KEY } diff --git a/pyserver/utils.py b/pyserver/utils.py index 
51c941fd..49d446c9 100644 --- a/pyserver/utils.py +++ b/pyserver/utils.py @@ -21,6 +21,7 @@ def token_cost(model_name:str, tok_in:int, tok_out:int): if model_name not in config.COST_BY_MODEL: print("model undefined!") return -1 + return 0.001 * (tok_in * config.COST_BY_MODEL[model_name]["in_per_1K"] + tok_out * config.COST_BY_MODEL[model_name]["out_per_1K"]) def cute_print(json_obj):