From 2b1e1b725283dde665ed32d46f2e2b46b87d742f Mon Sep 17 00:00:00 2001 From: JGalego Date: Fri, 16 Aug 2024 02:40:31 +0100 Subject: [PATCH 1/2] Added Amazon Bedrock support --- README.md | 1 + opro/evaluation/evaluate_instructions.py | 23 +++++++ opro/optimization/optimize_instructions.py | 55 +++++++++++++++-- .../optimize_linear_regression.py | 15 +++++ opro/optimization/optimize_tsp.py | 15 +++++ opro/prompt_utils.py | 61 +++++++++++++++++++ 6 files changed, 166 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index e3a8bbd0..88dad1b0 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ The code has been verified to work under `Python 3.10.13` with the following dep - google.generativeai (0.1.0) - immutabledict (3.0.0) - openai (0.27.2) +- boto3 (1.34.162) ``` ## Usage diff --git a/opro/evaluation/evaluate_instructions.py b/opro/evaluation/evaluate_instructions.py index d1f085d0..9265a929 100644 --- a/opro/evaluation/evaluate_instructions.py +++ b/opro/evaluation/evaluate_instructions.py @@ -269,6 +269,29 @@ def main(_): scorer_llm_dict.update(scorer_finetuned_palm_dict) call_scorer_server_func = call_scorer_finetuned_palm_server_func + elif scorer_llm_name.startswith("bedrock"): + # Amazon Bedrock models + scorer_bedrock_max_decode_steps = 1024 + scorer_bedrock_temperature = 0.0 + + scorer_bedrock_dict = dict() + scorer_bedrock_dict["max_decode_steps"] = scorer_bedrock_max_decode_steps + scorer_bedrock_dict["temperature"] = scorer_bedrock_temperature + scorer_bedrock_dict["num_decodes"] = 1 + scorer_bedrock_dict["batch_size"] = 1 + scorer_bedrock_dict["num_servers"] = 1 + + scorer_llm_dict = { + "model_type": scorer_llm_name, + } + scorer_llm_dict.update(scorer_bedrock_dict) + call_scorer_server_func = functools.partial( + prompt_utils.call_amazon_bedrock_func, + model=scorer_llm_name.split("/")[-1], + max_decode_steps=scorer_bedrock_max_decode_steps, + temperature=scorer_bedrock_temperature, + ) + else: # GPT models assert 
scorer_llm_name.lower() in {"gpt-3.5-turbo", "gpt-4"} diff --git a/opro/optimization/optimize_instructions.py b/opro/optimization/optimize_instructions.py index 4cf3effd..71c7d944 100644 --- a/opro/optimization/optimize_instructions.py +++ b/opro/optimization/optimize_instructions.py @@ -160,12 +160,12 @@ def main(_): "text-bison", "gpt-3.5-turbo", "gpt-4", - } + } or scorer_llm_name.startswith("bedrock") assert optimizer_llm_name in { "text-bison", "gpt-3.5-turbo", "gpt-4", - } + } or optimizer_llm_name.startswith("bedrock") assert meta_prompt_type in { "both_instructions_and_exemplars", "instructions_only", @@ -191,6 +191,10 @@ def main(_): if scorer_llm_name in {"gpt-3.5-turbo", "gpt-4"}: assert openai_api_key, "The OpenAI API key must be provided." openai.api_key = openai_api_key + elif scorer_llm_name.startswith("bedrock"): + # For more information on how to configure boto3 credentials, see + # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html + pass else: assert scorer_llm_name == "text-bison" assert ( @@ -201,6 +205,8 @@ def main(_): if optimizer_llm_name in {"gpt-3.5-turbo", "gpt-4"}: assert openai_api_key, "The OpenAI API key must be provided." 
openai.api_key = openai_api_key + elif optimizer_llm_name.startswith("bedrock"): + pass else: assert optimizer_llm_name == "text-bison" assert ( @@ -274,6 +280,29 @@ def main(_): scorer_llm_dict.update(scorer_finetuned_palm_dict) call_scorer_server_func = call_scorer_finetuned_palm_server_func + elif scorer_llm_name.startswith("bedrock"): + # Amazon Bedrock models + scorer_bedrock_max_decode_steps = 1024 + scorer_bedrock_temperature = 0.0 + + scorer_bedrock_dict = dict() + scorer_bedrock_dict["max_decode_steps"] = scorer_bedrock_max_decode_steps + scorer_bedrock_dict["temperature"] = scorer_bedrock_temperature + scorer_bedrock_dict["num_decodes"] = 1 + scorer_bedrock_dict["batch_size"] = 1 + scorer_bedrock_dict["num_servers"] = 1 + + scorer_llm_dict = { + "model_type": scorer_llm_name, + } + scorer_llm_dict.update(scorer_bedrock_dict) + call_scorer_server_func = functools.partial( + prompt_utils.call_amazon_bedrock_func, + model=scorer_llm_name.split("/")[-1], + max_decode_steps=scorer_bedrock_max_decode_steps, + temperature=scorer_bedrock_temperature, + ) + else: assert scorer_llm_name.lower() in {"gpt-3.5-turbo", "gpt-4"} scorer_gpt_max_decode_steps = 1024 @@ -335,6 +364,22 @@ def main(_): optimizer_llm_dict.update(optimizer_finetuned_palm_dict) call_optimizer_server_func = call_optimizer_finetuned_palm_server_func + elif optimizer_llm_name.startswith("bedrock"): + optimizer_bedrock_max_decode_steps = 512 + optimizer_bedrock_temperature = 1.0 + + optimizer_llm_dict = dict() + optimizer_llm_dict["max_decode_steps"] = optimizer_bedrock_max_decode_steps + optimizer_llm_dict["temperature"] = optimizer_bedrock_temperature + optimizer_llm_dict["batch_size"] = 1 + optimizer_llm_dict["num_decodes"] = 1 + call_optimizer_server_func = functools.partial( + prompt_utils.call_amazon_bedrock_func, + model=optimizer_llm_name.split("/")[-1], + max_decode_steps=optimizer_bedrock_max_decode_steps, + temperature=optimizer_bedrock_temperature, + ) + else: assert optimizer_llm_name 
in {"gpt-3.5-turbo", "gpt-4"} optimizer_gpt_max_decode_steps = 512 @@ -679,14 +724,16 @@ def main(_): ) # ========== set other optimization experiment hyperparameters ============== - if scorer_llm_name == "text-bison": + if scorer_llm_name == "text-bison" or \ + scorer_llm_name.startswith("bedrock"): old_instruction_score_threshold = 0.0 # old_instruction_score_threshold = 0.15 # for GSM8K else: assert scorer_llm_name in {"gpt-3.5-turbo", "gpt-4"} old_instruction_score_threshold = 0.3 - if scorer_llm_name == "text-bison": + if scorer_llm_name == "text-bison" or \ + scorer_llm_name.startswith("bedrock"): extract_final_answer_by_prompting_again = False include_qa = False evaluate_in_parallel = False diff --git a/opro/optimization/optimize_linear_regression.py b/opro/optimization/optimize_linear_regression.py index 0e02d698..4d5b28b8 100644 --- a/opro/optimization/optimize_linear_regression.py +++ b/opro/optimization/optimize_linear_regression.py @@ -139,6 +139,21 @@ def main(_): optimizer_llm_dict.update(optimizer_finetuned_palm_dict) call_optimizer_server_func = call_optimizer_finetuned_palm_server_func + elif optimizer_llm_name.startswith("bedrock"): + optimizer_bedrock_max_decode_steps = 1024 + optimizer_bedrock_temperature = 1.0 + + optimizer_llm_dict = dict() + optimizer_llm_dict["max_decode_steps"] = optimizer_bedrock_max_decode_steps + optimizer_llm_dict["temperature"] = optimizer_bedrock_temperature + optimizer_llm_dict["batch_size"] = 1 + call_optimizer_server_func = functools.partial( + prompt_utils.call_amazon_bedrock_func, + model=optimizer_llm_name.split("/")[-1], + max_decode_steps=optimizer_bedrock_max_decode_steps, + temperature=optimizer_bedrock_temperature, + ) + else: assert optimizer_llm_name in {"gpt-3.5-turbo", "gpt-4"} optimizer_gpt_max_decode_steps = 1024 diff --git a/opro/optimization/optimize_tsp.py b/opro/optimization/optimize_tsp.py index 0fde9498..2b87976b 100644 --- a/opro/optimization/optimize_tsp.py +++ 
b/opro/optimization/optimize_tsp.py @@ -142,6 +142,21 @@ def main(_): optimizer_llm_dict.update(optimizer_finetuned_palm_dict) call_optimizer_server_func = call_optimizer_finetuned_palm_server_func + elif optimizer_llm_name.startswith("bedrock"): + optimizer_bedrock_max_decode_steps = 1024 + optimizer_bedrock_temperature = 1.0 + + optimizer_llm_dict = dict() + optimizer_llm_dict["max_decode_steps"] = optimizer_bedrock_max_decode_steps + optimizer_llm_dict["temperature"] = optimizer_bedrock_temperature + optimizer_llm_dict["batch_size"] = 1 + call_optimizer_server_func = functools.partial( + prompt_utils.call_amazon_bedrock_func, + model=optimizer_llm_name.split("/")[-1], + max_decode_steps=optimizer_bedrock_max_decode_steps, + temperature=optimizer_bedrock_temperature, + ) + else: assert optimizer_llm_name in {"gpt-3.5-turbo", "gpt-4"} optimizer_gpt_max_decode_steps = 1024 diff --git a/opro/prompt_utils.py b/opro/prompt_utils.py index a98f420e..1e7c6fe4 100644 --- a/opro/prompt_utils.py +++ b/opro/prompt_utils.py @@ -16,6 +16,7 @@ import time import google.generativeai as palm import openai +import boto3 def call_openai_server_single_prompt( @@ -130,3 +131,63 @@ def call_palm_server_from_cloud( return call_palm_server_from_cloud( input_text, max_decode_steps=max_decode_steps, temperature=temperature ) + + +def call_amazon_bedrock_single_prompt( + prompt, model="anthropic.claude-3-sonnet-20240229-v1:0", max_decode_steps=20, temperature=0.8 +): + """Makes a call to Amazon Bedrock via Converse API.""" + bedrock_runtime = boto3.client('bedrock-runtime') + try: + response = bedrock_runtime.converse( + modelId=model, + inferenceConfig={ + "temperature": temperature, + "maxTokens": max_decode_steps + }, + messages=[{ + "role": "user", + "content": [{ + "text": prompt + }] + }] + ) + return response["output"]["message"]["content"][0]["text"] + + except (bedrock_runtime.exceptions.ModelTimeoutException, + bedrock_runtime.exceptions.ThrottlingException, + 
bedrock_runtime.exceptions.ServiceUnavailableException) as e: + retry_time = e.retry_after if hasattr(e, "retry_after") else 30 + print(f"{e} occurred. Retrying in {retry_time} seconds...") + time.sleep(retry_time) + return call_amazon_bedrock_single_prompt( + prompt, model=model, max_decode_steps=max_decode_steps, temperature=temperature + ) + + except OSError as e: + retry_time = 5  # Adjust the retry time as needed + print( + f"Connection error occurred: {e}. Retrying in {retry_time} seconds..." + ) + time.sleep(retry_time) + return call_amazon_bedrock_single_prompt( + prompt, model=model, max_decode_steps=max_decode_steps, temperature=temperature + ) + + +def call_amazon_bedrock_func( + inputs, model="anthropic.claude-3-sonnet-20240229-v1:0", max_decode_steps=20, temperature=0.8 +): + """Function to call Amazon Bedrock with a list of input strings.""" + if isinstance(inputs, str): + inputs = [inputs] + outputs = [] + for input_str in inputs: + output = call_amazon_bedrock_single_prompt( + input_str, + model=model, + max_decode_steps=max_decode_steps, + temperature=temperature, + ) + outputs.append(output) + return outputs \ No newline at end of file From 05d6b58798cf52cdfaa6abe5b7883c5118da2128 Mon Sep 17 00:00:00 2001 From: JGalego Date: Fri, 16 Aug 2024 03:18:03 +0100 Subject: [PATCH 2/2] Added missing optimizer checks (LR+TSP) --- opro/optimization/optimize_linear_regression.py | 4 +++- opro/optimization/optimize_tsp.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/opro/optimization/optimize_linear_regression.py b/opro/optimization/optimize_linear_regression.py index 4d5b28b8..6d341990 100644 --- a/opro/optimization/optimize_linear_regression.py +++ b/opro/optimization/optimize_linear_regression.py @@ -75,13 +75,15 @@ def main(_): "text-bison", "gpt-3.5-turbo", "gpt-4", - } + } or optimizer_llm_name.startswith("bedrock") openai_api_key = _OPENAI_API_KEY.value palm_api_key = _PALM_API_KEY.value if optimizer_llm_name in {"gpt-3.5-turbo", "gpt-4"}: assert 
openai_api_key, "The OpenAI API key must be provided." openai.api_key = openai_api_key + elif optimizer_llm_name.startswith("bedrock"): + pass else: assert optimizer_llm_name == "text-bison" assert ( diff --git a/opro/optimization/optimize_tsp.py b/opro/optimization/optimize_tsp.py index 2b87976b..983f0467 100644 --- a/opro/optimization/optimize_tsp.py +++ b/opro/optimization/optimize_tsp.py @@ -78,13 +78,15 @@ def main(_): "text-bison", "gpt-3.5-turbo", "gpt-4", - } + } or optimizer_llm_name.startswith("bedrock") openai_api_key = _OPENAI_API_KEY.value palm_api_key = _PALM_API_KEY.value if optimizer_llm_name in {"gpt-3.5-turbo", "gpt-4"}: assert openai_api_key, "The OpenAI API key must be provided." openai.api_key = openai_api_key + elif optimizer_llm_name.startswith("bedrock"): + pass else: assert optimizer_llm_name == "text-bison" assert (