From 1f65c29c4dc9b15036f42c5944461d2891deb6da Mon Sep 17 00:00:00 2001
From: xie river <chuanxie@amazon.com>
Date: Tue, 25 Jun 2024 10:15:22 +0000
Subject: [PATCH] add bedrock claude 3.5

---
 README.md                  |  6 +--
 backend/llm.py             | 85 ++++++++------------------------------
 frontend/src/lib/models.ts |  2 +
 3 files changed, 22 insertions(+), 71 deletions(-)

diff --git a/README.md b/README.md
index 1caa9cf..8c520b3 100644
--- a/README.md
+++ b/README.md
@@ -37,9 +37,9 @@ We also just added experimental support for taking a video/screen recording of a
 
 ## 🛠 Getting Started
 
-### 使用AWS Bedrock Claude 3 sonnet注意事项
-- 如果使用Bedrock Claude 3需要在运行机器上安装 https://aws.amazon.com/cn/cli/， 并配置aws iam 账号的ak sk，另外还需要开通该账号Bedrock Claude 3 访问的权限。
-- 如果使用Bedrock Claude 3，则无须配置OPENAI_API_KEY 或者 ANTHROPIC_API_KEY 到.env中
+### 使用AWS Bedrock Claude 3/3.5 sonnet注意事项
+- 如果使用Bedrock Claude 3/3.5需要在运行机器上安装 https://aws.amazon.com/cn/cli/， 并配置aws iam 账号的ak sk，另外还需要开通该账号Bedrock Claude 3/3.5 访问的权限。
+- 如果使用Bedrock Claude 3/3.5，则无须配置OPENAI_API_KEY 或者 ANTHROPIC_API_KEY 到.env中
 
 
 The app has a React/Vite frontend and a FastAPI backend. You will need an OpenAI API key with access to the GPT-4 Vision API or an Anthropic key if you want to use Claude Sonnet, or for experimental video support.
diff --git a/backend/llm.py b/backend/llm.py
index 869f925..039e7eb 100644
--- a/backend/llm.py
+++ b/backend/llm.py
@@ -19,29 +19,36 @@ from datetime import datetime
 from botocore.config import Config
 	
 #get modelARN
-region = 'us-west-2' 
+REGION = os.environ.get('region', 'us-east-1')
+# Leave the profile unset by default so boto3 can fall back to env-var or instance-role credentials.
+PROFILE = os.environ.get('profile')
+session = boto3.Session(profile_name=PROFILE, region_name=REGION)
+bedrock_runtime = session.client(
+    "bedrock-runtime", config=Config(read_timeout=1000)  # keep the long read timeout the previous client used
+)
 
-config = Config(read_timeout=1000) #timeout
-                      
-boto3_bedrock = boto3.client('bedrock',region)
-bedrock_runtime = boto3.client('bedrock-runtime',config=config)
-modelId = "anthropic.claude-3-sonnet-20240229-v1:0"
 # Actual model versions that are passed to the LLMs and stored in our logs
 class Llm(Enum):
     GPT_4_VISION = "gpt-4-vision-preview"
     GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09"
     GPT_4O_2024_05_13 = "gpt-4o-2024-05-13"
     CLAUDE_3_SONNET = "claude-3-sonnet-20240229"
+    CLAUDE_3_5_SONNET = "claude-3-5-sonnet-20240620"
     CLAUDE_3_OPUS = "claude-3-opus-20240229"
     CLAUDE_3_HAIKU = "claude-3-haiku-20240307"
 
 
+BEDROCK_LLM_MODELID_LIST = {Llm.CLAUDE_3_SONNET: 'anthropic.claude-3-sonnet-20240229-v1:0',  # Llm member -> Bedrock model ID
+                            Llm.CLAUDE_3_5_SONNET: 'anthropic.claude-3-5-sonnet-20240620-v1:0',}
+
 # Will throw errors if you send a garbage string
 def convert_frontend_str_to_llm(frontend_str: str) -> Llm:
     if frontend_str == "gpt_4_vision":
         return Llm.GPT_4_VISION
     elif frontend_str == "claude_3_sonnet":
         return Llm.CLAUDE_3_SONNET
+    elif frontend_str == "claude_3_5_sonnet":
+        return Llm.CLAUDE_3_5_SONNET
     else:
         return Llm(frontend_str)
 
@@ -85,65 +92,6 @@ async def stream_openai_response(
     return full_response
 
 
-# TODO: Have a seperate function that translates OpenAI messages to Claude messages
-async def stream_claude_response_bak(
-    messages: List[ChatCompletionMessageParam],
-    api_key: str,
-    callback: Callable[[str], Awaitable[None]],
-) -> str:
-
-    client = AsyncAnthropic(api_key=api_key)
-
-    # Base parameters
-    model = Llm.CLAUDE_3_SONNET
-    max_tokens = 4096
-    temperature = 0.0
-
-    # Translate OpenAI messages to Claude messages
-    system_prompt = cast(str, messages[0].get("content"))
-    claude_messages = [dict(message) for message in messages[1:]]
-    for message in claude_messages:
-        if not isinstance(message["content"], list):
-            continue
-
-        for content in message["content"]:  # type: ignore
-            if content["type"] == "image_url":
-                content["type"] = "image"
-
-                # Extract base64 data and media type from data URL
-                # Example base64 data URL: data:image/png;base64,iVBOR...
-                image_data_url = cast(str, content["image_url"]["url"])
-                media_type = image_data_url.split(";")[0].split(":")[1]
-                base64_data = image_data_url.split(",")[1]
-
-                # Remove OpenAI parameter
-                del content["image_url"]
-
-                content["source"] = {
-                    "type": "base64",
-                    "media_type": media_type,
-                    "data": base64_data,
-                }
-
-    # Stream Claude response
-    async with client.messages.stream(
-        model=model.value,
-        max_tokens=max_tokens,
-        temperature=temperature,
-        system=system_prompt,
-        messages=claude_messages,  # type: ignore
-    ) as stream:
-        async for text in stream.text_stream:
-            await callback(text)
-
-    # Return final message
-    response = await stream.get_final_message()
-
-    # Close the Anthropic client
-    await client.close()
-
-    return response.content[0].text
-
 async def stream_claude_response(
     messages: List[ChatCompletionMessageParam],
     api_key: str,
@@ -153,7 +101,6 @@ async def stream_claude_response(
     # client = AsyncAnthropic(api_key=api_key)
 
     # Base parameters
-    model = Llm.CLAUDE_3_SONNET
     max_tokens = 4096
     temperature = 0.0
 
@@ -190,7 +137,8 @@ async def stream_claude_response(
             "anthropic_version": "bedrock-2023-05-31",
             "max_tokens": max_tokens,
             "messages": claude_messages,
-            "temperature":temperature
+            "temperature":temperature,
+            "system":system_prompt,
         }
     }
     
@@ -224,10 +172,11 @@ async def stream_claude_response_native(
     api_key: str,
     callback: Callable[[str], Awaitable[None]],
     include_thinking: bool = False,
-    model: Llm = Llm.CLAUDE_3_OPUS,
+    model: Llm = Llm.CLAUDE_3_5_SONNET,
 ) -> str:
 
     # client = AsyncAnthropic(api_key=api_key)
+    modelId = BEDROCK_LLM_MODELID_LIST[model]
 
     # Base model parameters
     max_tokens = 4096
diff --git a/frontend/src/lib/models.ts b/frontend/src/lib/models.ts
index 0320b9a..f193931 100644
--- a/frontend/src/lib/models.ts
+++ b/frontend/src/lib/models.ts
@@ -1,6 +1,7 @@
 // Keep in sync with backend (llm.py)
 // Order here matches dropdown order
 export enum CodeGenerationModel {
+  CLAUDE_3_5_SONNET = "claude_3_5_sonnet",
   CLAUDE_3_SONNET = "claude_3_sonnet",
   GPT_4O_2024_05_13 = "gpt-4o-2024-05-13",
   GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09",
@@ -15,4 +16,5 @@ export const CODE_GENERATION_MODEL_DESCRIPTIONS: {
   "gpt-4-turbo-2024-04-09": { name: "GPT-4 Turbo (Apr 2024)", inBeta: false },
   gpt_4_vision: { name: "GPT-4 Vision (Nov 2023)", inBeta: false },
   claude_3_sonnet: { name: "Claude 3 Sonnet", inBeta: false },
+  claude_3_5_sonnet: { name: "Claude 3.5 Sonnet", inBeta: false },
 };