From a2d6fda7fdf7cf3a87abbd85f3b7cd1339cdd06f Mon Sep 17 00:00:00 2001
From: Abi Raja <abimanyuraja@gmail.com>
Date: Mon, 15 Jul 2024 16:33:39 -0400
Subject: [PATCH 1/2] update QA tests to use GPT-4o and Claude 3.5 Sonnet

---
 frontend/src/tests/qa.test.ts | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/frontend/src/tests/qa.test.ts b/frontend/src/tests/qa.test.ts
index b5274d1..9c6c973 100644
--- a/frontend/src/tests/qa.test.ts
+++ b/frontend/src/tests/qa.test.ts
@@ -16,14 +16,16 @@ describe("e2e tests", () => {
   let browser: Browser;
   let page: Page;
 
-  const DEBUG = false;
+  const DEBUG = true;
   const IS_HEADLESS = true;
 
   const stacks = Object.values(Stack).slice(0, DEBUG ? 1 : undefined);
-  const models = Object.values(CodeGenerationModel).slice(
-    0,
-    DEBUG ? 1 : undefined
-  );
+  const models = DEBUG
+    ? [
+        CodeGenerationModel.GPT_4O_2024_05_13,
+        CodeGenerationModel.CLAUDE_3_5_SONNET_2024_06_20,
+      ]
+    : Object.values(CodeGenerationModel);
 
   beforeAll(async () => {
     browser = await puppeteer.launch({ headless: IS_HEADLESS });

From 9f732c4f5d5784873b964b3bdea9647874864bf5 Mon Sep 17 00:00:00 2001
From: Abi Raja <abimanyuraja@gmail.com>
Date: Mon, 15 Jul 2024 18:51:22 -0400
Subject: [PATCH 2/2] update max tokens for Claude 3.5 Sonnet to newly
 supported limit (8192)

---
 backend/llm.py       | 3 ++-
 backend/run_evals.py | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/backend/llm.py b/backend/llm.py
index 0637416..2b71102 100644
--- a/backend/llm.py
+++ b/backend/llm.py
@@ -88,7 +88,7 @@ async def stream_claude_response(
     client = AsyncAnthropic(api_key=api_key)
 
     # Base parameters
-    max_tokens = 4096
+    max_tokens = 8192
     temperature = 0.0
 
     # Translate OpenAI messages to Claude messages
@@ -126,6 +126,7 @@ async def stream_claude_response(
         temperature=temperature,
         system=system_prompt,
         messages=claude_messages,  # type: ignore
+        extra_headers={"anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"},
     ) as stream:
         async for text in stream.text_stream:
             await callback(text)
diff --git a/backend/run_evals.py b/backend/run_evals.py
index bbf355a..ff5dc9f 100644
--- a/backend/run_evals.py
+++ b/backend/run_evals.py
@@ -13,8 +13,8 @@ from evals.config import EVALS_DIR
 from evals.core import generate_code_core
 from evals.utils import image_to_data_url
 
-STACK = "ionic_tailwind"
-MODEL = Llm.GPT_4O_2024_05_13
+STACK = "html_tailwind"
+MODEL = Llm.CLAUDE_3_5_SONNET_2024_06_20
 N = 1  # Number of outputs to generate