From a2d6fda7fdf7cf3a87abbd85f3b7cd1339cdd06f Mon Sep 17 00:00:00 2001 From: Abi Raja Date: Mon, 15 Jul 2024 16:33:39 -0400 Subject: [PATCH 1/2] update QA tests to use 4o and Claude Sonnet --- frontend/src/tests/qa.test.ts | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/frontend/src/tests/qa.test.ts b/frontend/src/tests/qa.test.ts index b5274d1..9c6c973 100644 --- a/frontend/src/tests/qa.test.ts +++ b/frontend/src/tests/qa.test.ts @@ -16,14 +16,16 @@ describe("e2e tests", () => { let browser: Browser; let page: Page; - const DEBUG = false; + const DEBUG = true; const IS_HEADLESS = true; const stacks = Object.values(Stack).slice(0, DEBUG ? 1 : undefined); - const models = Object.values(CodeGenerationModel).slice( - 0, - DEBUG ? 1 : undefined - ); + const models = DEBUG + ? [ + CodeGenerationModel.GPT_4O_2024_05_13, + CodeGenerationModel.CLAUDE_3_5_SONNET_2024_06_20, + ] + : Object.values(CodeGenerationModel); beforeAll(async () => { browser = await puppeteer.launch({ headless: IS_HEADLESS }); From 9f732c4f5d5784873b964b3bdea9647874864bf5 Mon Sep 17 00:00:00 2001 From: Abi Raja Date: Mon, 15 Jul 2024 18:51:22 -0400 Subject: [PATCH 2/2] update max tokens for Claude Sonnet 3.5 to newly supported limit (8192) --- backend/llm.py | 3 ++- backend/run_evals.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/backend/llm.py b/backend/llm.py index 0637416..2b71102 100644 --- a/backend/llm.py +++ b/backend/llm.py @@ -88,7 +88,7 @@ async def stream_claude_response( client = AsyncAnthropic(api_key=api_key) # Base parameters - max_tokens = 4096 + max_tokens = 8192 temperature = 0.0 # Translate OpenAI messages to Claude messages @@ -126,6 +126,7 @@ async def stream_claude_response( temperature=temperature, system=system_prompt, messages=claude_messages, # type: ignore + extra_headers={"anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"}, ) as stream: async for text in stream.text_stream: await callback(text) diff --git a/backend/run_evals.py b/backend/run_evals.py index bbf355a..ff5dc9f 100644 --- a/backend/run_evals.py +++ b/backend/run_evals.py @@ -13,8 +13,8 @@ from evals.config import EVALS_DIR from evals.core import generate_code_core from evals.utils import image_to_data_url -STACK = "ionic_tailwind" -MODEL = Llm.GPT_4O_2024_05_13 +STACK = "html_tailwind" +MODEL = Llm.CLAUDE_3_5_SONNET_2024_06_20 N = 1 # Number of outputs to generate