diff --git a/backend/llm.py b/backend/llm.py index 35afac5..83ae94c 100644 --- a/backend/llm.py +++ b/backend/llm.py @@ -14,6 +14,7 @@ from utils import pprint_prompt class Llm(Enum): GPT_4_VISION = "gpt-4-vision-preview" GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09" + GPT_4O_2024_05_13 = "gpt-4o-2024-05-13" CLAUDE_3_SONNET = "claude-3-sonnet-20240229" CLAUDE_3_OPUS = "claude-3-opus-20240229" CLAUDE_3_HAIKU = "claude-3-haiku-20240307" @@ -48,7 +49,11 @@ async def stream_openai_response( } # Add 'max_tokens' only if the model is a GPT4 vision or Turbo model - if model == Llm.GPT_4_VISION or model == Llm.GPT_4_TURBO_2024_04_09: + if ( + model == Llm.GPT_4_VISION + or model == Llm.GPT_4_TURBO_2024_04_09 + or model == Llm.GPT_4O_2024_05_13 + ): params["max_tokens"] = 4096 stream = await client.chat.completions.create(**params) # type: ignore diff --git a/backend/routes/evals.py b/backend/routes/evals.py index 798a9d8..22262cd 100644 @@ -7,10 +7,13 @@ from evals.config import EVALS_DIR router = APIRouter() +# Update this if the number of outputs generated per input changes +N = 1 + class Eval(BaseModel): input: str - output: str + outputs: list[str] @router.get("/evals") @@ -25,21 +28,27 @@ async def get_evals(): input_file_path = os.path.join(input_dir, file) input_file = await image_to_data_url(input_file_path) - # Construct the corresponding output file name - output_file_name = file.replace(".png", ".html") - output_file_path = os.path.join(output_dir, output_file_name) + # Construct the corresponding output file names + output_file_names = [ + file.replace(".png", f"_{i}.html") for i in range(0, N) + ] # N output files are expected per input - # Check if the output file exists - if os.path.exists(output_file_path): - with open(output_file_path, "r") as f: - output_file_data = f.read() - else: - output_file_data = "Output file not found." 
+ output_files_data: list[str] = [] + for output_file_name in output_file_names: + output_file_path = os.path.join(output_dir, output_file_name) + # Check if the output file exists + if os.path.exists(output_file_path): + with open(output_file_path, "r") as f: + output_files_data.append(f.read()) + else: + output_files_data.append( + "Output file not found." + ) evals.append( Eval( input=input_file, - output=output_file_data, + outputs=output_files_data, ) ) diff --git a/backend/routes/generate_code.py b/backend/routes/generate_code.py index 0189dfb..0173b7b 100644 --- a/backend/routes/generate_code.py +++ b/backend/routes/generate_code.py @@ -85,7 +85,7 @@ async def stream_code(websocket: WebSocket): # Read the model from the request. Fall back to default if not provided. code_generation_model_str = params.get( - "codeGenerationModel", Llm.GPT_4_VISION.value + "codeGenerationModel", Llm.GPT_4O_2024_05_13.value ) try: code_generation_model = convert_frontend_str_to_llm(code_generation_model_str) @@ -146,6 +146,7 @@ async def stream_code(websocket: WebSocket): if not openai_api_key and ( code_generation_model == Llm.GPT_4_VISION or code_generation_model == Llm.GPT_4_TURBO_2024_04_09 + or code_generation_model == Llm.GPT_4O_2024_05_13 ): print("OpenAI API key not found") await throw_error( diff --git a/backend/run_evals.py b/backend/run_evals.py index f26c708..bbf355a 100644 --- a/backend/run_evals.py +++ b/backend/run_evals.py @@ -13,8 +13,9 @@ from evals.config import EVALS_DIR from evals.core import generate_code_core from evals.utils import image_to_data_url -STACK = "html_tailwind" -MODEL = Llm.CLAUDE_3_SONNET +STACK = "ionic_tailwind" +MODEL = Llm.GPT_4O_2024_05_13 +N = 1 # Number of outputs to generate async def main(): @@ -28,16 +29,21 @@ async def main(): for filename in evals: filepath = os.path.join(INPUT_DIR, filename) data_url = await image_to_data_url(filepath) - task = generate_code_core(image_url=data_url, stack=STACK, model=MODEL) - tasks.append(task) + for _ in range(N): # Generate N tasks for each input + task = generate_code_core(image_url=data_url, stack=STACK, model=MODEL) + tasks.append(task) results = await asyncio.gather(*tasks) os.makedirs(OUTPUT_DIR, exist_ok=True) - for filename, content in zip(evals, results): - # File name is derived from the original filename in evals - output_filename = f"{os.path.splitext(filename)[0]}.html" + for i, content in enumerate(results): + # Calculate index for filename and output number + eval_index = i // N + output_number = i % N + filename = evals[eval_index] + # File name is derived from the original filename in evals with an added output number + output_filename = f"{os.path.splitext(filename)[0]}_{output_number}.html" output_filepath = os.path.join(OUTPUT_DIR, output_filename) with open(output_filepath, "w") as file: file.write(content) diff --git a/backend/test_llm.py b/backend/test_llm.py index ec005a3..aeb02ab 100644 --- a/backend/test_llm.py +++ b/backend/test_llm.py @@ -24,6 +24,11 @@ class TestConvertFrontendStrToLlm(unittest.TestCase): Llm.GPT_4_TURBO_2024_04_09, "Should convert 'gpt-4-turbo-2024-04-09' to Llm.GPT_4_TURBO_2024_04_09", ) + self.assertEqual( + convert_frontend_str_to_llm("gpt-4o-2024-05-13"), + Llm.GPT_4O_2024_05_13, + "Should convert 'gpt-4o-2024-05-13' to Llm.GPT_4O_2024_05_13", + ) def test_convert_invalid_string_raises_exception(self): with self.assertRaises(ValueError): diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 6afc174..6b04f59 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -74,7 +74,7 @@ function App({ navbarComponent }: Props) { isImageGenerationEnabled: true, editorTheme: EditorTheme.COBALT, generatedCodeConfig: Stack.HTML_TAILWIND, - codeGenerationModel: CodeGenerationModel.GPT_4_TURBO_2024_04_09, + codeGenerationModel: CodeGenerationModel.GPT_4O_2024_05_13, // Only relevant for hosted version isTermOfServiceAccepted: false, }, diff --git a/frontend/src/components/evals/EvalsPage.tsx b/frontend/src/components/evals/EvalsPage.tsx index 6e76a0d..6d2adbd 100644 --- a/frontend/src/components/evals/EvalsPage.tsx +++ b/frontend/src/components/evals/EvalsPage.tsx @@ -4,7 +4,7 @@ import RatingPicker from "./RatingPicker"; interface Eval { input: string; - output: string; + outputs: string[]; } function EvalsPage() { @@ -38,18 +38,22 @@ function
EvalsPage() {
{evals.map((e, index) => (
-
+

{index}

+
+ {/* Update w if N changes to a fixed number like w-[600px] */}
- -
-
- {/* Put output into an iframe */} - + {`Input
+ {e.outputs.map((output, outputIndex) => ( +
+ {/* Put output into an iframe */} + +
+ ))}