diff --git a/backend/llm.py b/backend/llm.py
index 35afac5..83ae94c 100644
--- a/backend/llm.py
+++ b/backend/llm.py
@@ -14,6 +14,7 @@ from utils import pprint_prompt
class Llm(Enum):
GPT_4_VISION = "gpt-4-vision-preview"
GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09"
+ GPT_4O_2024_05_13 = "gpt-4o-2024-05-13"
CLAUDE_3_SONNET = "claude-3-sonnet-20240229"
CLAUDE_3_OPUS = "claude-3-opus-20240229"
CLAUDE_3_HAIKU = "claude-3-haiku-20240307"
@@ -48,7 +49,11 @@ async def stream_openai_response(
}
# Add 'max_tokens' only if the model is a GPT4 vision or Turbo model
- if model == Llm.GPT_4_VISION or model == Llm.GPT_4_TURBO_2024_04_09:
+ if (
+ model == Llm.GPT_4_VISION
+ or model == Llm.GPT_4_TURBO_2024_04_09
+ or model == Llm.GPT_4O_2024_05_13
+ ):
params["max_tokens"] = 4096
stream = await client.chat.completions.create(**params) # type: ignore
diff --git a/backend/routes/evals.py b/backend/routes/evals.py
index 798a9d8..22262cd 100644
--- a/backend/routes/evals.py
+++ b/backend/routes/evals.py
@@ -7,10 +7,13 @@ from evals.config import EVALS_DIR
router = APIRouter()
+# Update this if the number of outputs generated per input changes
+N = 1
+
class Eval(BaseModel):
input: str
- output: str
+ outputs: list[str]
@router.get("/evals")
@@ -25,21 +28,27 @@ async def get_evals():
input_file_path = os.path.join(input_dir, file)
input_file = await image_to_data_url(input_file_path)
- # Construct the corresponding output file name
- output_file_name = file.replace(".png", ".html")
- output_file_path = os.path.join(output_dir, output_file_name)
+ # Construct the corresponding output file names
+ output_file_names = [
+ file.replace(".png", f"_{i}.html") for i in range(0, N)
+ ] # One output file name per expected output (N per input)
- # Check if the output file exists
- if os.path.exists(output_file_path):
- with open(output_file_path, "r") as f:
- output_file_data = f.read()
- else:
- output_file_data = "Output file not found."
+ output_files_data: list[str] = []
+ for output_file_name in output_file_names:
+ output_file_path = os.path.join(output_dir, output_file_name)
+ # Check if the output file exists
+ if os.path.exists(output_file_path):
+ with open(output_file_path, "r") as f:
+ output_files_data.append(f.read())
+ else:
+ output_files_data.append(
+ "Output file not found."
+ )
evals.append(
Eval(
input=input_file,
- output=output_file_data,
+ outputs=output_files_data,
)
)
diff --git a/backend/routes/generate_code.py b/backend/routes/generate_code.py
index 0189dfb..0173b7b 100644
--- a/backend/routes/generate_code.py
+++ b/backend/routes/generate_code.py
@@ -85,7 +85,7 @@ async def stream_code(websocket: WebSocket):
# Read the model from the request. Fall back to default if not provided.
code_generation_model_str = params.get(
- "codeGenerationModel", Llm.GPT_4_VISION.value
+ "codeGenerationModel", Llm.GPT_4O_2024_05_13.value
)
try:
code_generation_model = convert_frontend_str_to_llm(code_generation_model_str)
@@ -146,6 +146,7 @@ async def stream_code(websocket: WebSocket):
if not openai_api_key and (
code_generation_model == Llm.GPT_4_VISION
or code_generation_model == Llm.GPT_4_TURBO_2024_04_09
+ or code_generation_model == Llm.GPT_4O_2024_05_13
):
print("OpenAI API key not found")
await throw_error(
diff --git a/backend/run_evals.py b/backend/run_evals.py
index f26c708..bbf355a 100644
--- a/backend/run_evals.py
+++ b/backend/run_evals.py
@@ -13,8 +13,9 @@ from evals.config import EVALS_DIR
from evals.core import generate_code_core
from evals.utils import image_to_data_url
-STACK = "html_tailwind"
-MODEL = Llm.CLAUDE_3_SONNET
+STACK = "ionic_tailwind"
+MODEL = Llm.GPT_4O_2024_05_13
+N = 1 # Number of outputs to generate
async def main():
@@ -28,16 +29,21 @@ async def main():
for filename in evals:
filepath = os.path.join(INPUT_DIR, filename)
data_url = await image_to_data_url(filepath)
- task = generate_code_core(image_url=data_url, stack=STACK, model=MODEL)
- tasks.append(task)
+ for _ in range(N): # Generate N tasks for each input
+ task = generate_code_core(image_url=data_url, stack=STACK, model=MODEL)
+ tasks.append(task)
results = await asyncio.gather(*tasks)
os.makedirs(OUTPUT_DIR, exist_ok=True)
- for filename, content in zip(evals, results):
- # File name is derived from the original filename in evals
- output_filename = f"{os.path.splitext(filename)[0]}.html"
+ for i, content in enumerate(results):
+ # Calculate index for filename and output number
+ eval_index = i // N
+ output_number = i % N
+ filename = evals[eval_index]
+ # File name is derived from the original filename in evals with an added output number
+ output_filename = f"{os.path.splitext(filename)[0]}_{output_number}.html"
output_filepath = os.path.join(OUTPUT_DIR, output_filename)
with open(output_filepath, "w") as file:
file.write(content)
diff --git a/backend/test_llm.py b/backend/test_llm.py
index ec005a3..aeb02ab 100644
--- a/backend/test_llm.py
+++ b/backend/test_llm.py
@@ -24,6 +24,11 @@ class TestConvertFrontendStrToLlm(unittest.TestCase):
Llm.GPT_4_TURBO_2024_04_09,
"Should convert 'gpt-4-turbo-2024-04-09' to Llm.GPT_4_TURBO_2024_04_09",
)
+ self.assertEqual(
+ convert_frontend_str_to_llm("gpt-4o-2024-05-13"),
+ Llm.GPT_4O_2024_05_13,
+ "Should convert 'gpt-4o-2024-05-13' to Llm.GPT_4O_2024_05_13",
+ )
def test_convert_invalid_string_raises_exception(self):
with self.assertRaises(ValueError):
diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx
index 6afc174..6b04f59 100644
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@@ -74,7 +74,7 @@ function App({ navbarComponent }: Props) {
isImageGenerationEnabled: true,
editorTheme: EditorTheme.COBALT,
generatedCodeConfig: Stack.HTML_TAILWIND,
- codeGenerationModel: CodeGenerationModel.GPT_4_TURBO_2024_04_09,
+ codeGenerationModel: CodeGenerationModel.GPT_4O_2024_05_13,
// Only relevant for hosted version
isTermOfServiceAccepted: false,
},
diff --git a/frontend/src/components/evals/EvalsPage.tsx b/frontend/src/components/evals/EvalsPage.tsx
index 6e76a0d..6d2adbd 100644
--- a/frontend/src/components/evals/EvalsPage.tsx
+++ b/frontend/src/components/evals/EvalsPage.tsx
@@ -4,7 +4,7 @@ import RatingPicker from "./RatingPicker";
interface Eval {
input: string;
- output: string;
+ outputs: string[];
}
function EvalsPage() {
@@ -38,18 +38,22 @@ function EvalsPage() {
{evals.map((e, index) => (
-
+
{index}
+
+ {/* Update w if N changes to a fixed number like w-[600px] */}
-

-
-
- {/* Put output into an iframe */}
-
+
+              {e.outputs.map((output, outputIndex) => (
+                <div key={outputIndex}>
+                  {/* Put output into an iframe */}
+                  <iframe srcDoc={output} />
+                </div>
+              ))}