diff --git a/backend/routes/evals.py b/backend/routes/evals.py index 798a9d8..22262cd 100644 --- a/backend/routes/evals.py +++ b/backend/routes/evals.py @@ -7,10 +7,13 @@ from evals.config import EVALS_DIR router = APIRouter() +# Update this if the number of outputs generated per input changes +N = 1 + class Eval(BaseModel): input: str - output: str + outputs: list[str] @router.get("/evals") @@ -25,21 +28,27 @@ async def get_evals(): input_file_path = os.path.join(input_dir, file) input_file = await image_to_data_url(input_file_path) - # Construct the corresponding output file name - output_file_name = file.replace(".png", ".html") - output_file_path = os.path.join(output_dir, output_file_name) + # Construct the corresponding output file names + output_file_names = [ + file.replace(".png", f"_{i}.html") for i in range(0, N) + ] # One file name per generated output (N outputs for each input) - # Check if the output file exists - if os.path.exists(output_file_path): - with open(output_file_path, "r") as f: - output_file_data = f.read() - else: - output_file_data = "Output file not found." + output_files_data: list[str] = [] + for output_file_name in output_file_names: + output_file_path = os.path.join(output_dir, output_file_name) + # Check if the output file exists + if os.path.exists(output_file_path): + with open(output_file_path, "r") as f: + output_files_data.append(f.read()) + else: + output_files_data.append( + "

Output file not found.

" + ) evals.append( Eval( input=input_file, - output=output_file_data, + outputs=output_files_data, ) ) diff --git a/backend/run_evals.py b/backend/run_evals.py index f26c708..a5fa878 100644 --- a/backend/run_evals.py +++ b/backend/run_evals.py @@ -14,7 +14,8 @@ from evals.core import generate_code_core from evals.utils import image_to_data_url STACK = "html_tailwind" -MODEL = Llm.CLAUDE_3_SONNET +MODEL = Llm.GPT_4_TURBO_2024_04_09 +N = 1 # Number of outputs to generate async def main(): @@ -28,16 +29,21 @@ async def main(): for filename in evals: filepath = os.path.join(INPUT_DIR, filename) data_url = await image_to_data_url(filepath) - task = generate_code_core(image_url=data_url, stack=STACK, model=MODEL) - tasks.append(task) + for _ in range(N): # Generate N tasks for each input + task = generate_code_core(image_url=data_url, stack=STACK, model=MODEL) + tasks.append(task) results = await asyncio.gather(*tasks) os.makedirs(OUTPUT_DIR, exist_ok=True) - for filename, content in zip(evals, results): - # File name is derived from the original filename in evals - output_filename = f"{os.path.splitext(filename)[0]}.html" + for i, content in enumerate(results): + # Calculate index for filename and output number + eval_index = i // N + output_number = i % N + filename = evals[eval_index] + # File name is derived from the original filename in evals with an added output number + output_filename = f"{os.path.splitext(filename)[0]}_{output_number}.html" output_filepath = os.path.join(OUTPUT_DIR, output_filename) with open(output_filepath, "w") as file: file.write(content) diff --git a/frontend/src/components/evals/EvalsPage.tsx b/frontend/src/components/evals/EvalsPage.tsx index 6e76a0d..6d2adbd 100644 --- a/frontend/src/components/evals/EvalsPage.tsx +++ b/frontend/src/components/evals/EvalsPage.tsx @@ -4,7 +4,7 @@ import RatingPicker from "./RatingPicker"; interface Eval { input: string; - output: string; + outputs: string[]; } function EvalsPage() { @@ -38,18 +38,22 @@ 
function EvalsPage() {
{evals.map((e, index) => (
-
+

{index}

+
+ {/* If N changes, update the width class to a fixed value such as w-[600px] */}
- -
-
- {/* Put output into an iframe */} - + {`Input
+ {e.outputs.map((output, outputIndex) => ( +
+ {/* Put output into an iframe */} + +
+ ))}