support best of n evals
This commit is contained in:
parent
f9c4dd9c7c
commit
a5fe0960d8
@ -7,10 +7,13 @@ from evals.config import EVALS_DIR
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
# Update this if the number of outputs generated per input changes
|
||||
N = 1
|
||||
|
||||
|
||||
class Eval(BaseModel):
|
||||
input: str
|
||||
output: str
|
||||
outputs: list[str]
|
||||
|
||||
|
||||
@router.get("/evals")
|
||||
@ -25,21 +28,27 @@ async def get_evals():
|
||||
input_file_path = os.path.join(input_dir, file)
|
||||
input_file = await image_to_data_url(input_file_path)
|
||||
|
||||
# Construct the corresponding output file name
|
||||
output_file_name = file.replace(".png", ".html")
|
||||
output_file_path = os.path.join(output_dir, output_file_name)
|
||||
# Construct the corresponding output file names
|
||||
output_file_names = [
|
||||
file.replace(".png", f"_{i}.html") for i in range(0, N)
|
||||
] # One output file name per generated output (N per input)
|
||||
|
||||
output_files_data: list[str] = []
|
||||
for output_file_name in output_file_names:
|
||||
output_file_path = os.path.join(output_dir, output_file_name)
|
||||
# Check if the output file exists
|
||||
if os.path.exists(output_file_path):
|
||||
with open(output_file_path, "r") as f:
|
||||
output_file_data = f.read()
|
||||
output_files_data.append(f.read())
|
||||
else:
|
||||
output_file_data = "Output file not found."
|
||||
output_files_data.append(
|
||||
"<html><h1>Output file not found.</h1></html>"
|
||||
)
|
||||
|
||||
evals.append(
|
||||
Eval(
|
||||
input=input_file,
|
||||
output=output_file_data,
|
||||
outputs=output_files_data,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@ -14,7 +14,8 @@ from evals.core import generate_code_core
|
||||
from evals.utils import image_to_data_url
|
||||
|
||||
STACK = "html_tailwind"
|
||||
MODEL = Llm.CLAUDE_3_SONNET
|
||||
MODEL = Llm.GPT_4_TURBO_2024_04_09
|
||||
N = 1 # Number of outputs to generate
|
||||
|
||||
|
||||
async def main():
|
||||
@ -28,6 +29,7 @@ async def main():
|
||||
for filename in evals:
|
||||
filepath = os.path.join(INPUT_DIR, filename)
|
||||
data_url = await image_to_data_url(filepath)
|
||||
for _ in range(N): # Generate N tasks for each input
|
||||
task = generate_code_core(image_url=data_url, stack=STACK, model=MODEL)
|
||||
tasks.append(task)
|
||||
|
||||
@ -35,9 +37,13 @@ async def main():
|
||||
|
||||
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||||
|
||||
for filename, content in zip(evals, results):
|
||||
# File name is derived from the original filename in evals
|
||||
output_filename = f"{os.path.splitext(filename)[0]}.html"
|
||||
for i, content in enumerate(results):
|
||||
# Calculate index for filename and output number
|
||||
eval_index = i // N
|
||||
output_number = i % N
|
||||
filename = evals[eval_index]
|
||||
# File name is derived from the original filename in evals with an added output number
|
||||
output_filename = f"{os.path.splitext(filename)[0]}_{output_number}.html"
|
||||
output_filepath = os.path.join(OUTPUT_DIR, output_filename)
|
||||
with open(output_filepath, "w") as file:
|
||||
file.write(content)
|
||||
|
||||
@ -4,7 +4,7 @@ import RatingPicker from "./RatingPicker";
|
||||
|
||||
interface Eval {
|
||||
input: string;
|
||||
output: string;
|
||||
outputs: string[];
|
||||
}
|
||||
|
||||
function EvalsPage() {
|
||||
@ -38,18 +38,22 @@ function EvalsPage() {
|
||||
<div className="flex flex-col gap-y-4 mt-4 mx-auto justify-center">
|
||||
{evals.map((e, index) => (
|
||||
<div className="flex flex-col justify-center" key={index}>
|
||||
<div className="flex gap-x-2 justify-center">
|
||||
<h2 className="font-bold text-lg ml-4">{index}</h2>
|
||||
<div className="flex gap-x-2 justify-center ml-4">
|
||||
{/* If N changes, update the width class to a fixed value such as w-[600px] */}
|
||||
<div className="w-1/2 p-1 border">
|
||||
<img src={e.input} />
|
||||
<img src={e.input} alt={`Input for eval ${index}`} />
|
||||
</div>
|
||||
<div className="w-1/2 p-1 border">
|
||||
{e.outputs.map((output, outputIndex) => (
|
||||
<div className="w-1/2 p-1 border" key={outputIndex}>
|
||||
{/* Put output into an iframe */}
|
||||
<iframe
|
||||
srcDoc={e.output}
|
||||
srcDoc={output}
|
||||
className="w-[1200px] h-[800px] transform scale-[0.60]"
|
||||
style={{ transformOrigin: "top left" }}
|
||||
></iframe>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
<div className="ml-8 mt-4 flex justify-center">
|
||||
<RatingPicker
|
||||
|
||||
Loading…
Reference in New Issue
Block a user