add a front-end for scoring eval results
This commit is contained in:
parent
896ac66ac5
commit
d23cec9bc0
@ -2,9 +2,11 @@
|
|||||||
from typing import Any, Coroutine
|
from typing import Any, Coroutine
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
from eval_config import EVALS_DIR
|
||||||
|
from eval_utils import image_to_data_url
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
import base64
|
|
||||||
import os
|
import os
|
||||||
from llm import stream_openai_response
|
from llm import stream_openai_response
|
||||||
from prompts import assemble_prompt
|
from prompts import assemble_prompt
|
||||||
@ -36,14 +38,7 @@ async def generate_code_core(image_url: str, stack: str) -> str:
|
|||||||
return completion
|
return completion
|
||||||
|
|
||||||
|
|
||||||
async def image_to_data_url(filepath: str):
|
|
||||||
with open(filepath, "rb") as image_file:
|
|
||||||
encoded_string = base64.b64encode(image_file.read()).decode()
|
|
||||||
return f"data:image/png;base64,{encoded_string}"
|
|
||||||
|
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
EVALS_DIR = "./evals"
|
|
||||||
INPUT_DIR = EVALS_DIR + "/inputs"
|
INPUT_DIR = EVALS_DIR + "/inputs"
|
||||||
OUTPUT_DIR = EVALS_DIR + "/outputs"
|
OUTPUT_DIR = EVALS_DIR + "/outputs"
|
||||||
|
|
||||||
|
|||||||
1
backend/eval_config.py
Normal file
1
backend/eval_config.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
EVALS_DIR = "./evals"
|
||||||
7
backend/eval_utils.py
Normal file
7
backend/eval_utils.py
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
import base64
|
||||||
|
|
||||||
|
|
||||||
|
async def image_to_data_url(filepath: str) -> str:
    """Encode the image at *filepath* as a base64 ``data:`` URL.

    The MIME type is guessed from the file extension. When it cannot be
    determined, or is not an image type, the function falls back to
    ``image/png`` (the value this helper previously hard-coded), so existing
    ``.png`` callers get exactly the same result as before.

    Args:
        filepath: Path of the image file to read.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Local import so the module's top-level import block stays untouched.
    import mimetypes

    mime_type, _ = mimetypes.guess_type(filepath)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/png"

    # NOTE: this is a synchronous read inside a coroutine; the function is
    # kept ``async`` only because callers already ``await`` it.
    with open(filepath, "rb") as image_file:
        encoded_string = base64.b64encode(image_file.read()).decode("ascii")

    return f"data:{mime_type};base64,{encoded_string}"
|
||||||
@ -6,7 +6,7 @@ load_dotenv()
|
|||||||
|
|
||||||
from fastapi import FastAPI
|
from fastapi import FastAPI
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
from routes import screenshot, generate_code, home
|
from routes import screenshot, generate_code, home, evals
|
||||||
|
|
||||||
app = FastAPI(openapi_url=None, docs_url=None, redoc_url=None)
|
app = FastAPI(openapi_url=None, docs_url=None, redoc_url=None)
|
||||||
|
|
||||||
@ -23,3 +23,4 @@ app.add_middleware(
|
|||||||
app.include_router(generate_code.router)
|
app.include_router(generate_code.router)
|
||||||
app.include_router(screenshot.router)
|
app.include_router(screenshot.router)
|
||||||
app.include_router(home.router)
|
app.include_router(home.router)
|
||||||
|
app.include_router(evals.router)
|
||||||
|
|||||||
46
backend/routes/evals.py
Normal file
46
backend/routes/evals.py
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
import os
|
||||||
|
from fastapi import APIRouter
|
||||||
|
from pydantic import BaseModel
|
||||||
|
from eval_utils import image_to_data_url
|
||||||
|
from eval_config import EVALS_DIR
|
||||||
|
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
class Eval(BaseModel):
    """One eval entry returned by the ``/evals`` endpoint."""

    # Data URL of the input screenshot (produced by image_to_data_url).
    input: str
    # Contents of the matching output HTML file, or a placeholder message
    # when that file does not exist.
    output: str
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/evals")
async def get_evals():
    """Return every eval as an input screenshot paired with its output HTML.

    Each ``.png`` file in ``EVALS_DIR/inputs`` is encoded as a data URL and
    matched with the same-named ``.html`` file in ``EVALS_DIR/outputs``.
    When the output file is missing, a placeholder message is returned in
    its place.

    Returns:
        A list of ``Eval`` objects, ordered by input file name.

    Raises:
        OSError: If the inputs directory cannot be listed or a file cannot
            be read.
    """
    input_dir = os.path.join(EVALS_DIR, "inputs")
    output_dir = os.path.join(EVALS_DIR, "outputs")

    evals: list[Eval] = []
    # os.listdir() returns entries in arbitrary order; sort so the list (and
    # therefore any index-based bookkeeping done by clients) is stable.
    for file in sorted(os.listdir(input_dir)):
        if not file.endswith(".png"):
            continue

        input_file_path = os.path.join(input_dir, file)
        input_file = await image_to_data_url(input_file_path)

        # Swap only the trailing extension; str.replace() would also rewrite
        # any ".png" that happens to appear earlier in the file name.
        output_file_name = file.removesuffix(".png") + ".html"
        output_file_path = os.path.join(output_dir, output_file_name)

        if os.path.exists(output_file_path):
            with open(output_file_path, "r") as f:
                output_file_data = f.read()
        else:
            output_file_data = "Output file not found."

        evals.append(Eval(input=input_file, output=output_file_data))

    return evals
|
||||||
@ -41,6 +41,7 @@
|
|||||||
"react-dropzone": "^14.2.3",
|
"react-dropzone": "^14.2.3",
|
||||||
"react-hot-toast": "^2.4.1",
|
"react-hot-toast": "^2.4.1",
|
||||||
"react-icons": "^4.12.0",
|
"react-icons": "^4.12.0",
|
||||||
|
"react-router-dom": "^6.20.1",
|
||||||
"tailwind-merge": "^2.0.0",
|
"tailwind-merge": "^2.0.0",
|
||||||
"tailwindcss-animate": "^1.0.7",
|
"tailwindcss-animate": "^1.0.7",
|
||||||
"thememirror": "^2.0.1",
|
"thememirror": "^2.0.1",
|
||||||
|
|||||||
70
frontend/src/components/evals/EvalsPage.tsx
Normal file
70
frontend/src/components/evals/EvalsPage.tsx
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
import React, { useEffect } from "react";
|
||||||
|
import { HTTP_BACKEND_URL } from "../../config";
|
||||||
|
import RatingPicker from "./RatingPicker";
|
||||||
|
|
||||||
|
/** One eval entry as returned by the backend `/evals` endpoint. */
interface Eval {
  /** Input screenshot; used directly as an `<img>` `src`. */
  input: string;
  /** Generated HTML; rendered through an `<iframe>` `srcDoc`. */
  output: string;
}
|
||||||
|
|
||||||
|
/**
 * Page for manually scoring eval results.
 *
 * Loads all evals from the backend once on mount, shows each input
 * screenshot next to its generated HTML, and lets the user rate every pair
 * from 1 to 4. The running total is shown at the top.
 */
function EvalsPage() {
  const [evals, setEvals] = React.useState<Eval[]>([]);
  const [ratings, setRatings] = React.useState<number[]>([]);

  const total = ratings.reduce((a, b) => a + b, 0);
  const max = ratings.length * 4;
  // `|| 0` turns the NaN produced by 0 / 0 (nothing loaded yet) into 0.
  const score = ((total / max) * 100 || 0).toFixed(2);

  useEffect(() => {
    // Guards against setting state after unmount (and against the doubled
    // effect invocation React.StrictMode performs in development).
    let cancelled = false;

    fetch(`${HTTP_BACKEND_URL}/evals`)
      .then((res) => {
        if (!res.ok) {
          throw new Error(`Failed to load evals: HTTP ${res.status}`);
        }
        return res.json();
      })
      .then((data: Eval[]) => {
        if (cancelled) return;
        setEvals(data);
        setRatings(new Array(data.length).fill(0));
      })
      .catch((error) => {
        if (!cancelled) console.error(error);
      });

    return () => {
      cancelled = true;
    };
    // Run once on mount. Depending on `evals` here would refetch forever
    // when the backend returns an empty list, because every response
    // produces a new (still empty) array identity.
  }, []);

  return (
    <div className="mx-auto">
      {/* Display total */}
      <div className="flex items-center justify-center w-full h-12 bg-zinc-950">
        <span className="text-2xl font-semibold text-white">
          Total: {total} out of {max} ({score}%)
        </span>
      </div>

      <div className="flex flex-col gap-y-4 mt-4 mx-auto justify-center">
        {evals.map((e, index) => (
          <div className="flex flex-col justify-center" key={index}>
            <div className="flex gap-x-2 justify-center">
              <div className="w-1/2 p-1 border">
                <img src={e.input} alt={`Eval ${index + 1} input`} />
              </div>
              <div className="w-1/2 p-1 border">
                {/* Put output into an iframe */}
                <iframe
                  srcDoc={e.output}
                  title={`Eval ${index + 1} output`}
                  className="w-[1200px] h-[800px] transform scale-[0.60]"
                  style={{ transformOrigin: "top left" }}
                ></iframe>
              </div>
            </div>
            <div className="ml-8 mt-4 flex justify-center">
              <RatingPicker
                onSelect={(rating) => {
                  // Functional update so quick successive selections never
                  // overwrite each other with a stale `ratings` snapshot.
                  setRatings((prev) => {
                    const next = [...prev];
                    next[index] = rating;
                    return next;
                  });
                }}
              />
            </div>
          </div>
        ))}
      </div>
    </div>
  );
}
|
||||||
|
|
||||||
|
export default EvalsPage;
|
||||||
38
frontend/src/components/evals/RatingPicker.tsx
Normal file
38
frontend/src/components/evals/RatingPicker.tsx
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
import React from "react";
|
||||||
|
|
||||||
|
/** Props accepted by `RatingPicker`. */
interface Props {
  /** Called with the chosen rating each time a circle is clicked. */
  onSelect: (rating: number) => void;
}
|
||||||
|
|
||||||
|
/**
 * Row of four circular buttons (1-4) for rating a single eval.
 *
 * Tracks the currently selected value locally for highlighting and reports
 * every selection to the parent through `onSelect`.
 */
function RatingPicker({ onSelect }: Props) {
  const [selected, setSelected] = React.useState<number | null>(null);

  const renderCircle = (number: number) => {
    const isSelected = selected === number;
    const bgColor = isSelected ? "bg-black" : "bg-gray-300";
    const textColor = isSelected ? "text-white" : "text-black";

    return (
      // A real <button> (rather than a clickable <div>) makes the control
      // focusable and operable from the keyboard.
      <button
        type="button"
        key={number}
        aria-pressed={isSelected}
        aria-label={`Rate ${number}`}
        className={`flex items-center justify-center w-8 h-8 ${bgColor} rounded-full cursor-pointer`}
        onClick={() => {
          setSelected(number);
          onSelect(number);
        }}
      >
        <span className={`text-lg font-semibold ${textColor}`}>{number}</span>
      </button>
    );
  };

  return <div className="flex space-x-4">{[1, 2, 3, 4].map(renderCircle)}</div>;
}
|
||||||
|
|
||||||
|
export default RatingPicker;
|
||||||
@ -3,10 +3,17 @@ import ReactDOM from "react-dom/client";
|
|||||||
import App from "./App.tsx";
|
import App from "./App.tsx";
|
||||||
import "./index.css";
|
import "./index.css";
|
||||||
import { Toaster } from "react-hot-toast";
|
import { Toaster } from "react-hot-toast";
|
||||||
|
import EvalsPage from "./components/evals/EvalsPage.tsx";
|
||||||
|
import { BrowserRouter as Router, Routes, Route } from "react-router-dom";
|
||||||
|
|
||||||
ReactDOM.createRoot(document.getElementById("root")!).render(
|
ReactDOM.createRoot(document.getElementById("root")!).render(
|
||||||
<React.StrictMode>
|
<React.StrictMode>
|
||||||
<App />
|
<Router>
|
||||||
<Toaster toastOptions={{ className:"dark:bg-zinc-950 dark:text-white" }}/>
|
<Routes>
|
||||||
|
<Route path="/" element={<App />} />
|
||||||
|
<Route path="/evals" element={<EvalsPage />} />
|
||||||
|
</Routes>
|
||||||
|
</Router>
|
||||||
|
<Toaster toastOptions={{ className: "dark:bg-zinc-950 dark:text-white" }} />
|
||||||
</React.StrictMode>
|
</React.StrictMode>
|
||||||
);
|
);
|
||||||
|
|||||||
@ -1184,6 +1184,11 @@
|
|||||||
dependencies:
|
dependencies:
|
||||||
"@babel/runtime" "^7.13.10"
|
"@babel/runtime" "^7.13.10"
|
||||||
|
|
||||||
|
"@remix-run/router@1.13.1":
|
||||||
|
version "1.13.1"
|
||||||
|
resolved "https://registry.yarnpkg.com/@remix-run/router/-/router-1.13.1.tgz#07e2a8006f23a3bc898b3f317e0a58cc8076b86e"
|
||||||
|
integrity sha512-so+DHzZKsoOcoXrILB4rqDkMDy7NLMErRdOxvzvOKb507YINKUP4Di+shbTZDhSE/pBZ+vr7XGIpcOO0VLSA+Q==
|
||||||
|
|
||||||
"@rollup/pluginutils@^4.2.0":
|
"@rollup/pluginutils@^4.2.0":
|
||||||
version "4.2.1"
|
version "4.2.1"
|
||||||
resolved "https://registry.yarnpkg.com/@rollup/pluginutils/-/pluginutils-4.2.1.tgz#e6c6c3aba0744edce3fb2074922d3776c0af2a6d"
|
resolved "https://registry.yarnpkg.com/@rollup/pluginutils/-/pluginutils-4.2.1.tgz#e6c6c3aba0744edce3fb2074922d3776c0af2a6d"
|
||||||
@ -3144,6 +3149,21 @@ react-remove-scroll@2.5.5:
|
|||||||
use-callback-ref "^1.3.0"
|
use-callback-ref "^1.3.0"
|
||||||
use-sidecar "^1.1.2"
|
use-sidecar "^1.1.2"
|
||||||
|
|
||||||
|
react-router-dom@^6.20.1:
|
||||||
|
version "6.20.1"
|
||||||
|
resolved "https://registry.yarnpkg.com/react-router-dom/-/react-router-dom-6.20.1.tgz#e34f8075b9304221420de3609e072bb349824984"
|
||||||
|
integrity sha512-npzfPWcxfQN35psS7rJgi/EW0Gx6EsNjfdJSAk73U/HqMEJZ2k/8puxfwHFgDQhBGmS3+sjnGbMdMSV45axPQw==
|
||||||
|
dependencies:
|
||||||
|
"@remix-run/router" "1.13.1"
|
||||||
|
react-router "6.20.1"
|
||||||
|
|
||||||
|
react-router@6.20.1:
|
||||||
|
version "6.20.1"
|
||||||
|
resolved "https://registry.yarnpkg.com/react-router/-/react-router-6.20.1.tgz#e8cc326031d235aaeec405bb234af77cf0fe75ef"
|
||||||
|
integrity sha512-ccvLrB4QeT5DlaxSFFYi/KR8UMQ4fcD8zBcR71Zp1kaYTC5oJKYAp1cbavzGrogwxca+ubjkd7XjFZKBW8CxPA==
|
||||||
|
dependencies:
|
||||||
|
"@remix-run/router" "1.13.1"
|
||||||
|
|
||||||
react-style-singleton@^2.2.1:
|
react-style-singleton@^2.2.1:
|
||||||
version "2.2.1"
|
version "2.2.1"
|
||||||
resolved "https://registry.yarnpkg.com/react-style-singleton/-/react-style-singleton-2.2.1.tgz#f99e420492b2d8f34d38308ff660b60d0b1205b4"
|
resolved "https://registry.yarnpkg.com/react-style-singleton/-/react-style-singleton-2.2.1.tgz#f99e420492b2d8f34d38308ff660b60d0b1205b4"
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user