diff --git a/Evaluation.md b/Evaluation.md index 5fd5da8..e937b78 100644 --- a/Evaluation.md +++ b/Evaluation.md @@ -5,8 +5,8 @@ Evaluation dataset consists of 16 screenshots. A Python script for running scree ### Running evals - Input screenshots should be located at `backend/evals_data/inputs` and the outputs will be `backend/evals_data/outputs`. If you want to modify this, modify `EVALS_DIR` in `backend/evals/config.py`. You can download the input screenshot dataset here: TODO. -- Set a stack (`STACK` var) in `backend/run_evals.py` -- Run `python backend/run_evals.py` - this runs the screenshot-to-code on the input dataset in parallel but it will still take a few minutes to complete. +- Set a stack and model (`STACK` var, `MODEL` var) in `backend/run_evals.py` +- Run `OPENAI_API_KEY=sk-... python backend/run_evals.py` - this runs the screenshot-to-code on the input dataset in parallel but it will still take a few minutes to complete. - Once the script is done, you can find the outputs in `backend/evals_data/outputs`. 
### Rating evals diff --git a/backend/evals/core.py b/backend/evals/core.py index 3438a7d..5e05362 100644 --- a/backend/evals/core.py +++ b/backend/evals/core.py @@ -1,14 +1,12 @@ import os from config import ANTHROPIC_API_KEY -from llm import stream_claude_response, stream_openai_response +from llm import Llm, stream_claude_response, stream_openai_response from prompts import assemble_prompt from prompts.types import Stack -async def generate_code_core(image_url: str, stack: Stack) -> str: - model = "CLAUDE" - +async def generate_code_core(image_url: str, stack: Stack, model: Llm) -> str: prompt_messages = assemble_prompt(image_url, stack) openai_api_key = os.environ.get("OPENAI_API_KEY") anthropic_api_key = ANTHROPIC_API_KEY @@ -17,7 +15,7 @@ async def generate_code_core(image_url: str, stack: Stack) -> str: async def process_chunk(content: str): pass - if model == "CLAUDE": + if model == Llm.CLAUDE_3_SONNET: if not anthropic_api_key: raise Exception("Anthropic API key not found") @@ -35,6 +33,7 @@ async def generate_code_core(image_url: str, stack: Stack) -> str: api_key=openai_api_key, base_url=openai_base_url, callback=lambda x: process_chunk(x), + model=model, ) return completion diff --git a/backend/run_evals.py b/backend/run_evals.py index a5cfefb..f26c708 100644 --- a/backend/run_evals.py +++ b/backend/run_evals.py @@ -1,6 +1,8 @@ # Load environment variables first from dotenv import load_dotenv +from llm import Llm + load_dotenv() import os @@ -12,6 +14,7 @@ from evals.core import generate_code_core from evals.utils import image_to_data_url STACK = "html_tailwind" +MODEL = Llm.CLAUDE_3_SONNET async def main(): @@ -25,7 +28,7 @@ async def main(): for filename in evals: filepath = os.path.join(INPUT_DIR, filename) data_url = await image_to_data_url(filepath) - task = generate_code_core(data_url, STACK) + task = generate_code_core(image_url=data_url, stack=STACK, model=MODEL) tasks.append(task) results = await asyncio.gather(*tasks)