Merge branch 'main' into main

This commit is contained in:
xia ning 2024-06-11 09:43:03 +08:00 committed by GitHub
commit 0b3b492c98
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
36 changed files with 3111 additions and 313 deletions

1
.github/FUNDING.yml vendored Normal file
View File

@ -0,0 +1 @@
github: [abi]

21
.github/ISSUE_TEMPLATE/bug_report.md vendored Normal file
View File

@ -0,0 +1,21 @@
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: ''
assignees: ''
---
**Describe the bug**
A clear and concise description of what the bug is.
**To Reproduce**
Steps to reproduce the behavior:
1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error
**Screenshots of backend AND frontend terminal logs**
If applicable, add screenshots to help explain your problem.

10
.github/ISSUE_TEMPLATE/custom.md vendored Normal file
View File

@ -0,0 +1,10 @@
---
name: Custom issue template
about: Describe this issue template's purpose here.
title: ''
labels: ''
assignees: ''
---

View File

@ -0,0 +1,20 @@
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: ''
assignees: ''
---
**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
**Describe the solution you'd like**
A clear and concise description of what you want to happen.
**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.
**Additional context**
Add any other context or screenshots about the feature request here.

View File

@ -1,6 +1,6 @@
# screenshot-to-code
A simple tool to convert screenshots, mockups and Figma designs into clean, functional code using AI.
A simple tool to convert screenshots, mockups and Figma designs into clean, functional code using AI. **Now supporting GPT-4O!**
https://github.com/abi/screenshot-to-code/assets/23818/6cebadae-2fe3-4986-ac6a-8fb9db030045
@ -15,9 +15,10 @@ Supported stacks:
Supported AI models:
- GPT-4 Turbo (Apr 2024) - Best model
- GPT-4 Vision (Nov 2023) - Good model that's better than GPT-4 Turbo on some inputs
- Claude 3 Sonnet - Faster, and on par or better than GPT-4 vision for many inputs
- GPT-4O - Best model!
- GPT-4 Turbo (Apr 2024)
- GPT-4 Vision (Nov 2023)
- Claude 3 Sonnet
- DALL-E 3 for image generation
See the [Examples](#-examples) section below for more demos.
@ -30,13 +31,22 @@ We also just added experimental support for taking a video/screen recording of a
[Follow me on Twitter for updates](https://twitter.com/_abi_).
## 🚀 Try It Out with no install
## Sponsors
<a href="https://konghq.com/products/kong-konnect/register?utm_medium=referral&utm_source=github&utm_campaign=platform&utm_content=screenshot-to-code" target="_blank" title="Kong - powering the API world"><img src="https://picoapps.xyz/s2c-sponsors/Kong-GitHub-240x100.png"></a>
## 🚀 Hosted Version
[Try it live on the hosted version (paid)](https://screenshottocode.com).
## 🛠 Getting Started
The app has a React/Vite frontend and a FastAPI backend. You will need an OpenAI API key with access to the GPT-4 Vision API or an Anthropic key if you want to use Claude Sonnet, or for experimental video support.
The app has a React/Vite frontend and a FastAPI backend.
Keys needed:
* [OpenAI API key with access to GPT-4](https://github.com/abi/screenshot-to-code/blob/main/Troubleshooting.md)
* Anthropic key (optional) - only if you want to use Claude Sonnet, or for experimental video support.
Run the backend (I use Poetry for package management - `pip install poetry` if you don't have it):
@ -48,7 +58,7 @@ poetry shell
poetry run uvicorn main:app --reload --port 7001
```
If you want to use Anthropic, add the `ANTHROPIC_API_KEY` to `backend/.env` with your API key from Anthropic.
If you want to use Anthropic, add `ANTHROPIC_API_KEY` to `backend/.env`. You can also set up the keys using the settings dialog on the front-end (click the gear icon after loading the frontend).
Run the frontend:
@ -107,5 +117,3 @@ https://github.com/abi/screenshot-to-code/assets/23818/3fec0f77-44e8-4fb3-a769-a
## 🌍 Hosted Version
🆕 [Try it here (paid)](https://screenshottocode.com). Or see [Getting Started](#-getting-started) for local install instructions to use with your own API keys.
[!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://www.buymeacoffee.com/abiraja)

View File

@ -1,4 +1,4 @@
### Getting an OpenAI API key with GPT4-Vision model access
### Getting an OpenAI API key with GPT-4 model access
You don't need a ChatGPT Pro account. Screenshot to code uses API keys from your OpenAI developer account. In order to get access to the GPT-4 model, log into your OpenAI account and then follow these instructions:

View File

@ -5,7 +5,7 @@ from openai import AsyncOpenAI
from bs4 import BeautifulSoup
async def process_tasks(prompts: List[str], api_key: str, base_url: str):
async def process_tasks(prompts: List[str], api_key: str, base_url: str | None):
tasks = [generate_image(prompt, api_key, base_url) for prompt in prompts]
results = await asyncio.gather(*tasks, return_exceptions=True)
@ -15,22 +15,23 @@ async def process_tasks(prompts: List[str], api_key: str, base_url: str):
print(f"An exception occurred: {result}")
processed_results.append(None)
else:
processed_results.append(result) # type: ignore
processed_results.append(result)
return processed_results
async def generate_image(prompt: str, api_key: str, base_url: str):
async def generate_image(
prompt: str, api_key: str, base_url: str | None
) -> Union[str, None]:
client = AsyncOpenAI(api_key=api_key, base_url=base_url)
image_params: Dict[str, Union[str, int]] = {
"model": "dall-e-3",
"quality": "standard",
"style": "natural",
"n": 1,
"size": "1024x1024",
"prompt": prompt,
}
res = await client.images.generate(**image_params) # type: ignore
res = await client.images.generate(
model="dall-e-3",
quality="standard",
style="natural",
n=1,
size="1024x1024",
prompt=prompt,
)
await client.close()
return res.data[0].url
@ -63,13 +64,13 @@ def create_alt_url_mapping(code: str) -> Dict[str, str]:
async def generate_images(
code: str, api_key: str, base_url: Union[str, None], image_cache: Dict[str, str]
):
) -> str:
# Find all images
soup = BeautifulSoup(code, "html.parser")
images = soup.find_all("img")
# Extract alt texts as image prompts
alts = []
alts: List[str | None] = []
for img in images:
# Only include URL if the image starts with https://placehold.co
# and it's not already in the image_cache
@ -77,26 +78,26 @@ async def generate_images(
img["src"].startswith("https://placehold.co")
and image_cache.get(img.get("alt")) is None
):
alts.append(img.get("alt", None)) # type: ignore
alts.append(img.get("alt", None))
# Exclude images with no alt text
alts = [alt for alt in alts if alt is not None] # type: ignore
filtered_alts: List[str] = [alt for alt in alts if alt is not None]
# Remove duplicates
prompts = list(set(alts)) # type: ignore
prompts = list(set(filtered_alts))
# Return early if there are no images to replace
if len(prompts) == 0: # type: ignore
if len(prompts) == 0:
return code
# Generate images
results = await process_tasks(prompts, api_key, base_url) # type: ignore
results = await process_tasks(prompts, api_key, base_url)
# Create a dict mapping alt text to image URL
mapped_image_urls = dict(zip(prompts, results)) # type: ignore
mapped_image_urls = dict(zip(prompts, results))
# Merge with image_cache
mapped_image_urls = {**mapped_image_urls, **image_cache} # type: ignore
mapped_image_urls = {**mapped_image_urls, **image_cache}
# Replace old image URLs with the generated URLs
for img in images:

View File

@ -16,6 +16,7 @@ from utils import pprint_prompt
class Llm(Enum):
GPT_4_VISION = "gpt-4-vision-preview"
GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09"
GPT_4O_2024_05_13 = "gpt-4o-2024-05-13"
CLAUDE_3_SONNET = "claude-3-sonnet-20240229"
CLAUDE_3_SONNET_BEDROCK = "anthropic.claude-3-sonnet-20240229-v1:0"
CLAUDE_3_OPUS = "claude-3-opus-20240229"
@ -51,13 +52,18 @@ async def stream_openai_response(
}
# Add 'max_tokens' only if the model is a GPT4 vision or Turbo model
if model == Llm.GPT_4_VISION or model == Llm.GPT_4_TURBO_2024_04_09:
if (
model == Llm.GPT_4_VISION
or model == Llm.GPT_4_TURBO_2024_04_09
or model == Llm.GPT_4O_2024_05_13
):
params["max_tokens"] = 4096
stream = await client.chat.completions.create(**params) # type: ignore
full_response = ""
async for chunk in stream: # type: ignore
assert isinstance(chunk, ChatCompletionChunk)
if chunk.choices and len(chunk.choices) > 0 and chunk.choices[0].delta and chunk.choices[0].delta.content:
content = chunk.choices[0].delta.content or ""
full_response += content
await callback(content)

View File

@ -7,10 +7,13 @@ from evals.config import EVALS_DIR
router = APIRouter()
# Update this if the number of outputs generated per input changes
N = 1
class Eval(BaseModel):
input: str
output: str
outputs: list[str]
@router.get("/evals")
@ -25,21 +28,27 @@ async def get_evals():
input_file_path = os.path.join(input_dir, file)
input_file = await image_to_data_url(input_file_path)
# Construct the corresponding output file name
output_file_name = file.replace(".png", ".html")
output_file_path = os.path.join(output_dir, output_file_name)
# Construct the corresponding output file names
output_file_names = [
file.replace(".png", f"_{i}.html") for i in range(0, N)
] # N outputs are expected for each input (see the N constant above)
output_files_data: list[str] = []
for output_file_name in output_file_names:
output_file_path = os.path.join(output_dir, output_file_name)
# Check if the output file exists
if os.path.exists(output_file_path):
with open(output_file_path, "r") as f:
output_file_data = f.read()
output_files_data.append(f.read())
else:
output_file_data = "Output file not found."
output_files_data.append(
"<html><h1>Output file not found.</h1></html>"
)
evals.append(
Eval(
input=input_file,
output=output_file_data,
outputs=output_files_data,
)
)

View File

@ -13,7 +13,7 @@ from llm import (
)
from openai.types.chat import ChatCompletionMessageParam
from mock_llm import mock_completion
from typing import Dict, List, cast, get_args
from typing import Dict, List, Union, cast, get_args
from image_generation import create_alt_url_mapping, generate_images
from prompts import assemble_imported_code_prompt, assemble_prompt
from datetime import datetime
@ -84,7 +84,7 @@ async def stream_code(websocket: WebSocket):
# Read the model from the request. Fall back to default if not provided.
code_generation_model_str = params.get(
"codeGenerationModel", Llm.GPT_4_VISION.value
"codeGenerationModel", Llm.GPT_4O_2024_05_13.value
)
try:
code_generation_model = convert_frontend_str_to_llm(code_generation_model_str)
@ -111,6 +111,7 @@ async def stream_code(websocket: WebSocket):
if not openai_api_key and (
code_generation_model == Llm.GPT_4_VISION
or code_generation_model == Llm.GPT_4_TURBO_2024_04_09
or code_generation_model == Llm.GPT_4O_2024_05_13
):
print("OpenAI API key not found")
await throw_error(
@ -118,8 +119,19 @@ async def stream_code(websocket: WebSocket):
)
return
# Get the Anthropic API key from the request. Fall back to environment variable if not provided.
# If neither is provided, we throw an error later only if Claude is used.
anthropic_api_key = None
if "anthropicApiKey" in params and params["anthropicApiKey"]:
anthropic_api_key = params["anthropicApiKey"]
print("Using Anthropic API key from client-side settings dialog")
else:
anthropic_api_key = ANTHROPIC_API_KEY
if anthropic_api_key:
print("Using Anthropic API key from environment variable")
# Get the OpenAI Base URL from the request. Fall back to environment variable if not provided.
openai_base_url = None
openai_base_url: Union[str, None] = None
# Disable user-specified OpenAI Base URL in prod
if not os.environ.get("IS_PROD"):
if "openAiBaseURL" in params and params["openAiBaseURL"]:
@ -217,17 +229,17 @@ async def stream_code(websocket: WebSocket):
else:
try:
if validated_input_mode == "video":
if not ANTHROPIC_API_KEY and not AWS_ACCESS_KEY and not AWS_SECRET_ACCESS_KEY:
if not anthropic_api_key and not AWS_ACCESS_KEY and not AWS_SECRET_ACCESS_KEY:
await throw_error(
"Video only works with Anthropic models. Neither Anthropic API key or AWS Access Key found. Please add the environment variable ANTHROPIC_API_KEY or AWS_ACCESS_KEY/AWS_SECRET_ACCESS_KEY to backend/.env"
"Video only works with Anthropic models. Neither Anthropic API key or AWS Access Key found. Please add the environment variable ANTHROPIC_API_KEY or AWS_ACCESS_KEY/AWS_SECRET_ACCESS_KEY to backend/.env or in the settings dialog"
)
raise Exception("No Anthropic key")
if ANTHROPIC_API_KEY:
if anthropic_api_key:
completion = await stream_claude_response_native(
system_prompt=VIDEO_PROMPT,
messages=prompt_messages, # type: ignore
api_key=ANTHROPIC_API_KEY,
api_key=anthropic_api_key,
callback=lambda x: process_chunk(x),
model=Llm.CLAUDE_3_OPUS,
include_thinking=True,
@ -245,15 +257,15 @@ async def stream_code(websocket: WebSocket):
)
exact_llm_version = Llm.CLAUDE_3_OPUS
elif code_generation_model == Llm.CLAUDE_3_SONNET:
if not ANTHROPIC_API_KEY and not AWS_ACCESS_KEY and not AWS_SECRET_ACCESS_KEY:
if not anthropic_api_key and not AWS_ACCESS_KEY and not AWS_SECRET_ACCESS_KEY:
await throw_error(
"No Anthropic API key or AWS Access Key found. Please add the environment variable ANTHROPIC_API_KEY or AWS_ACCESS_KEY/AWS_SECRET_ACCESS_KEY to backend/.env"
"No Anthropic API key or AWS Access Key found. Please add the environment variable ANTHROPIC_API_KEY or AWS_ACCESS_KEY/AWS_SECRET_ACCESS_KEY to backend/.env or in the settings dialog"
)
raise Exception("No Anthropic key")
if ANTHROPIC_API_KEY:
if anthropic_api_key:
completion = await stream_claude_response(
prompt_messages, # type: ignore
api_key=ANTHROPIC_API_KEY,
api_key=anthropic_api_key,
callback=lambda x: process_chunk(x),
)
else:

View File

@ -13,8 +13,9 @@ from evals.config import EVALS_DIR
from evals.core import generate_code_core
from evals.utils import image_to_data_url
STACK = "html_tailwind"
MODEL = Llm.CLAUDE_3_SONNET
STACK = "ionic_tailwind"
MODEL = Llm.GPT_4O_2024_05_13
N = 1 # Number of outputs to generate
async def main():
@ -28,6 +29,7 @@ async def main():
for filename in evals:
filepath = os.path.join(INPUT_DIR, filename)
data_url = await image_to_data_url(filepath)
for _ in range(N): # Generate N tasks for each input
task = generate_code_core(image_url=data_url, stack=STACK, model=MODEL)
tasks.append(task)
@ -35,9 +37,13 @@ async def main():
os.makedirs(OUTPUT_DIR, exist_ok=True)
for filename, content in zip(evals, results):
# File name is derived from the original filename in evals
output_filename = f"{os.path.splitext(filename)[0]}.html"
for i, content in enumerate(results):
# Calculate index for filename and output number
eval_index = i // N
output_number = i % N
filename = evals[eval_index]
# File name is derived from the original filename in evals with an added output number
output_filename = f"{os.path.splitext(filename)[0]}_{output_number}.html"
output_filepath = os.path.join(OUTPUT_DIR, output_filename)
with open(output_filepath, "w") as file:
file.write(content)

View File

@ -24,6 +24,11 @@ class TestConvertFrontendStrToLlm(unittest.TestCase):
Llm.GPT_4_TURBO_2024_04_09,
"Should convert 'gpt-4-turbo-2024-04-09' to Llm.GPT_4_TURBO_2024_04_09",
)
self.assertEqual(
convert_frontend_str_to_llm("gpt-4o-2024-05-13"),
Llm.GPT_4O_2024_05_13,
"Should convert 'gpt-4o-2024-05-13' to Llm.GPT_4O_2024_05_13",
)
def test_convert_invalid_string_raises_exception(self):
with self.assertRaises(ValueError):

3
frontend/.gitignore vendored
View File

@ -25,3 +25,6 @@ dist-ssr
# Env files
.env*
# Test files
src/tests/results/

View File

@ -1,4 +1,4 @@
FROM node:20.9-bullseye-slim
FROM node:22-bullseye-slim
# Set the working directory in the container
WORKDIR /app
@ -6,6 +6,9 @@ WORKDIR /app
# Copy package.json and yarn.lock
COPY package.json yarn.lock /app/
# Set the environment variable to skip Puppeteer download
ENV PUPPETEER_SKIP_DOWNLOAD=true
# Install dependencies
RUN yarn install

9
frontend/jest.config.js Normal file
View File

@ -0,0 +1,9 @@
// Jest configuration for the frontend test suite.
export default {
  // Compile TypeScript test files on the fly via ts-jest.
  preset: "ts-jest",
  testEnvironment: "node",
  // Runs before tests; loads environment variables for the suite.
  setupFiles: ["<rootDir>/src/setupTests.ts"],
  transform: {
    "^.+\\.tsx?$": "ts-jest",
  },
  // Generous 30s per-test timeout — presumably for Puppeteer-driven
  // browser tests (see devDependencies); confirm before lowering.
  testTimeout: 30000,
};

View File

@ -10,7 +10,7 @@
"build-hosted": "tsc && vite build --mode prod",
"lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0",
"preview": "vite preview",
"test": "vitest"
"test": "jest"
},
"dependencies": {
"@codemirror/lang-html": "^6.4.6",
@ -46,21 +46,28 @@
"tailwindcss-animate": "^1.0.7",
"thememirror": "^2.0.1",
"vite-plugin-checker": "^0.6.2",
"webm-duration-fix": "^1.0.4"
"webm-duration-fix": "^1.0.4",
"zustand": "^4.5.2"
},
"devDependencies": {
"@types/jest": "^29.5.12",
"@types/node": "^20.9.0",
"@types/puppeteer": "^7.0.4",
"@types/react": "^18.2.15",
"@types/react-dom": "^18.2.7",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"@vitejs/plugin-react": "^4.0.3",
"autoprefixer": "^10.4.16",
"dotenv": "^16.4.5",
"eslint": "^8.45.0",
"eslint-plugin-react-hooks": "^4.6.0",
"eslint-plugin-react-refresh": "^0.4.3",
"jest": "^29.7.0",
"postcss": "^8.4.31",
"puppeteer": "^22.6.4",
"tailwindcss": "^3.3.5",
"ts-jest": "^29.1.2",
"typescript": "^5.0.2",
"vite": "^4.4.5",
"vite-plugin-html": "^3.2.0",

View File

@ -0,0 +1,2 @@
TEST_SCREENSHOTONE_API_KEY=
TEST_ROOT_PATH=

View File

@ -40,6 +40,8 @@ import ModelSettingsSection from "./components/ModelSettingsSection";
import { extractHtml } from "./components/preview/extractHtml";
import useBrowserTabIndicator from "./hooks/useBrowserTabIndicator";
import TipLink from "./components/core/TipLink";
import SelectAndEditModeToggleButton from "./components/select-and-edit/SelectAndEditModeToggleButton";
import { useAppStore } from "./store/app-store";
const IS_OPENAI_DOWN = false;
@ -54,16 +56,19 @@ function App() {
const [updateInstruction, setUpdateInstruction] = useState("");
const [isImportedFromCode, setIsImportedFromCode] = useState<boolean>(false);
const { disableInSelectAndEditMode } = useAppStore();
// Settings
const [settings, setSettings] = usePersistedState<Settings>(
{
openAiApiKey: null,
openAiBaseURL: null,
anthropicApiKey: null,
screenshotOneApiKey: null,
isImageGenerationEnabled: true,
editorTheme: EditorTheme.COBALT,
generatedCodeConfig: Stack.HTML_TAILWIND,
codeGenerationModel: CodeGenerationModel.GPT_4_TURBO_2024_04_09,
codeGenerationModel: CodeGenerationModel.GPT_4O_2024_05_13,
// Only relevant for hosted version
isTermOfServiceAccepted: false,
},
@ -89,6 +94,14 @@ function App() {
CodeGenerationModel.GPT_4_TURBO_2024_04_09 &&
settings.generatedCodeConfig === Stack.REACT_TAILWIND;
const showGpt4OMessage =
selectedCodeGenerationModel !== CodeGenerationModel.GPT_4O_2024_05_13 &&
appState === AppState.INITIAL;
const showSelectAndEditFeature =
selectedCodeGenerationModel === CodeGenerationModel.GPT_4O_2024_05_13 &&
settings.generatedCodeConfig === Stack.HTML_TAILWIND;
// Indicate coding state using the browser tab's favicon and title
useBrowserTabIndicator(appState === AppState.CODING);
@ -144,6 +157,7 @@ function App() {
setAppHistory([]);
setCurrentVersion(null);
setShouldIncludeResultImage(false);
disableInSelectAndEditMode();
};
const regenerate = () => {
@ -232,7 +246,9 @@ function App() {
parentIndex: parentVersion,
code,
inputs: {
prompt: updateInstruction,
prompt: params.history
? params.history[params.history.length - 1]
: updateInstruction,
},
},
];
@ -274,7 +290,10 @@ function App() {
}
// Subsequent updates
async function doUpdate() {
async function doUpdate(
updateInstruction: string,
selectedElement?: HTMLElement
) {
if (currentVersion === null) {
toast.error(
"No current version set. Contact support or open a Github issue."
@ -292,7 +311,17 @@ function App() {
return;
}
const updatedHistory = [...historyTree, updateInstruction];
let modifiedUpdateInstruction = updateInstruction;
// Send in a reference to the selected element if it exists
if (selectedElement) {
modifiedUpdateInstruction =
updateInstruction +
" referring to this element specifically: " +
selectedElement.outerHTML;
}
const updatedHistory = [...historyTree, modifiedUpdateInstruction];
if (shouldIncludeResultImage) {
const resultImage = await takeScreenshot();
@ -403,6 +432,15 @@ function App() {
</div>
)}
{showGpt4OMessage && (
<div className="rounded-lg p-2 bg-fuchsia-200">
<p className="text-gray-800 text-sm">
Now supporting GPT-4o. Higher quality and 2x faster. Give it a
try!
</p>
</div>
)}
{appState !== AppState.CODE_READY && <TipLink />}
{IS_RUNNING_ON_CLOUD && !settings.openAiApiKey && <OnboardingNote />}
@ -468,8 +506,8 @@ function App() {
/>
</div>
<Button
onClick={doUpdate}
className="dark:text-white dark:bg-gray-700"
onClick={() => doUpdate(updateInstruction)}
className="dark:text-white dark:bg-gray-700 update-btn"
>
Update
</Button>
@ -477,10 +515,13 @@ function App() {
<div className="flex items-center justify-end gap-x-2 mt-2">
<Button
onClick={regenerate}
className="flex items-center gap-x-2 dark:text-white dark:bg-gray-700"
className="flex items-center gap-x-2 dark:text-white dark:bg-gray-700 regenerate-btn"
>
🔄 Regenerate
</Button>
{showSelectAndEditFeature && (
<SelectAndEditModeToggleButton />
)}
</div>
<div className="flex justify-end items-center mt-2">
<TipLink />
@ -586,7 +627,7 @@ function App() {
<Button
onClick={downloadCode}
variant="secondary"
className="flex items-center gap-x-2 mr-4 dark:text-white dark:bg-gray-700"
className="flex items-center gap-x-2 mr-4 dark:text-white dark:bg-gray-700 download-btn"
>
<FaDownload /> Download
</Button>
@ -609,10 +650,18 @@ function App() {
</div>
</div>
<TabsContent value="desktop">
<Preview code={previewCode} device="desktop" />
<Preview
code={previewCode}
device="desktop"
doUpdate={doUpdate}
/>
</TabsContent>
<TabsContent value="mobile">
<Preview code={previewCode} device="mobile" />
<Preview
code={previewCode}
device="mobile"
doUpdate={doUpdate}
/>
</TabsContent>
<TabsContent value="code">
<CodeTab

View File

@ -166,7 +166,7 @@ function ImageUpload({ setReferenceImages }: Props) {
{screenRecorderState === ScreenRecorderState.INITIAL && (
/* eslint-disable-next-line @typescript-eslint/no-explicit-any */
<div {...getRootProps({ style: style as any })}>
<input {...getInputProps()} />
<input {...getInputProps()} className="file-input" />
<p className="text-slate-700 text-lg">
Drag & drop a screenshot here, <br />
or click to upload

View File

@ -38,7 +38,9 @@ function ImportCodeSection({ importFromCode }: Props) {
return (
<Dialog>
<DialogTrigger asChild>
<Button variant="secondary">Import from Code</Button>
<Button className="import-from-code-btn" variant="secondary">
Import from Code
</Button>
</DialogTrigger>
<DialogContent className="sm:max-w-[425px]">
<DialogHeader>
@ -62,7 +64,7 @@ function ImportCodeSection({ importFromCode }: Props) {
/>
<DialogFooter>
<Button type="submit" onClick={doImport}>
<Button className="import-btn" type="submit" onClick={doImport}>
Import
</Button>
</DialogFooter>

View File

@ -1,21 +1,35 @@
import { useEffect, useRef } from "react";
import { useEffect, useRef, useState } from "react";
import classNames from "classnames";
import useThrottle from "../hooks/useThrottle";
import EditPopup from "./select-and-edit/EditPopup";
interface Props {
code: string;
device: "mobile" | "desktop";
doUpdate: (updateInstruction: string, selectedElement?: HTMLElement) => void;
}
function Preview({ code, device }: Props) {
function Preview({ code, device, doUpdate }: Props) {
const iframeRef = useRef<HTMLIFrameElement | null>(null);
// Don't update code more often than every 200ms.
const throttledCode = useThrottle(code, 200);
// Select and edit functionality
const [clickEvent, setClickEvent] = useState<MouseEvent | null>(null);
useEffect(() => {
if (iframeRef.current) {
iframeRef.current.srcdoc = throttledCode;
const iframe = iframeRef.current;
if (iframe) {
iframe.srcdoc = throttledCode;
// Set up click handler for select and edit functionality
iframe.addEventListener("load", function () {
iframe.contentWindow?.document.body.addEventListener(
"click",
setClickEvent
);
});
}
}, [throttledCode]);
@ -34,6 +48,7 @@ function Preview({ code, device }: Props) {
}
)}
></iframe>
<EditPopup event={clickEvent} iframeRef={iframeRef} doUpdate={doUpdate} />
</div>
);
}

View File

@ -49,7 +49,7 @@ function SettingsDialog({ settings, setSettings }: Props) {
<div className="flex items-center space-x-2">
<Label htmlFor="image-generation">
<div>DALL-E Placeholder Image Generation</div>
<div className="font-light mt-2">
<div className="font-light mt-2 text-xs">
More fun with it but if you want to save money, turn it off.
</div>
</Label>
@ -64,10 +64,11 @@ function SettingsDialog({ settings, setSettings }: Props) {
}
/>
</div>
<div className="flex flex-col space-y-4">
<div className="flex flex-col space-y-6">
<div>
<Label htmlFor="openai-api-key">
<div>OpenAI API key</div>
<div className="font-light mt-2 leading-relaxed">
<div className="font-light mt-1 mb-2 text-xs leading-relaxed">
Only stored in your browser. Never stored on servers. Overrides
your .env config.
</div>
@ -84,9 +85,10 @@ function SettingsDialog({ settings, setSettings }: Props) {
}))
}
/>
</div>
{!IS_RUNNING_ON_CLOUD && (
<>
<div>
<Label htmlFor="openai-api-key">
<div>OpenAI Base URL (optional)</div>
<div className="font-light mt-2 leading-relaxed">
@ -105,9 +107,31 @@ function SettingsDialog({ settings, setSettings }: Props) {
}))
}
/>
</>
</div>
)}
<div>
<Label htmlFor="anthropic-api-key">
<div>Anthropic API key</div>
<div className="font-light mt-1 text-xs leading-relaxed">
Only stored in your browser. Never stored on servers. Overrides
your .env config.
</div>
</Label>
<Input
id="anthropic-api-key"
placeholder="Anthropic API key"
value={settings.anthropicApiKey || ""}
onChange={(e) =>
setSettings((s) => ({
...s,
anthropicApiKey: e.target.value,
}))
}
/>
</div>
<Accordion type="single" collapsible className="w-full">
<AccordionItem value="item-1">
<AccordionTrigger>Screenshot by URL Config</AccordionTrigger>

View File

@ -69,7 +69,7 @@ export function UrlInputSection({ doCreate, screenshotOneApiKey }: Props) {
<Button
onClick={takeScreenshot}
disabled={isLoading}
className="bg-slate-400"
className="bg-slate-400 capture-btn"
>
{isLoading ? "Capturing..." : "Capture"}
</Button>

View File

@ -4,7 +4,7 @@ import RatingPicker from "./RatingPicker";
interface Eval {
input: string;
output: string;
outputs: string[];
}
function EvalsPage() {
@ -38,18 +38,22 @@ function EvalsPage() {
<div className="flex flex-col gap-y-4 mt-4 mx-auto justify-center">
{evals.map((e, index) => (
<div className="flex flex-col justify-center" key={index}>
<div className="flex gap-x-2 justify-center">
<h2 className="font-bold text-lg ml-4">{index}</h2>
<div className="flex gap-x-2 justify-center ml-4">
{/* Update w if N changes to a fixed number like w-[600px] */}
<div className="w-1/2 p-1 border">
<img src={e.input} />
<img src={e.input} alt={`Input for eval ${index}`} />
</div>
<div className="w-1/2 p-1 border">
{e.outputs.map((output, outputIndex) => (
<div className="w-1/2 p-1 border" key={outputIndex}>
{/* Put output into an iframe */}
<iframe
srcDoc={e.output}
srcDoc={output}
className="w-[1200px] h-[800px] transform scale-[0.60]"
style={{ transformOrigin: "top left" }}
></iframe>
</div>
))}
</div>
<div className="ml-8 mt-4 flex justify-center">
<RatingPicker

View File

@ -1,4 +1,3 @@
import { expect, test } from "vitest";
import { extractHistoryTree, renderHistory } from "./utils";
import type { History } from "./history_types";
@ -84,7 +83,8 @@ const basicBadHistory: History = [
},
];
test("should correctly extract the history tree", () => {
describe("History Utils", () => {
test("should correctly extract the history tree", () => {
expect(extractHistoryTree(basicLinearHistory, 2)).toEqual([
"<html>1. create</html>",
"use better icons",
@ -132,9 +132,9 @@ test("should correctly extract the history tree", () => {
// Bad tree
expect(() => extractHistoryTree(basicBadHistory, 1)).toThrow();
});
});
test("should correctly render the history tree", () => {
test("should correctly render the history tree", () => {
expect(renderHistory(basicLinearHistory, 2)).toEqual([
{
isActive: false,
@ -227,4 +227,5 @@ test("should correctly render the history tree", () => {
type: "Edit",
},
]);
});
});

View File

@ -0,0 +1,143 @@
// Popup overlay for "select and edit" mode: when the user clicks an element
// inside the preview iframe, this popup appears near the click position and
// lets them type an instruction that is sent to doUpdate together with the
// selected element.
import React, { useEffect, useRef, useState } from "react";
import { Textarea } from "../ui/textarea";
import { Button } from "../ui/button";
import { addHighlight, getAdjustedCoordinates, removeHighlight } from "./utils";
import { useAppStore } from "../../store/app-store";

interface EditPopupProps {
  // Click event forwarded from inside the preview iframe (null until a click happens)
  event: MouseEvent | null;
  // Ref to the preview iframe; its bounding rect is used to translate
  // iframe-local click coordinates into page coordinates
  iframeRef: React.RefObject<HTMLIFrameElement>;
  // Callback that kicks off a regeneration with the user's instruction and,
  // optionally, the clicked element
  doUpdate: (updateInstruction: string, selectedElement?: HTMLElement) => void;
}

const EditPopup: React.FC<EditPopupProps> = ({
  event,
  iframeRef,
  doUpdate,
}) => {
  // App state
  const { inSelectAndEditMode } = useAppStore();

  // Create a wrapper ref to store inSelectAndEditMode so the value is not stale
  // in a event listener
  const inSelectAndEditModeRef = useRef(inSelectAndEditMode);

  // Update the ref whenever the state changes
  useEffect(() => {
    inSelectAndEditModeRef.current = inSelectAndEditMode;
  }, [inSelectAndEditMode]);

  // Popup state
  const [popupVisible, setPopupVisible] = useState(false);
  const [popupPosition, setPopupPosition] = useState({ x: 0, y: 0 });

  // Edit state
  const [selectedElement, setSelectedElement] = useState<
    HTMLElement | undefined
  >(undefined);
  const [updateText, setUpdateText] = useState("");

  // Textarea ref for focusing
  const textareaRef = useRef<HTMLTextAreaElement | null>(null);

  // Submit the instruction. Strips the highlight styles from the selected
  // element before handing it to doUpdate so the temporary inline styles
  // don't leak into the element HTML sent to the model.
  function onUpdate(updateText: string) {
    // Perform the update
    doUpdate(
      updateText,
      selectedElement ? removeHighlight(selectedElement) : selectedElement
    );

    // Unselect the element
    setSelectedElement(undefined);

    // Hide the popup
    setPopupVisible(false);
  }

  // Remove highlight and reset state when not in select and edit mode
  useEffect(() => {
    if (!inSelectAndEditMode) {
      if (selectedElement) removeHighlight(selectedElement);
      setSelectedElement(undefined);
      setPopupVisible(false);
    }
  }, [inSelectAndEditMode, selectedElement]);

  // Handle the click event
  useEffect(() => {
    // Return if not in select and edit mode
    // (read through the ref so a stale closure can't see an old mode value)
    if (!inSelectAndEditModeRef.current || !event) {
      return;
    }

    // Prevent default to avoid issues like label clicks triggering textareas, etc.
    event.preventDefault();

    const targetElement = event.target as HTMLElement;

    // Return if no target element
    if (!targetElement) return;

    // Highlight and set the selected element
    setSelectedElement((prev) => {
      // Remove style from previous element
      if (prev) {
        removeHighlight(prev);
      }
      return addHighlight(targetElement);
    });

    // Calculate adjusted coordinates
    // (click coords are iframe-local; offset by the iframe's bounding rect)
    const adjustedCoordinates = getAdjustedCoordinates(
      event.clientX,
      event.clientY,
      iframeRef.current?.getBoundingClientRect()
    );

    // Show the popup at the click position
    setPopupVisible(true);
    setPopupPosition({ x: adjustedCoordinates.x, y: adjustedCoordinates.y });

    // Reset the update text
    setUpdateText("");

    // Focus the textarea
    textareaRef.current?.focus();
  }, [event, iframeRef]);

  // Focus the textarea when the popup is visible (we can't do this only when handling the click event
  // because the textarea is not rendered yet)
  // We need to also do it in the click event because popupVisible doesn't change values in that event
  useEffect(() => {
    if (popupVisible) {
      textareaRef.current?.focus();
    }
  }, [popupVisible]);

  // NOTE(review): returns undefined (not null) when hidden — confirm the
  // project's @types/react version accepts undefined as a component result.
  if (!popupVisible) return;

  return (
    <div
      className="absolute bg-white p-4 border border-gray-300 rounded shadow-lg w-60"
      style={{ top: popupPosition.y, left: popupPosition.x }}
    >
      <Textarea
        ref={textareaRef}
        value={updateText}
        onChange={(e) => setUpdateText(e.target.value)}
        placeholder="Tell the AI what to change about this element..."
        onKeyDown={(e) => {
          if (e.key === "Enter") {
            e.preventDefault();
            onUpdate(updateText);
          }
        }}
      />
      <div className="flex justify-end mt-2">
        <Button onClick={() => onUpdate(updateText)}>Update</Button>
      </div>
    </div>
  );
};

export default EditPopup;

View File

@ -0,0 +1,22 @@
import { GiClick } from "react-icons/gi";
import { useAppStore } from "../../store/app-store";
import { Button } from "../ui/button";
/**
 * Toolbar button that toggles the app-wide "select and edit" mode.
 * While the mode is active the button switches to the destructive variant
 * and offers to exit; otherwise it invites the user to enter the mode.
 */
function SelectAndEditModeToggleButton() {
  // Mode flag and its toggle live in the global zustand app store.
  const { inSelectAndEditMode, toggleInSelectAndEditMode } = useAppStore();

  const label = inSelectAndEditMode
    ? "Exit selection mode"
    : "Select and update";

  return (
    <Button
      variant={inSelectAndEditMode ? "destructive" : "default"}
      onClick={toggleInSelectAndEditMode}
      className="flex items-center gap-x-2 dark:text-white dark:bg-gray-700 regenerate-btn"
    >
      <GiClick className="text-lg" />
      <span>{label}</span>
    </Button>
  );
}
export default SelectAndEditModeToggleButton;

View File

@ -0,0 +1,22 @@
/**
 * Clears the inline highlight styles that addHighlight applied and
 * returns the same element for chaining.
 */
export function removeHighlight(element: HTMLElement) {
  const { style } = element;
  style.outline = "";
  style.backgroundColor = "";
  return element;
}
/**
 * Marks an element as the current selection: dashed blue outline plus a
 * light blue background. Returns the same element for chaining.
 */
export function addHighlight(element: HTMLElement) {
  const { style } = element;
  style.outline = "2px dashed #1846db";
  style.backgroundColor = "#bfcbf5";
  return element;
}
/**
 * Translates click coordinates by an (optional) bounding rect's origin —
 * e.g. converting iframe-local coordinates into page coordinates.
 * When no rect is supplied, the coordinates pass through unchanged.
 */
export function getAdjustedCoordinates(
  x: number,
  y: number,
  rect: DOMRect | undefined
) {
  return {
    x: x + (rect?.left ?? 0),
    y: y + (rect?.top ?? 0),
  };
}

View File

@ -1,5 +1,7 @@
// Keep in sync with backend (llm.py)
// Order here matches dropdown order
export enum CodeGenerationModel {
GPT_4O_2024_05_13 = "gpt-4o-2024-05-13",
GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09",
GPT_4_VISION = "gpt_4_vision",
CLAUDE_3_SONNET = "claude_3_sonnet",
@ -9,6 +11,7 @@ export enum CodeGenerationModel {
export const CODE_GENERATION_MODEL_DESCRIPTIONS: {
[key in CodeGenerationModel]: { name: string; inBeta: boolean };
} = {
"gpt-4o-2024-05-13": { name: "GPT-4o 🌟", inBeta: false },
"gpt-4-turbo-2024-04-09": { name: "GPT-4 Turbo (Apr 2024)", inBeta: false },
gpt_4_vision: { name: "GPT-4 Vision (Nov 2023)", inBeta: false },
claude_3_sonnet: { name: "Claude 3 Sonnet", inBeta: false },

View File

@ -0,0 +1,3 @@
// Jest setup file: load environment variables from .env.jest so the test
// runner can read them via process.env.
import { config as loadEnv } from "dotenv";

loadEnv({ path: ".env.jest" });

View File

@ -0,0 +1,15 @@
import { create } from "zustand";
// Store for app-wide state
interface AppStore {
inSelectAndEditMode: boolean;
toggleInSelectAndEditMode: () => void;
disableInSelectAndEditMode: () => void;
}
export const useAppStore = create<AppStore>((set) => ({
inSelectAndEditMode: false,
toggleInSelectAndEditMode: () =>
set((state) => ({ inSelectAndEditMode: !state.inSelectAndEditMode })),
disableInSelectAndEditMode: () => set({ inSelectAndEditMode: false }),
}));

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

View File

@ -0,0 +1,274 @@
import puppeteer, { Browser, Page, ElementHandle } from "puppeteer";
import { Stack } from "../lib/stacks";
import { CodeGenerationModel } from "../lib/models";

// Root folder for fixtures/results, supplied by the environment.
// NOTE(review): the env var is TEST_ROOT_PATH while the constant is named
// TESTS_ROOT_PATH — confirm the variable name matches how CI sets it.
const TESTS_ROOT_PATH = process.env.TEST_ROOT_PATH;

// Fixtures (input screenshots fed to the app)
const FIXTURES_PATH = `${TESTS_ROOT_PATH}/fixtures`;
const SIMPLE_SCREENSHOT = FIXTURES_PATH + "/simple_button.png";
const SCREENSHOT_WITH_IMAGES = `${FIXTURES_PATH}/simple_ui_with_image.png`;

// Results (screenshots captured during test runs are written here)
const RESULTS_DIR = `${TESTS_ROOT_PATH}/results`;
// End-to-end tests that drive the real frontend (expected to be running at
// localhost:5173) through Puppeteer, across every (model, stack) pair.
describe("e2e tests", () => {
  let browser: Browser;
  let page: Page;

  // DEBUG=true shrinks the matrix to one stack and one model for fast local runs.
  const DEBUG = false;
  const IS_HEADLESS = true;

  const stacks = Object.values(Stack).slice(0, DEBUG ? 1 : undefined);
  const models = Object.values(CodeGenerationModel).slice(
    0,
    DEBUG ? 1 : undefined
  );

  beforeAll(async () => {
    browser = await puppeteer.launch({ headless: IS_HEADLESS });
    page = await browser.newPage();
    // The frontend dev server must already be running for these tests.
    await page.goto("http://localhost:5173/");
    // Set screen size
    await page.setViewport({ width: 1080, height: 1024 });
    // TODO: Does this need to be moved?
    // const client = await page.createCDPSession();
    // Set download behavior path
    // await client.send("Page.setDownloadBehavior", {
    //   behavior: "allow",
    //   downloadPath: DOWNLOAD_PATH,
    // });
  });

  afterAll(async () => {
    await browser.close();
  });

  // Create tests: generate code from a screenshot and from a live URL
  // for every (model, stack) combination.
  models.forEach((model) => {
    stacks.forEach((stack) => {
      it(
        `Create for : ${model} & ${stack}`,
        async () => {
          const app = new App(
            page,
            stack,
            model,
            `create_screenshot_${model}_${stack}`
          );
          await app.init();
          // Generate from screenshot
          await app.uploadImage(SCREENSHOT_WITH_IMAGES);
        },
        60 * 1000
      );
      it(
        `Create from URL for : ${model} & ${stack}`,
        async () => {
          const app = new App(
            page,
            stack,
            model,
            `create_url_${model}_${stack}`
          );
          await app.init();
          // Generate by screenshotting a live URL
          await app.generateFromUrl("https://a.picoapps.xyz/design-fear");
        },
        60 * 1000
      );
    });
  });

  // Update tests - for every model (doesn't need to be repeated for each
  // stack - fixed to HTML Tailwind only)
  models.forEach((model) => {
    ["html_tailwind"].forEach((stack) => {
      it(
        `update: ${model}`,
        async () => {
          const app = new App(page, stack, model, `update_${model}_${stack}`);
          await app.init();
          // Generate from screenshot
          await app.uploadImage(SIMPLE_SCREENSHOT);
          // Regenerate works for v1
          await app.regenerate();
          // Make an update
          await app.edit("make the button background blue", "v2");
          // Make another update
          await app.edit("make the text italic", "v3");
          // Branch off v2 and make an update
          await app.clickVersion("v2");
          await app.edit("make the text yellow", "v4");
        },
        90 * 1000
      );
    });
  });

  // Start from code tests - for every model. Currently skipped: the flow
  // relies on App.importFromCode, which is still a work in progress.
  models.forEach((model) => {
    ["html_tailwind"].forEach((stack) => {
      it.skip(
        `Start from code: ${model}`,
        async () => {
          const app = new App(
            page,
            stack,
            model,
            `start_from_code_${model}_${stack}`
          );
          await app.init();
          await app.importFromCode();
          // Regenerate works for v1
          // await app.regenerate();
          // // Make an update
          // await app.edit("make the header blue", "v2");
          // // Make another update
          // await app.edit("make all text italic", "v3");
          // // Branch off v2 and make an update
          // await app.clickVersion("v2");
          // await app.edit("make all text red", "v4");
        },
        90 * 1000
      );
    });
  });
});
/**
 * Page-object wrapper around the screenshot-to-code frontend, driven via
 * Puppeteer. Each instance is configured for one (stack, model) pair and
 * prefixes all screenshots it saves with the given test id.
 */
class App {
  private screenshotPathPrefix: string;
  private page: Page;
  private stack: string;
  private model: string;

  constructor(page: Page, stack: string, model: string, testId: string) {
    this.page = page;
    this.stack = stack;
    this.model = model;
    this.screenshotPathPrefix = `${RESULTS_DIR}/${testId}`;
  }

  async init() {
    await this.setupLocalStorage();
  }

  // Seed the app's persisted settings in localStorage so the UI starts with
  // the desired stack/model without clicking through the settings dialog.
  async setupLocalStorage() {
    const setting = {
      openAiApiKey: null,
      openAiBaseURL: null,
      screenshotOneApiKey: process.env.TEST_SCREENSHOTONE_API_KEY,
      isImageGenerationEnabled: true,
      editorTheme: "cobalt",
      generatedCodeConfig: this.stack,
      codeGenerationModel: this.model,
      isTermOfServiceAccepted: false,
      accessCode: null,
    };
    await this.page.evaluate((setting) => {
      localStorage.setItem("setting", JSON.stringify(setting));
    }, setting);
    // Reload the page to apply the local storage
    await this.page.reload();
  }

  // Save a screenshot named after the test id and the current step.
  async _screenshot(step: string) {
    await this.page.screenshot({
      path: `${this.screenshotPathPrefix}_${step}.png`,
    });
  }

  // Block until the given version label (e.g. "v1") appears anywhere in the
  // page body, i.e. code generation for that version has finished.
  async _waitUntilVersionIsReady(version: string) {
    await this.page.waitForNetworkIdle();
    await this.page.waitForFunction(
      (version) => document.body.innerText.includes(version),
      {
        timeout: 30000,
      },
      version
    );
    // Wait for 3s so that the HTML and JS has time to render before screenshotting
    await new Promise((resolve) => setTimeout(resolve, 3000));
  }

  // Types a URL into the capture input and generates code from it.
  async generateFromUrl(url: string) {
    // Type in the URL
    await this.page.type('input[placeholder="Enter URL"]', url);
    await this._screenshot("typed_url");
    // Click the capture button and wait for the code to be generated
    await this.page.click("button.capture-btn");
    await this._waitUntilVersionIsReady("v1");
    await this._screenshot("url_result");
  }

  // Uploads a screenshot and generates the image
  async uploadImage(screenshotPath: string) {
    // Upload file
    const fileInput = (await this.page.$(
      ".file-input"
    )) as ElementHandle<HTMLInputElement>;
    if (!fileInput) {
      throw new Error("File input element not found");
    }
    await fileInput.uploadFile(screenshotPath);
    await this._screenshot("image_uploaded");
    // NOTE(review): no button is clicked here — presumably uploading the file
    // triggers generation automatically; confirm against the frontend.
    await this._waitUntilVersionIsReady("v1");
    await this._screenshot("image_results");
  }

  // Makes a text edit and waits for a new version
  async edit(edit: string, version: string) {
    // Type in the edit
    await this.page.type(
      'textarea[placeholder="Tell the AI what to change..."]',
      edit
    );
    await this._screenshot(`typed_${version}`);
    // Click the update button and wait for the code to be generated
    await this.page.click(".update-btn");
    await this._waitUntilVersionIsReady(version);
    await this._screenshot(`done_${version}`);
  }

  // Clicks every <div> whose text contains the version label, selecting that
  // version in the sidebar. (Broad selector; may click unrelated divs too.)
  async clickVersion(version: string) {
    await this.page.evaluate((version) => {
      document.querySelectorAll("div").forEach((div) => {
        if (div.innerText.includes(version)) {
          div.click();
        }
      });
    }, version);
  }

  // Re-runs generation for v1 and screenshots the result.
  async regenerate() {
    await this.page.click(".regenerate-btn");
    await this._waitUntilVersionIsReady("v1");
    await this._screenshot("regenerate_results");
  }

  // Work in progress
  async importFromCode() {
    await this.page.click(".import-from-code-btn");
    await this.page.type("textarea", "<html>hello world</html>");
    await this.page.select("#output-settings-js", "HTML + Tailwind");
    await this._screenshot("typed_code");
    await this.page.click(".import-btn");
    await this._waitUntilVersionIsReady("v1");
  }
}

View File

@ -16,6 +16,7 @@ export interface Settings {
codeGenerationModel: CodeGenerationModel;
// Only relevant for hosted version
isTermOfServiceAccepted: boolean;
anthropicApiKey: string | null; // Added property for anthropic API key
}
export enum AppState {

File diff suppressed because it is too large Load Diff