Merge branch 'main' into dev

2024-06-25 18:25:40 +08:00 · 2024-06-25 18:25:40 +08:00 · e9c6756c2e
commit e9c6756c2e
parent 1f65c29c4d 6fe7098201
31 changed files with 2964 additions and 252 deletions
--- a/.github/FUNDING.yml
+++ b/.github/FUNDING.yml
@ -0,0 +1 @@
 github: [abi]
--- a/README.md
+++ b/README.md
@ -1,6 +1,6 @@
 # screenshot-to-code
-A simple tool to convert screenshots, mockups and Figma designs into clean, functional code using AI. **Now supporting GPT-4O!**
+A simple tool to convert screenshots, mockups and Figma designs into clean, functional code using AI. **Now supporting Claude Sonnet 3.5 and GPT-4O!**
 https://github.com/abi/screenshot-to-code/assets/23818/6cebadae-2fe3-4986-ac6a-8fb9db030045
@ -15,7 +15,8 @@ Supported stacks:
 Supported AI models:
- GPT-4O - Best model!
+- Claude Sonnet 3.5 - Best model!
 - GPT-4O - also recommended!
 - GPT-4 Turbo (Apr 2024)
 - GPT-4 Vision (Nov 2023)
 - Claude 3 Sonnet
@ -31,7 +32,11 @@ We also just added experimental support for taking a video/screen recording of a
 [Follow me on Twitter for updates](https://twitter.com/_abi_).
-## 🚀 Try It Out without no install
+## Sponsors
 <a href="https://konghq.com/products/kong-konnect/register?utm_medium=referral&utm_source=github&utm_campaign=platform&utm_content=screenshot-to-code" target="_blank" title="Kong - powering the API world"><img src="https://picoapps.xyz/s2c-sponsors/Kong-GitHub-240x100.png"></a>
 ## 🚀 Hosted Version
 [Try it live on the hosted version (paid)](https://screenshottocode.com).
@ -41,9 +46,9 @@ We also just added experimental support for taking a video/screen recording of a
 - 如果使用Bedrock Claude 3/3.5需要在运行机器上安装 https://aws.amazon.com/cn/cli/， 并配置aws iam 账号的ak sk，另外还需要开通该账号Bedrock Claude 3 访问的权限。
 - 如果使用Bedrock Claude 3/3.5，则无须配置OPENAI_API_KEY 或者 ANTHROPIC_API_KEY 到.env中
 The app has a React/Vite frontend and a FastAPI backend. You will need an OpenAI API key with access to the GPT-4 Vision API or an Anthropic key if you want to use Claude Sonnet, or for experimental video support.
 Run the backend (I use Poetry for package management - `pip install poetry` if you don't have it):
 ```bash
@ -55,7 +60,7 @@ poetry shell
 poetry run uvicorn main:app --reload --port 7001
 ```
-If you want to use Anthropic, add the `ANTHROPIC_API_KEY` to `backend/.env` with your API key from Anthropic.
+If you want to use Anthropic, add `ANTHROPIC_API_KEY` to `backend/.env`. You can also set up the keys using the settings dialog on the front-end (click the gear icon after loading the frontend).
 Run the frontend:
@ -114,5 +119,3 @@ https://github.com/abi/screenshot-to-code/assets/23818/3fec0f77-44e8-4fb3-a769-a
 ## 🌍 Hosted Version
 🆕 [Try it here (paid)](https://screenshottocode.com). Or see [Getting Started](#-getting-started) for local install instructions to use with your own API keys.
 [!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://www.buymeacoffee.com/abiraja)
--- a/Troubleshooting.md
+++ b/Troubleshooting.md
@ -1,4 +1,4 @@
-### Getting an OpenAI API key with GPT4-Vision model access
+### Getting an OpenAI API key with GPT-4 model access
 You don't need a ChatGPT Pro account. Screenshot to code uses API keys from your OpenAI developer account. In order to get access to the GPT4 Vision model, log into your OpenAI account and then follow these instructions:
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@ -1,4 +1,4 @@
-FROM python:3.12-slim-bullseye
+FROM python:3.12.3-slim-bullseye
 ENV POETRY_VERSION 1.4.1
--- a/backend/evals/core.py
+++ b/backend/evals/core.py
@ -15,7 +15,7 @@ async def generate_code_core(image_url: str, stack: Stack, model: Llm) -> str:
    async def process_chunk(content: str):
        pass
-    if model == Llm.CLAUDE_3_SONNET:
+    if model == Llm.CLAUDE_3_SONNET or model == Llm.CLAUDE_3_5_SONNET_2024_06_20:
        if not anthropic_api_key:
            raise Exception("Anthropic API key not found")
@ -23,6 +23,7 @@ async def generate_code_core(image_url: str, stack: Stack, model: Llm) -> str:
            prompt_messages,
            api_key=anthropic_api_key,
            callback=lambda x: process_chunk(x),
            model=model,
        )
    else:
        if not openai_api_key:
--- a/backend/image_generation.py
+++ b/backend/image_generation.py
@ -5,7 +5,7 @@ from openai import AsyncOpenAI
 from bs4 import BeautifulSoup
-async def process_tasks(prompts: List[str], api_key: str, base_url: str):
+async def process_tasks(prompts: List[str], api_key: str, base_url: str | None):
    tasks = [generate_image(prompt, api_key, base_url) for prompt in prompts]
    results = await asyncio.gather(*tasks, return_exceptions=True)
@ -15,22 +15,23 @@ async def process_tasks(prompts: List[str], api_key: str, base_url: str):
            print(f"An exception occurred: {result}")
            processed_results.append(None)
        else:
-            processed_results.append(result)  # type: ignore
+            processed_results.append(result)
    return processed_results
-async def generate_image(prompt: str, api_key: str, base_url: str):
+async def generate_image(
    prompt: str, api_key: str, base_url: str | None
 ) -> Union[str, None]:
    client = AsyncOpenAI(api_key=api_key, base_url=base_url)
-    image_params: Dict[str, Union[str, int]] = {
+    res = await client.images.generate(
-        "model": "dall-e-3",
+        model="dall-e-3",
-        "quality": "standard",
+        quality="standard",
-        "style": "natural",
+        style="natural",
-        "n": 1,
+        n=1,
-        "size": "1024x1024",
+        size="1024x1024",
-        "prompt": prompt,
+        prompt=prompt,
-    }
+    )
    res = await client.images.generate(**image_params)  # type: ignore
    await client.close()
    return res.data[0].url
@ -63,13 +64,13 @@ def create_alt_url_mapping(code: str) -> Dict[str, str]:
 async def generate_images(
    code: str, api_key: str, base_url: Union[str, None], image_cache: Dict[str, str]
-):
+) -> str:
    # Find all images
    soup = BeautifulSoup(code, "html.parser")
    images = soup.find_all("img")
    # Extract alt texts as image prompts
-    alts = []
+    alts: List[str | None] = []
    for img in images:
        # Only include URL if the image starts with https://placehold.co
        # and it's not already in the image_cache
@ -77,26 +78,26 @@ async def generate_images(
            img["src"].startswith("https://placehold.co")
            and image_cache.get(img.get("alt")) is None
        ):
-            alts.append(img.get("alt", None))  # type: ignore
+            alts.append(img.get("alt", None))
    # Exclude images with no alt text
-    alts = [alt for alt in alts if alt is not None]  # type: ignore
+    filtered_alts: List[str] = [alt for alt in alts if alt is not None]
    # Remove duplicates
-    prompts = list(set(alts))  # type: ignore
+    prompts = list(set(filtered_alts))
    # Return early if there are no images to replace
-    if len(prompts) == 0:  # type: ignore
+    if len(prompts) == 0:
        return code
    # Generate images
-    results = await process_tasks(prompts, api_key, base_url)  # type: ignore
+    results = await process_tasks(prompts, api_key, base_url)
    # Create a dict mapping alt text to image URL
-    mapped_image_urls = dict(zip(prompts, results))  # type: ignore
+    mapped_image_urls = dict(zip(prompts, results))
    # Merge with image_cache
-    mapped_image_urls = {**mapped_image_urls, **image_cache}  # type: ignore
+    mapped_image_urls = {**mapped_image_urls, **image_cache}
    # Replace old image URLs with the generated URLs
    for img in images:
--- a/backend/llm.py
+++ b/backend/llm.py
@ -36,6 +36,7 @@ class Llm(Enum):
    CLAUDE_3_5_SONNET = "claude-3-5-sonnet-20240620"
    CLAUDE_3_OPUS = "claude-3-opus-20240229"
    CLAUDE_3_HAIKU = "claude-3-haiku-20240307"
    CLAUDE_3_5_SONNET_2024_06_20 = "claude-3-5-sonnet-20240620"
 BEDROCK_LLM_MODELID_LIST = {Llm.CLAUDE_3_5_SONNET: 'anthropic.claude-3-sonnet-20240229-v1:0',
@ -83,6 +84,12 @@ async def stream_openai_response(
    full_response = ""
    async for chunk in stream:  # type: ignore
        assert isinstance(chunk, ChatCompletionChunk)
        if (
            chunk.choices
            and len(chunk.choices) > 0
            and chunk.choices[0].delta
            and chunk.choices[0].delta.content
        ):
            content = chunk.choices[0].delta.content or ""
            full_response += content
            await callback(content)
@ -96,6 +103,7 @@ async def stream_claude_response(
    messages: List[ChatCompletionMessageParam],
    api_key: str,
    callback: Callable[[str], Awaitable[None]],
    model: Llm,
 ) -> str:
    # client = AsyncAnthropic(api_key=api_key)
--- a/backend/routes/generate_code.py
+++ b/backend/routes/generate_code.py
@ -13,7 +13,7 @@ from llm import (
 )
 from openai.types.chat import ChatCompletionMessageParam
 from mock_llm import mock_completion
-from typing import Dict, List, cast, get_args
+from typing import Dict, List, Union, cast, get_args
 from image_generation import create_alt_url_mapping, generate_images
 from prompts import assemble_imported_code_prompt, assemble_prompt
 from datetime import datetime
@ -120,8 +120,19 @@ async def stream_code(websocket: WebSocket):
        )
        return
    # Get the Anthropic API key from the request. Fall back to environment variable if not provided.
    # If neither is provided, we throw an error later only if Claude is used.
    anthropic_api_key = None
    if "anthropicApiKey" in params and params["anthropicApiKey"]:
        anthropic_api_key = params["anthropicApiKey"]
        print("Using Anthropic API key from client-side settings dialog")
    else:
        anthropic_api_key = ANTHROPIC_API_KEY
        if anthropic_api_key:
            print("Using Anthropic API key from environment variable")
    # Get the OpenAI Base URL from the request. Fall back to environment variable if not provided.
-    openai_base_url = None
+    openai_base_url: Union[str, None] = None
    # Disable user-specified OpenAI Base URL in prod
    if not os.environ.get("IS_PROD"):
        if "openAiBaseURL" in params and params["openAiBaseURL"]:
@ -219,6 +230,7 @@ async def stream_code(websocket: WebSocket):
    else:
        try:
            if validated_input_mode == "video":
                # if not ANTHROPIC_API_KEY:
                #     await throw_error(
                #         "Video only works with Anthropic models. No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env"
@ -228,24 +240,27 @@ async def stream_code(websocket: WebSocket):
                completion = await stream_claude_response_native(
                    system_prompt=VIDEO_PROMPT,
                    messages=prompt_messages,  # type: ignore
-                    api_key=ANTHROPIC_API_KEY,
+                    api_key=anthropic_api_key,
                    callback=lambda x: process_chunk(x),
-                    model=Llm.CLAUDE_3_SONNET,
+                    model=Llm.CLAUDE_3_5_SONNET,
                    include_thinking=True,
                )
-                # exact_llm_version = Llm.CLAUDE_3_OPUS
+                exact_llm_version = Llm.CLAUDE_3_5_SONNET
-                exact_llm_version = Llm.CLAUDE_3_SONNET
+            elif (
-            elif code_generation_model == Llm.CLAUDE_3_SONNET:
+                code_generation_model == Llm.CLAUDE_3_SONNET
-                # if not ANTHROPIC_API_KEY:
+                or code_generation_model == Llm.CLAUDE_3_5_SONNET_2024_06_20
            ):
 #                 if not anthropic_api_key:
 #                     await throw_error(
-                #         "No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env"
+#                         "No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env or in the settings dialog"
 #                     )
 #                     raise Exception("No Anthropic key")
                completion = await stream_claude_response(
                    prompt_messages,  # type: ignore
-                    api_key=ANTHROPIC_API_KEY,
+                    api_key=anthropic_api_key,
                    callback=lambda x: process_chunk(x),
                    model=code_generation_model,
                )
                exact_llm_version = code_generation_model
            else:
--- a/frontend/.gitignore
+++ b/frontend/.gitignore
@ -25,3 +25,6 @@ dist-ssr
 # Env files
 .env*
 # Test files
 src/tests/results/
--- a/frontend/Dockerfile
+++ b/frontend/Dockerfile
@ -1,4 +1,4 @@
-FROM node:20.9-bullseye-slim
+FROM node:22-bullseye-slim
 # Set the working directory in the container
 WORKDIR /app
@ -6,6 +6,9 @@ WORKDIR /app
 # Copy package.json and yarn.lock
 COPY package.json yarn.lock /app/
 # Set the environment variable to skip Puppeteer download
 ENV PUPPETEER_SKIP_DOWNLOAD=true
 # Install dependencies
 RUN yarn install
--- a/frontend/jest.config.js
+++ b/frontend/jest.config.js
@ -0,0 +1,9 @@
 // Jest configuration for the frontend test suite.
 export default {
  // Use the ts-jest preset so TypeScript sources are handled by ts-jest.
  preset: "ts-jest",
  // Tests run in a Node environment rather than a browser-like one.
  testEnvironment: "node",
  // Setup module listed here is loaded before the tests run.
  setupFiles: ["<rootDir>/src/setupTests.ts"],
  transform: {
    // Files ending in .ts or .tsx are transformed with ts-jest.
    "^.+\\.tsx?$": "ts-jest",
  },
  // Per-test timeout (Jest interprets this value in milliseconds).
  testTimeout: 30000,
 };
--- a/frontend/package.json
+++ b/frontend/package.json
@ -10,7 +10,7 @@
    "build-hosted": "tsc && vite build --mode prod",
    "lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0",
    "preview": "vite preview",
-    "test": "vitest"
+    "test": "jest"
  },
  "dependencies": {
    "@codemirror/lang-html": "^6.4.6",
@ -46,21 +46,28 @@
    "tailwindcss-animate": "^1.0.7",
    "thememirror": "^2.0.1",
    "vite-plugin-checker": "^0.6.2",
-    "webm-duration-fix": "^1.0.4"
+    "webm-duration-fix": "^1.0.4",
    "zustand": "^4.5.2"
  },
  "devDependencies": {
    "@types/jest": "^29.5.12",
    "@types/node": "^20.9.0",
    "@types/puppeteer": "^7.0.4",
    "@types/react": "^18.2.15",
    "@types/react-dom": "^18.2.7",
    "@typescript-eslint/eslint-plugin": "^6.0.0",
    "@typescript-eslint/parser": "^6.0.0",
    "@vitejs/plugin-react": "^4.0.3",
    "autoprefixer": "^10.4.16",
    "dotenv": "^16.4.5",
    "eslint": "^8.45.0",
    "eslint-plugin-react-hooks": "^4.6.0",
    "eslint-plugin-react-refresh": "^0.4.3",
    "jest": "^29.7.0",
    "postcss": "^8.4.31",
    "puppeteer": "^22.6.4",
    "tailwindcss": "^3.3.5",
    "ts-jest": "^29.1.2",
    "typescript": "^5.0.2",
    "vite": "^4.4.5",
    "vite-plugin-html": "^3.2.0",
--- a/frontend/src/.env.jest.example
+++ b/frontend/src/.env.jest.example
@ -0,0 +1,2 @@
 TEST_SCREENSHOTONE_API_KEY=
 TEST_ROOT_PATH=
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@ -40,6 +40,8 @@ import ModelSettingsSection from "./components/ModelSettingsSection";
 import { extractHtml } from "./components/preview/extractHtml";
 import useBrowserTabIndicator from "./hooks/useBrowserTabIndicator";
 import TipLink from "./components/core/TipLink";
 import SelectAndEditModeToggleButton from "./components/select-and-edit/SelectAndEditModeToggleButton";
 import { useAppStore } from "./store/app-store";
 const IS_OPENAI_DOWN = false;
@ -54,11 +56,14 @@ function App() {
  const [updateInstruction, setUpdateInstruction] = useState("");
  const [isImportedFromCode, setIsImportedFromCode] = useState<boolean>(false);
  const { disableInSelectAndEditMode } = useAppStore();
  // Settings
  const [settings, setSettings] = usePersistedState<Settings>(
    {
      openAiApiKey: null,
      openAiBaseURL: null,
      anthropicApiKey: null,
      screenshotOneApiKey: null,
      isImageGenerationEnabled: true,
      editorTheme: EditorTheme.COBALT,
@ -89,9 +94,15 @@ function App() {
      CodeGenerationModel.GPT_4_TURBO_2024_04_09 &&
    settings.generatedCodeConfig === Stack.REACT_TAILWIND;
-  // const showGpt4OMessage =
+  const showBetterModelMessage =
-  //   selectedCodeGenerationModel !== CodeGenerationModel.GPT_4O_2024_05_13 &&
+    selectedCodeGenerationModel !== CodeGenerationModel.GPT_4O_2024_05_13 &&
-  //   appState === AppState.INITIAL;
+    selectedCodeGenerationModel !==
      CodeGenerationModel.CLAUDE_3_5_SONNET_2024_06_20 &&
    appState === AppState.INITIAL;
  const showSelectAndEditFeature =
    selectedCodeGenerationModel === CodeGenerationModel.GPT_4O_2024_05_13 &&
    settings.generatedCodeConfig === Stack.HTML_TAILWIND;
  // Indicate coding state using the browser tab's favicon and title
  useBrowserTabIndicator(appState === AppState.CODING);
@ -148,6 +159,7 @@ function App() {
    setAppHistory([]);
    setCurrentVersion(null);
    setShouldIncludeResultImage(false);
    disableInSelectAndEditMode();
  };
  const regenerate = () => {
@ -236,7 +248,9 @@ function App() {
                parentIndex: parentVersion,
                code,
                inputs: {
-                  prompt: updateInstruction,
+                  prompt: params.history
                    ? params.history[params.history.length - 1]
                    : updateInstruction,
                },
              },
            ];
@ -278,7 +292,10 @@ function App() {
  }
  // Subsequent updates
-  async function doUpdate() {
+  async function doUpdate(
    updateInstruction: string,
    selectedElement?: HTMLElement
  ) {
    if (currentVersion === null) {
      toast.error(
        "No current version set. Contact support or open a Github issue."
@ -296,7 +313,17 @@ function App() {
      return;
    }
-    const updatedHistory = [...historyTree, updateInstruction];
+    let modifiedUpdateInstruction = updateInstruction;
    // Send in a reference to the selected element if it exists
    if (selectedElement) {
      modifiedUpdateInstruction =
        updateInstruction +
        " referring to this element specifically: " +
        selectedElement.outerHTML;
    }
    const updatedHistory = [...historyTree, modifiedUpdateInstruction];
    if (shouldIncludeResultImage) {
      const resultImage = await takeScreenshot();
@ -407,11 +434,11 @@ function App() {
            </div>
          )}
-          {/* {showGpt4OMessage && (
+          {showBetterModelMessage && (
            <div className="rounded-lg p-2 bg-fuchsia-200">
              <p className="text-gray-800 text-sm">
-                Now supporting GPT-4o. Higher quality and 2x faster. Give it a
+                Now supporting Claude Sonnet 3.5. Higher quality and
-                try!
+                2x faster. Give it a try!
              </p>
            </div>
          )} */}
@ -481,8 +508,8 @@ function App() {
                      />
                    </div>
                    <Button
-                      onClick={doUpdate}
+                      onClick={() => doUpdate(updateInstruction)}
-                      className="dark:text-white dark:bg-gray-700"
+                      className="dark:text-white dark:bg-gray-700 update-btn"
                    >
                      Update
                    </Button>
@ -490,10 +517,13 @@ function App() {
                  <div className="flex items-center justify-end gap-x-2 mt-2">
                    <Button
                      onClick={regenerate}
-                      className="flex items-center gap-x-2 dark:text-white dark:bg-gray-700"
+                      className="flex items-center gap-x-2 dark:text-white dark:bg-gray-700 regenerate-btn"
                    >
                      🔄 Regenerate
                    </Button>
                    {showSelectAndEditFeature && (
                      <SelectAndEditModeToggleButton />
                    )}
                  </div>
                  <div className="flex justify-end items-center mt-2">
                    <TipLink />
@ -599,7 +629,7 @@ function App() {
                      <Button
                        onClick={downloadCode}
                        variant="secondary"
-                        className="flex items-center gap-x-2 mr-4 dark:text-white dark:bg-gray-700"
+                        className="flex items-center gap-x-2 mr-4 dark:text-white dark:bg-gray-700 download-btn"
                      >
                        <FaDownload /> Download
                      </Button>
@ -622,10 +652,18 @@ function App() {
                </div>
              </div>
              <TabsContent value="desktop">
-                <Preview code={previewCode} device="desktop" />
+                <Preview
                  code={previewCode}
                  device="desktop"
                  doUpdate={doUpdate}
                />
              </TabsContent>
              <TabsContent value="mobile">
-                <Preview code={previewCode} device="mobile" />
+                <Preview
                  code={previewCode}
                  device="mobile"
                  doUpdate={doUpdate}
                />
              </TabsContent>
              <TabsContent value="code">
                <CodeTab
--- a/frontend/src/components/ImageUpload.tsx
+++ b/frontend/src/components/ImageUpload.tsx
@ -166,7 +166,7 @@ function ImageUpload({ setReferenceImages }: Props) {
      {screenRecorderState === ScreenRecorderState.INITIAL && (
        /* eslint-disable-next-line @typescript-eslint/no-explicit-any */
        <div {...getRootProps({ style: style as any })}>
-          <input {...getInputProps()} />
+          <input {...getInputProps()} className="file-input" />
          <p className="text-slate-700 text-lg">
            Drag & drop a screenshot here, <br />
            or click to upload
--- a/frontend/src/components/ImportCodeSection.tsx
+++ b/frontend/src/components/ImportCodeSection.tsx
@ -38,7 +38,9 @@ function ImportCodeSection({ importFromCode }: Props) {
  return (
    <Dialog>
      <DialogTrigger asChild>
-        <Button variant="secondary">Import from Code</Button>
+        <Button className="import-from-code-btn" variant="secondary">
          Import from Code
        </Button>
      </DialogTrigger>
      <DialogContent className="sm:max-w-[425px]">
        <DialogHeader>
@ -62,7 +64,7 @@ function ImportCodeSection({ importFromCode }: Props) {
        />
        <DialogFooter>
-          <Button type="submit" onClick={doImport}>
+          <Button className="import-btn" type="submit" onClick={doImport}>
            Import
          </Button>
        </DialogFooter>
--- a/frontend/src/components/Preview.tsx
+++ b/frontend/src/components/Preview.tsx
@ -1,21 +1,35 @@
-import { useEffect, useRef } from "react";
+import { useEffect, useRef, useState } from "react";
 import classNames from "classnames";
 import useThrottle from "../hooks/useThrottle";
 import EditPopup from "./select-and-edit/EditPopup";
 interface Props {
  code: string;
  device: "mobile" | "desktop";
  doUpdate: (updateInstruction: string, selectedElement?: HTMLElement) => void;
 }
-function Preview({ code, device }: Props) {
+function Preview({ code, device, doUpdate }: Props) {
  const iframeRef = useRef<HTMLIFrameElement | null>(null);
  // Don't update code more often than every 200ms.
  const throttledCode = useThrottle(code, 200);
  // Select and edit functionality
  const [clickEvent, setClickEvent] = useState<MouseEvent | null>(null);
  useEffect(() => {
-    if (iframeRef.current) {
+    const iframe = iframeRef.current;
-      iframeRef.current.srcdoc = throttledCode;
+    if (iframe) {
      iframe.srcdoc = throttledCode;
      // Set up click handler for select and edit functionality
      iframe.addEventListener("load", function () {
        iframe.contentWindow?.document.body.addEventListener(
          "click",
          setClickEvent
        );
      });
    }
  }, [throttledCode]);
@ -34,6 +48,7 @@ function Preview({ code, device }: Props) {
          }
        )}
      ></iframe>
      <EditPopup event={clickEvent} iframeRef={iframeRef} doUpdate={doUpdate} />
    </div>
  );
 }
--- a/frontend/src/components/SettingsDialog.tsx
+++ b/frontend/src/components/SettingsDialog.tsx
@ -49,7 +49,7 @@ function SettingsDialog({ settings, setSettings }: Props) {
        <div className="flex items-center space-x-2">
          <Label htmlFor="image-generation">
            <div>DALL-E Placeholder Image Generation</div>
-            <div className="font-light mt-2">
+            <div className="font-light mt-2 text-xs">
              More fun with it but if you want to save money, turn it off.
            </div>
          </Label>
@ -64,10 +64,11 @@ function SettingsDialog({ settings, setSettings }: Props) {
            }
          />
        </div>
-        <div className="flex flex-col space-y-4">
+        <div className="flex flex-col space-y-6">
          <div>
            <Label htmlFor="openai-api-key">
              <div>OpenAI API key</div>
-            <div className="font-light mt-2 leading-relaxed">
+              <div className="font-light mt-1 mb-2 text-xs leading-relaxed">
                Only stored in your browser. Never stored on servers. Overrides
                your .env config.
              </div>
@ -84,9 +85,10 @@ function SettingsDialog({ settings, setSettings }: Props) {
                }))
              }
            />
          </div>
          {!IS_RUNNING_ON_CLOUD && (
-            <>
+            <div>
              <Label htmlFor="openai-api-key">
                <div>OpenAI Base URL (optional)</div>
                <div className="font-light mt-2 leading-relaxed">
@ -105,9 +107,31 @@ function SettingsDialog({ settings, setSettings }: Props) {
                  }))
                }
              />
-            </>
+            </div>
          )}
          <div>
            <Label htmlFor="anthropic-api-key">
              <div>Anthropic API key</div>
              <div className="font-light mt-1 text-xs leading-relaxed">
                Only stored in your browser. Never stored on servers. Overrides
                your .env config.
              </div>
            </Label>
            <Input
              id="anthropic-api-key"
              placeholder="Anthropic API key"
              value={settings.anthropicApiKey || ""}
              onChange={(e) =>
                setSettings((s) => ({
                  ...s,
                  anthropicApiKey: e.target.value,
                }))
              }
            />
          </div>
          <Accordion type="single" collapsible className="w-full">
            <AccordionItem value="item-1">
              <AccordionTrigger>Screenshot by URL Config</AccordionTrigger>
--- a/frontend/src/components/UrlInputSection.tsx
+++ b/frontend/src/components/UrlInputSection.tsx
@ -69,7 +69,7 @@ export function UrlInputSection({ doCreate, screenshotOneApiKey }: Props) {
      <Button
        onClick={takeScreenshot}
        disabled={isLoading}
-        className="bg-slate-400"
+        className="bg-slate-400 capture-btn"
      >
        {isLoading ? "Capturing..." : "Capture"}
      </Button>
--- a/frontend/src/components/history/utils.test.ts
+++ b/frontend/src/components/history/utils.test.ts
@ -1,4 +1,3 @@
 import { expect, test } from "vitest";
 import { extractHistoryTree, renderHistory } from "./utils";
 import type { History } from "./history_types";
@ -84,6 +83,7 @@ const basicBadHistory: History = [
  },
 ];
 describe("History Utils", () => {
  test("should correctly extract the history tree", () => {
    expect(extractHistoryTree(basicLinearHistory, 2)).toEqual([
      "<html>1. create</html>",
@ -228,3 +228,4 @@ test("should correctly render the history tree", () => {
      },
    ]);
  });
 });
--- a/frontend/src/components/select-and-edit/EditPopup.tsx
+++ b/frontend/src/components/select-and-edit/EditPopup.tsx
@ -0,0 +1,143 @@
 import React, { useEffect, useRef, useState } from "react";
 import { Textarea } from "../ui/textarea";
 import { Button } from "../ui/button";
 import { addHighlight, getAdjustedCoordinates, removeHighlight } from "./utils";
 import { useAppStore } from "../../store/app-store";
 // Props accepted by the EditPopup component.
 interface EditPopupProps {
  // Most recent click event to react to, or null when there is none.
  event: MouseEvent | null;
  // Ref to the preview iframe; its bounding rect is used to offset the popup position.
  iframeRef: React.RefObject<HTMLIFrameElement>;
  // Callback invoked with the instruction text and, optionally, the selected element.
  doUpdate: (updateInstruction: string, selectedElement?: HTMLElement) => void;
 }
 /**
  * Popup shown next to a clicked element while select-and-edit mode is active.
  *
  * When a new click `event` arrives and the mode is on, the clicked element is
  * highlighted, the popup is positioned at the click coordinates (offset by the
  * iframe's bounding rect) and the textarea is focused. Submitting (Enter key or
  * the Update button) calls `doUpdate` with the typed text and the selected
  * element after its highlight has been removed, then hides the popup.
  *
  * Renders nothing while the popup is not visible.
  */
 const EditPopup: React.FC<EditPopupProps> = ({
  event,
  iframeRef,
  doUpdate,
 }) => {
  // App state
  const { inSelectAndEditMode } = useAppStore();
  // Create a wrapper ref to store inSelectAndEditMode so the value is not stale
  // in an event listener
  const inSelectAndEditModeRef = useRef(inSelectAndEditMode);
  // Update the ref whenever the state changes
  useEffect(() => {
    inSelectAndEditModeRef.current = inSelectAndEditMode;
  }, [inSelectAndEditMode]);
  // Popup state
  const [popupVisible, setPopupVisible] = useState(false);
  const [popupPosition, setPopupPosition] = useState({ x: 0, y: 0 });
  // Edit state
  const [selectedElement, setSelectedElement] = useState<
    HTMLElement | undefined
  >(undefined);
  const [updateText, setUpdateText] = useState("");
  // Textarea ref for focusing
  const textareaRef = useRef<HTMLTextAreaElement | null>(null);
  // Submit the instruction for the currently selected element and close the popup.
  function onUpdate(updateText: string) {
    // Perform the update
    // The highlight styling is stripped first so the element is passed on without it.
    doUpdate(
      updateText,
      selectedElement ? removeHighlight(selectedElement) : selectedElement
    );
    // Unselect the element
    setSelectedElement(undefined);
    // Hide the popup
    setPopupVisible(false);
  }
  // Remove highlight and reset state when not in select and edit mode
  useEffect(() => {
    if (!inSelectAndEditMode) {
      if (selectedElement) removeHighlight(selectedElement);
      setSelectedElement(undefined);
      setPopupVisible(false);
    }
  }, [inSelectAndEditMode, selectedElement]);
  // Handle the click event
  useEffect(() => {
    // Return if not in select and edit mode
    // (the mode is read through the ref, so it is not a dependency of this effect)
    if (!inSelectAndEditModeRef.current || !event) {
      return;
    }
    // Prevent default to avoid issues like label clicks triggering textareas, etc.
    event.preventDefault();
    const targetElement = event.target as HTMLElement;
    // Return if no target element
    if (!targetElement) return;
    // Highlight and set the selected element
    setSelectedElement((prev) => {
      // Remove style from previous element
      if (prev) {
        removeHighlight(prev);
      }
      return addHighlight(targetElement);
    });
    // Calculate adjusted coordinates
    const adjustedCoordinates = getAdjustedCoordinates(
      event.clientX,
      event.clientY,
      iframeRef.current?.getBoundingClientRect()
    );
    // Show the popup at the click position
    setPopupVisible(true);
    setPopupPosition({ x: adjustedCoordinates.x, y: adjustedCoordinates.y });
    // Reset the update text
    setUpdateText("");
    // Focus the textarea
    textareaRef.current?.focus();
  }, [event, iframeRef]);
  // Focus the textarea when the popup is visible (we can't do this only when handling the click event
  // because the textarea is not rendered yet)
  // We need to also do it in the click event because popupVisible doesn't change values in that event
  useEffect(() => {
    if (popupVisible) {
      textareaRef.current?.focus();
    }
  }, [popupVisible]);
  // Nothing to render while the popup is hidden.
  if (!popupVisible) return;
  return (
    <div
      className="absolute bg-white p-4 border border-gray-300 rounded shadow-lg w-60"
      style={{ top: popupPosition.y, left: popupPosition.x }}
    >
      <Textarea
        ref={textareaRef}
        value={updateText}
        onChange={(e) => setUpdateText(e.target.value)}
        placeholder="Tell the AI what to change about this element..."
        onKeyDown={(e) => {
          if (e.key === "Enter") {
            e.preventDefault();
            onUpdate(updateText);
          }
        }}
      />
      <div className="flex justify-end mt-2">
        <Button onClick={() => onUpdate(updateText)}>Update</Button>
      </div>
    </div>
  );
 };
 export default EditPopup;
--- a/frontend/src/components/select-and-edit/SelectAndEditModeToggleButton.tsx
+++ b/frontend/src/components/select-and-edit/SelectAndEditModeToggleButton.tsx
@ -0,0 +1,22 @@
 import { GiClick } from "react-icons/gi";
 import { useAppStore } from "../../store/app-store";
 import { Button } from "../ui/button";
 /**
  * Button that switches the app-wide "select and edit" mode on and off.
  *
  * While the mode is active the button uses the "destructive" variant and
  * offers to exit the mode; otherwise it uses the "default" variant and offers
  * to enter it.
  *
  * NOTE(review): the button carries the `regenerate-btn` class, which the QA
  * test also uses as a selector for its regenerate click - confirm the class is
  * intended here.
  */
 function SelectAndEditModeToggleButton() {
  const {
    inSelectAndEditMode: isActive,
    toggleInSelectAndEditMode: toggleMode,
  } = useAppStore();
  // Derive the presentation from the current mode up front
  const variant = isActive ? "destructive" : "default";
  const label = isActive ? "Exit selection mode" : "Select and update";
  return (
    <Button
      className="flex items-center gap-x-2 dark:text-white dark:bg-gray-700 regenerate-btn"
      variant={variant}
      onClick={toggleMode}
    >
      <GiClick className="text-lg" />
      <span>{label}</span>
    </Button>
  );
 }
 export default SelectAndEditModeToggleButton;
--- a/frontend/src/components/select-and-edit/utils.ts
+++ b/frontend/src/components/select-and-edit/utils.ts
@ -0,0 +1,22 @@
 // Inline outline/background values an element had before it was highlighted,
 // kept so that removing the highlight can put them back. A WeakMap neither
 // keeps discarded elements alive nor adds anything to the element's markup.
 const highlightBackups = new WeakMap<
  HTMLElement,
  { outline: string; backgroundColor: string }
 >();
 /**
  * Removes the selection highlight from an element.
  *
  * If the element was highlighted through addHighlight, the inline outline and
  * background color it had before are restored; otherwise both inline
  * properties are cleared.
  *
  * @param element Element to un-highlight (mutated in place).
  * @returns The same element.
  */
 export function removeHighlight(element: HTMLElement) {
  const backup = highlightBackups.get(element);
  highlightBackups.delete(element);
  element.style.outline = backup ? backup.outline : "";
  element.style.backgroundColor = backup ? backup.backgroundColor : "";
  return element;
 }
 /**
  * Marks an element as selected with a dashed blue outline and a light blue
  * background, both set as inline styles.
  *
  * The element's previous inline outline/background are remembered so that
  * removeHighlight can restore them instead of wiping them.
  *
  * @param element Element to highlight (mutated in place).
  * @returns The same element.
  */
 export function addHighlight(element: HTMLElement) {
  // Only back up on the first highlight: a repeated call must not record the
  // highlight colors themselves as the "original" values.
  if (!highlightBackups.has(element)) {
    highlightBackups.set(element, {
      outline: element.style.outline,
      backgroundColor: element.style.backgroundColor,
    });
  }
  element.style.outline = "2px dashed #1846db";
  element.style.backgroundColor = "#bfcbf5";
  return element;
 }
 /**
  * Shifts a point by the top-left corner of a rectangle.
  *
  * @param x Horizontal coordinate of the point.
  * @param y Vertical coordinate of the point.
  * @param rect Rectangle whose `left`/`top` are added to the point; when it is
  *   undefined the point is returned unshifted.
  * @returns The shifted `{ x, y }` pair.
  */
 export function getAdjustedCoordinates(
  x: number,
  y: number,
  rect: DOMRect | undefined
 ) {
  // No rectangle means a zero offset on both axes
  const [shiftX, shiftY] = rect ? [rect.left, rect.top] : [0, 0];
  return { x: x + shiftX, y: y + shiftY };
 }
--- a/frontend/src/lib/models.ts
+++ b/frontend/src/lib/models.ts
@ -4,6 +4,7 @@ export enum CodeGenerationModel {
  CLAUDE_3_5_SONNET = "claude_3_5_sonnet",
  CLAUDE_3_SONNET = "claude_3_sonnet",
  GPT_4O_2024_05_13 = "gpt-4o-2024-05-13",
  CLAUDE_3_5_SONNET_2024_06_20 = "claude-3-5-sonnet-20240620",
  GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09",
  GPT_4_VISION = "gpt_4_vision",
 }
@ -13,6 +14,7 @@ export const CODE_GENERATION_MODEL_DESCRIPTIONS: {
  [key in CodeGenerationModel]: { name: string; inBeta: boolean };
 } = {
  "gpt-4o-2024-05-13": { name: "GPT-4o 🌟", inBeta: false },
  "claude-3-5-sonnet-20240620": { name: "Claude 3.5 Sonnet 🌟", inBeta: false },
  "gpt-4-turbo-2024-04-09": { name: "GPT-4 Turbo (Apr 2024)", inBeta: false },
  gpt_4_vision: { name: "GPT-4 Vision (Nov 2023)", inBeta: false },
  claude_3_sonnet: { name: "Claude 3 Sonnet", inBeta: false },
--- a/frontend/src/setupTests.ts
+++ b/frontend/src/setupTests.ts
@ -0,0 +1,3 @@
 // Test setup: load the variables defined in the `.env.jest` file so that the
 // jest test runner can read them as environment variables.
 import { config } from "dotenv";
 config({ path: ".env.jest" });
--- a/frontend/src/store/app-store.ts
+++ b/frontend/src/store/app-store.ts
@ -0,0 +1,15 @@
 import { create } from "zustand";
 // Store for app-wide state
 interface AppStore {
  // Whether the app is currently in "select and edit" mode
  inSelectAndEditMode: boolean;
  // Action that flips inSelectAndEditMode
  toggleInSelectAndEditMode: () => void;
  // Action that turns inSelectAndEditMode off
  disableInSelectAndEditMode: () => void;
 }
 // Hook giving access to the app-wide store. Select-and-edit mode starts off;
 // it can be flipped with the toggle action or forced off with the disable one.
 export const useAppStore = create<AppStore>((set) => {
  const toggleInSelectAndEditMode = () => {
    set((current) => ({ inSelectAndEditMode: !current.inSelectAndEditMode }));
  };
  const disableInSelectAndEditMode = () => {
    set({ inSelectAndEditMode: false });
  };
  return {
    inSelectAndEditMode: false,
    toggleInSelectAndEditMode,
    disableInSelectAndEditMode,
  };
 });
--- a/frontend/src/tests/fixtures/simple_button.png
+++ b/frontend/src/tests/fixtures/simple_button.png
--- a/frontend/src/tests/fixtures/simple_ui_with_image.png
+++ b/frontend/src/tests/fixtures/simple_ui_with_image.png
--- a/frontend/src/tests/qa.test.ts
+++ b/frontend/src/tests/qa.test.ts
@ -0,0 +1,274 @@
 import puppeteer, { Browser, Page, ElementHandle } from "puppeteer";
 import { Stack } from "../lib/stacks";
 import { CodeGenerationModel } from "../lib/models";
 // Root directory of the test assets, taken from the TEST_ROOT_PATH
 // environment variable.
 // NOTE(review): when the variable is unset, the template literals below
 // interpolate the text "undefined" into every derived path.
 const { TEST_ROOT_PATH: TESTS_ROOT_PATH } = process.env;
 // Fixtures: input screenshots fed to the app
 const FIXTURES_PATH = `${TESTS_ROOT_PATH}/fixtures`;
 const SIMPLE_SCREENSHOT = `${FIXTURES_PATH}/simple_button.png`;
 const SCREENSHOT_WITH_IMAGES = `${FIXTURES_PATH}/simple_ui_with_image.png`;
 // Results: directory that receives the screenshots taken during the tests
 const RESULTS_DIR = `${TESTS_ROOT_PATH}/results`;
 // End-to-end QA suite. It drives the app served at http://localhost:5173/
 // through Puppeteer; every test reuses the single page opened in beforeAll.
 describe("e2e tests", () => {
  let browser: Browser;
  let page: Page;
  // When DEBUG is true, only the first stack and the first model are exercised
  const DEBUG = false;
  const IS_HEADLESS = true;
  const stacks = Object.values(Stack).slice(0, DEBUG ? 1 : undefined);
  const models = Object.values(CodeGenerationModel).slice(
    0,
    DEBUG ? 1 : undefined
  );
  beforeAll(async () => {
    browser = await puppeteer.launch({ headless: IS_HEADLESS });
    page = await browser.newPage();
    await page.goto("http://localhost:5173/");
    // Set screen size
    await page.setViewport({ width: 1080, height: 1024 });
    // TODO: Does this need to be moved?
    // const client = await page.createCDPSession();
    // Set download behavior path
    // await client.send("Page.setDownloadBehavior", {
    //   behavior: "allow",
    //   downloadPath: DOWNLOAD_PATH,
    // });
  });
  afterAll(async () => {
    // `browser` is still unassigned if puppeteer.launch() failed in beforeAll;
    // guard the call so that failure is not followed by a TypeError from here
    await browser?.close();
  });
  // Create tests - one screenshot-based and one URL-based test for every
  // model/stack combination
  models.forEach((model) => {
    stacks.forEach((stack) => {
      it(
        `Create for : ${model} & ${stack}`,
        async () => {
          const app = new App(
            page,
            stack,
            model,
            `create_screenshot_${model}_${stack}`
          );
          await app.init();
          // Generate from screenshot
          await app.uploadImage(SCREENSHOT_WITH_IMAGES);
        },
        60 * 1000
      );
      it(
        `Create from URL for : ${model} & ${stack}`,
        async () => {
          const app = new App(
            page,
            stack,
            model,
            `create_url_${model}_${stack}`
          );
          await app.init();
          // Generate from URL
          await app.generateFromUrl("https://a.picoapps.xyz/design-fear");
        },
        60 * 1000
      );
    });
  });
  // Update tests - for every model (doesn't need to be repeated for each stack - fix to HTML Tailwind only)
  models.forEach((model) => {
    ["html_tailwind"].forEach((stack) => {
      it(
        `update: ${model}`,
        async () => {
          const app = new App(page, stack, model, `update_${model}_${stack}`);
          await app.init();
          // Generate from screenshot
          await app.uploadImage(SIMPLE_SCREENSHOT);
          // Regenerate works for v1
          await app.regenerate();
          // Make an update
          await app.edit("make the button background blue", "v2");
          // Make another update
          await app.edit("make the text italic", "v3");
          // Branch off v2 and make an update
          await app.clickVersion("v2");
          await app.edit("make the text yellow", "v4");
        },
        90 * 1000
      );
    });
  });
  // Start from code tests - for every model (currently skipped)
  models.forEach((model) => {
    ["html_tailwind"].forEach((stack) => {
      it.skip(
        `Start from code: ${model}`,
        async () => {
          const app = new App(
            page,
            stack,
            model,
            `start_from_code_${model}_${stack}`
          );
          await app.init();
          await app.importFromCode();
          // Regenerate works for v1
          // await app.regenerate();
          // // Make an update
          // await app.edit("make the header blue", "v2");
          // // Make another update
          // await app.edit("make all text italic", "v3");
          // // Branch off v2 and make an update
          // await app.clickVersion("v2");
          // await app.edit("make all text red", "v4");
        },
        90 * 1000
      );
    });
  });
 });
 /**
  * Helper that drives the app UI through a Puppeteer page for one test case.
  *
  * Screenshots taken along the way are written to
  * `${RESULTS_DIR}/${testId}_<step>.png`.
  */
 class App {
  private screenshotPathPrefix: string;
  private page: Page;
  private stack: string;
  private model: string;
  /**
   * @param page Puppeteer page the app is loaded in.
   * @param stack Value stored as `generatedCodeConfig` in the app settings.
   * @param model Value stored as `codeGenerationModel` in the app settings.
   * @param testId Identifier used as the file-name prefix of the screenshots.
   */
  constructor(page: Page, stack: string, model: string, testId: string) {
    this.page = page;
    this.stack = stack;
    this.model = model;
    this.screenshotPathPrefix = `${RESULTS_DIR}/${testId}`;
  }
  /** Prepares the page for a test by writing the settings and reloading. */
  async init() {
    await this.setupLocalStorage();
  }
  /**
   * Stores the settings for this test under the "setting" localStorage key
   * and reloads the page so that they are picked up.
   */
  async setupLocalStorage() {
    const setting = {
      openAiApiKey: null,
      openAiBaseURL: null,
      screenshotOneApiKey: process.env.TEST_SCREENSHOTONE_API_KEY,
      isImageGenerationEnabled: true,
      editorTheme: "cobalt",
      generatedCodeConfig: this.stack,
      codeGenerationModel: this.model,
      isTermOfServiceAccepted: false,
      accessCode: null,
    };
    await this.page.evaluate((setting) => {
      localStorage.setItem("setting", JSON.stringify(setting));
    }, setting);
    // Reload the page to apply the local storage
    await this.page.reload();
  }
  /** Saves a screenshot of the page, tagged with the given step name. */
  async _screenshot(step: string) {
    await this.page.screenshot({
      path: `${this.screenshotPathPrefix}_${step}.png`,
    });
  }
  /**
   * Waits until the network is idle and the page text contains `version`
   * (at most 30s for the latter), then waits a further 3s.
   *
   * NOTE(review): the check is a plain substring match on the whole page
   * text, so it is already satisfied when the label was on the page before
   * the action (e.g. "v1" when regenerating) - confirm this is acceptable.
   */
  async _waitUntilVersionIsReady(version: string) {
    await this.page.waitForNetworkIdle();
    await this.page.waitForFunction(
      (version) => document.body.innerText.includes(version),
      {
        timeout: 30000,
      },
      version
    );
    // Wait for 3s so that the HTML and JS has time to render before screenshotting
    await new Promise((resolve) => setTimeout(resolve, 3000));
  }
  /** Types a URL into the URL input, starts the capture and waits for v1. */
  async generateFromUrl(url: string) {
    // Type in the URL
    await this.page.type('input[placeholder="Enter URL"]', url);
    await this._screenshot("typed_url");
    // Click the capture button and wait for the code to be generated
    await this.page.click("button.capture-btn");
    await this._waitUntilVersionIsReady("v1");
    await this._screenshot("url_result");
  }
  /**
   * Uploads a screenshot through the file input and waits for v1.
   *
   * @throws Error when the `.file-input` element is not on the page.
   */
  async uploadImage(screenshotPath: string) {
    // Upload file
    const fileInput = (await this.page.$(
      ".file-input"
    )) as ElementHandle<HTMLInputElement>;
    if (!fileInput) {
      throw new Error("File input element not found");
    }
    await fileInput.uploadFile(screenshotPath);
    await this._screenshot("image_uploaded");
    // No button is clicked here: just wait for the code to be generated
    await this._waitUntilVersionIsReady("v1");
    await this._screenshot("image_results");
  }
  /**
   * Makes a text edit and waits for a new version.
   *
   * @param edit Instruction typed into the update textarea.
   * @param version Label of the version the edit is expected to produce.
   */
  async edit(edit: string, version: string) {
    // Type in the edit
    await this.page.type(
      'textarea[placeholder="Tell the AI what to change..."]',
      edit
    );
    await this._screenshot(`typed_${version}`);
    // Click the update button and wait for the code to be generated
    await this.page.click(".update-btn");
    await this._waitUntilVersionIsReady(version);
    await this._screenshot(`done_${version}`);
  }
  /**
   * Clicks the version entry whose text contains `version`.
   *
   * Only the innermost matching divs are clicked. The text of a div includes
   * the text of its descendants, so every ancestor container matches too;
   * clicking each of them as well would fire the same click handlers several
   * times. The click event bubbles, so handlers attached to an ancestor still
   * run.
   */
  async clickVersion(version: string) {
    await this.page.evaluate((version) => {
      const matching = Array.from(document.querySelectorAll("div")).filter(
        (div) => div.innerText.includes(version)
      );
      matching
        .filter(
          (div) =>
            !matching.some((other) => other !== div && div.contains(other))
        )
        .forEach((div) => div.click());
    }, version);
  }
  /** Clicks the regenerate button and waits for v1. */
  async regenerate() {
    await this.page.click(".regenerate-btn");
    await this._waitUntilVersionIsReady("v1");
    await this._screenshot("regenerate_results");
  }
  // Work in progress
  /** Imports a fixed HTML snippet through the import-from-code flow. */
  async importFromCode() {
    await this.page.click(".import-from-code-btn");
    await this.page.type("textarea", "<html>hello world</html>");
    await this.page.select("#output-settings-js", "HTML + Tailwind");
    await this._screenshot("typed_code");
    await this.page.click(".import-btn");
    await this._waitUntilVersionIsReady("v1");
  }
 }
--- a/frontend/src/types.ts
+++ b/frontend/src/types.ts
@ -16,6 +16,7 @@ export interface Settings {
  codeGenerationModel: CodeGenerationModel;
  // Only relevant for hosted version
  isTermOfServiceAccepted: boolean;
  anthropicApiKey: string | null; // Added property for anthropic API key
 }
 export enum AppState {
--- a/frontend/yarn.lock
+++ b/frontend/yarn.lock
`@ -1,4 +1,4 @@`
	`### Getting an OpenAI API key with GPT4-Vision model access`	`### Getting an OpenAI API key with GPT-4 model access`

	`You don't need a ChatGPT Pro account. Screenshot to code uses API keys from your OpenAI developer account. In order to get access to the GPT4 Vision model, log into your OpenAI account and then, follow these instructions:`	`You don't need a ChatGPT Pro account. Screenshot to code uses API keys from your OpenAI developer account. In order to get access to the GPT4 Vision model, log into your OpenAI account and then, follow these instructions:`
`@ -1,4 +1,4 @@`
	`FROM python:3.12-slim-bullseye`	`FROM python:3.12.3-slim-bullseye`

	`ENV POETRY_VERSION 1.4.1`	`ENV POETRY_VERSION 1.4.1`
		`@ -0,0 +1,2 @@`
							`TEST_SCREENSHOTONE_API_KEY=`
							`TEST_ROOT_PATH=`