Merge branch 'main' into hosted

2024-06-22 19:43:52 +08:00 · 2024-06-22 19:43:52 +08:00 · d2369cb0a0
commit d2369cb0a0
parent ae08466405 f75294ca64
13 changed files with 135 additions and 76 deletions
--- a/README.md
+++ b/README.md
@ -35,14 +35,18 @@ We also just added experimental support for taking a video/screen recording of a

 <a href="https://konghq.com/products/kong-konnect/register?utm_medium=referral&utm_source=github&utm_campaign=platform&utm_content=screenshot-to-code" target="_blank" title="Kong - powering the API world"><img src="https://picoapps.xyz/s2c-sponsors/Kong-GitHub-240x100.png"></a>

-
-## 🚀 Try It Out without no install
+## 🚀 Hosted Version

 [Try it live on the hosted version (paid)](https://screenshottocode.com).

 ## 🛠 Getting Started

-The app has a React/Vite frontend and a FastAPI backend. You will need an OpenAI API key with access to the GPT-4 Vision API or an Anthropic key if you want to use Claude Sonnet, or for experimental video support.
+The app has a React/Vite frontend and a FastAPI backend. 
+
+Keys needed:
+
+* [OpenAI API key with access to GPT-4](https://github.com/abi/screenshot-to-code/blob/main/Troubleshooting.md)
+* Anthropic key (optional) - only if you want to use Claude Sonnet, or for experimental video support.

 Run the backend (I use Poetry for package management - `pip install poetry` if you don't have it):

@ -54,7 +58,7 @@ poetry shell
 poetry run uvicorn main:app --reload --port 7001
 ```

-If you want to use Anthropic, add the `ANTHROPIC_API_KEY` to `backend/.env` with your API key from Anthropic.
+If you want to use Anthropic, add `ANTHROPIC_API_KEY` to `backend/.env`. You can also set the keys in the settings dialog on the frontend (click the gear icon after the frontend loads).

 Run the frontend:

@ -113,5 +117,3 @@ https://github.com/abi/screenshot-to-code/assets/23818/3fec0f77-44e8-4fb3-a769-a
 ## 🌍 Hosted Version

 🆕 [Try it here (paid)](https://screenshottocode.com). Or see [Getting Started](#-getting-started) for local install instructions to use with your own API keys.
-
-[!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://www.buymeacoffee.com/abiraja)
--- a/Troubleshooting.md
+++ b/Troubleshooting.md
@ -1,4 +1,4 @@
-### Getting an OpenAI API key with GPT4-Vision model access
+### Getting an OpenAI API key with GPT-4 model access

 You don't need a ChatGPT Pro account. Screenshot to code uses API keys from your OpenAI developer account. In order to get access to the GPT4 Vision model, log into your OpenAI account and then follow these instructions:

--- a/backend/evals/core.py
+++ b/backend/evals/core.py
@ -15,7 +15,7 @@ async def generate_code_core(image_url: str, stack: Stack, model: Llm) -> str:
    async def process_chunk(content: str):
        pass

-    if model == Llm.CLAUDE_3_SONNET:
+    if model == Llm.CLAUDE_3_SONNET or model == Llm.CLAUDE_3_5_SONNET_2024_06_20:
        if not anthropic_api_key:
            raise Exception("Anthropic API key not found")

@ -23,6 +23,7 @@ async def generate_code_core(image_url: str, stack: Stack, model: Llm) -> str:
            prompt_messages,
            api_key=anthropic_api_key,
            callback=lambda x: process_chunk(x),
+            model=model,
        )
    else:
        if not openai_api_key:
--- a/backend/image_generation.py
+++ b/backend/image_generation.py
@ -5,7 +5,7 @@ from openai import AsyncOpenAI
 from bs4 import BeautifulSoup


-async def process_tasks(prompts: List[str], api_key: str, base_url: str):
+async def process_tasks(prompts: List[str], api_key: str, base_url: str | None):
    tasks = [generate_image(prompt, api_key, base_url) for prompt in prompts]
    results = await asyncio.gather(*tasks, return_exceptions=True)

@ -15,22 +15,23 @@ async def process_tasks(prompts: List[str], api_key: str, base_url: str):
            print(f"An exception occurred: {result}")
            processed_results.append(None)
        else:
-            processed_results.append(result)  # type: ignore
+            processed_results.append(result)

    return processed_results


-async def generate_image(prompt: str, api_key: str, base_url: str):
+async def generate_image(
+    prompt: str, api_key: str, base_url: str | None
+) -> Union[str, None]:
    client = AsyncOpenAI(api_key=api_key, base_url=base_url)
-    image_params: Dict[str, Union[str, int]] = {
-        "model": "dall-e-3",
-        "quality": "standard",
-        "style": "natural",
-        "n": 1,
-        "size": "1024x1024",
-        "prompt": prompt,
-    }
-    res = await client.images.generate(**image_params)  # type: ignore
+    res = await client.images.generate(
+        model="dall-e-3",
+        quality="standard",
+        style="natural",
+        n=1,
+        size="1024x1024",
+        prompt=prompt,
+    )
    await client.close()
    return res.data[0].url

@ -63,13 +64,13 @@ def create_alt_url_mapping(code: str) -> Dict[str, str]:

 async def generate_images(
    code: str, api_key: str, base_url: Union[str, None], image_cache: Dict[str, str]
-):
+) -> str:
    # Find all images
    soup = BeautifulSoup(code, "html.parser")
    images = soup.find_all("img")

    # Extract alt texts as image prompts
-    alts = []
+    alts: List[str | None] = []
    for img in images:
        # Only include URL if the image starts with https://placehold.co
        # and it's not already in the image_cache
@ -77,26 +78,26 @@ async def generate_images(
            img["src"].startswith("https://placehold.co")
            and image_cache.get(img.get("alt")) is None
        ):
-            alts.append(img.get("alt", None))  # type: ignore
+            alts.append(img.get("alt", None))

    # Exclude images with no alt text
-    alts = [alt for alt in alts if alt is not None]  # type: ignore
+    filtered_alts: List[str] = [alt for alt in alts if alt is not None]

    # Remove duplicates
-    prompts = list(set(alts))  # type: ignore
+    prompts = list(set(filtered_alts))

    # Return early if there are no images to replace
-    if len(prompts) == 0:  # type: ignore
+    if len(prompts) == 0:
        return code

    # Generate images
-    results = await process_tasks(prompts, api_key, base_url)  # type: ignore
+    results = await process_tasks(prompts, api_key, base_url)

    # Create a dict mapping alt text to image URL
-    mapped_image_urls = dict(zip(prompts, results))  # type: ignore
+    mapped_image_urls = dict(zip(prompts, results))

    # Merge with image_cache
-    mapped_image_urls = {**mapped_image_urls, **image_cache}  # type: ignore
+    mapped_image_urls = {**mapped_image_urls, **image_cache}

    # Replace old image URLs with the generated URLs
    for img in images:
--- a/backend/llm.py
+++ b/backend/llm.py
@ -18,6 +18,7 @@ class Llm(Enum):
    CLAUDE_3_SONNET = "claude-3-sonnet-20240229"
    CLAUDE_3_OPUS = "claude-3-opus-20240229"
    CLAUDE_3_HAIKU = "claude-3-haiku-20240307"
+    CLAUDE_3_5_SONNET_2024_06_20 = "claude-3-5-sonnet-20240620"


 # Will throw errors if you send a garbage string
@ -60,9 +61,15 @@ async def stream_openai_response(
    full_response = ""
    async for chunk in stream:  # type: ignore
        assert isinstance(chunk, ChatCompletionChunk)
-        content = chunk.choices[0].delta.content or ""
-        full_response += content
-        await callback(content)
+        if (
+            chunk.choices
+            and len(chunk.choices) > 0
+            and chunk.choices[0].delta
+            and chunk.choices[0].delta.content
+        ):
+            content = chunk.choices[0].delta.content or ""
+            full_response += content
+            await callback(content)

    await client.close()

@ -74,12 +81,12 @@ async def stream_claude_response(
    messages: List[ChatCompletionMessageParam],
    api_key: str,
    callback: Callable[[str], Awaitable[None]],
+    model: Llm,
 ) -> str:

    client = AsyncAnthropic(api_key=api_key)

    # Base parameters
-    model = Llm.CLAUDE_3_SONNET
    max_tokens = 4096
    temperature = 0.0

--- a/backend/routes/generate_code.py
+++ b/backend/routes/generate_code.py
@ -14,7 +14,7 @@ from llm import (
 )
 from openai.types.chat import ChatCompletionMessageParam
 from mock_llm import mock_completion
-from typing import Dict, List, cast, get_args
+from typing import Dict, List, Union, cast, get_args
 from image_generation import create_alt_url_mapping, generate_images
 from prompts import assemble_imported_code_prompt, assemble_prompt
 from datetime import datetime
@ -24,7 +24,7 @@ from routes.saas_utils import does_user_have_subscription_credits
 from prompts.claude_prompts import VIDEO_PROMPT
 from prompts.types import Stack

-from utils import pprint_prompt
+# from utils import pprint_prompt
 from video.utils import extract_tag_content, assemble_claude_prompt_video
 from ws.constants import APP_ERROR_WEB_SOCKET_CODE  # type: ignore

@ -161,8 +161,19 @@ async def stream_code(websocket: WebSocket):
        )
        raise Exception("No OpenAI API key found")

+    # Get the Anthropic API key from the request. Fall back to environment variable if not provided.
+    # If neither is provided, we throw an error later only if Claude is used.
+    anthropic_api_key = None
+    if "anthropicApiKey" in params and params["anthropicApiKey"]:
+        anthropic_api_key = params["anthropicApiKey"]
+        print("Using Anthropic API key from client-side settings dialog")
+    else:
+        anthropic_api_key = ANTHROPIC_API_KEY
+        if anthropic_api_key:
+            print("Using Anthropic API key from environment variable")
+
    # Get the OpenAI Base URL from the request. Fall back to environment variable if not provided.
-    openai_base_url = None
+    openai_base_url: Union[str, None] = None
    # Disable user-specified OpenAI Base URL in prod
    if not os.environ.get("IS_PROD"):
        if "openAiBaseURL" in params and params["openAiBaseURL"]:
@ -255,7 +266,7 @@ async def stream_code(websocket: WebSocket):
        video_data_url = params["image"]
        prompt_messages = await assemble_claude_prompt_video(video_data_url)

-    pprint_prompt(prompt_messages)  # type: ignore
+    # pprint_prompt(prompt_messages)  # type: ignore

    if SHOULD_MOCK_AI_RESPONSE:
        completion = await mock_completion(
@ -267,25 +278,28 @@ async def stream_code(websocket: WebSocket):
                if IS_PROD:
                    raise Exception("Video mode is not supported in prod")

-                if not ANTHROPIC_API_KEY:
+                if not anthropic_api_key:
                    await throw_error(
-                        "Video only works with Anthropic models. No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env"
+                        "Video only works with Anthropic models. No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env or in the settings dialog"
                    )
                    raise Exception("No Anthropic key")

                completion = await stream_claude_response_native(
                    system_prompt=VIDEO_PROMPT,
                    messages=prompt_messages,  # type: ignore
-                    api_key=ANTHROPIC_API_KEY,
+                    api_key=anthropic_api_key,
                    callback=lambda x: process_chunk(x),
                    model=Llm.CLAUDE_3_OPUS,
                    include_thinking=True,
                )
                exact_llm_version = Llm.CLAUDE_3_OPUS
-            elif code_generation_model == Llm.CLAUDE_3_SONNET:
-                if not ANTHROPIC_API_KEY:
+            elif (
+                code_generation_model == Llm.CLAUDE_3_SONNET
+                or code_generation_model == Llm.CLAUDE_3_5_SONNET_2024_06_20
+            ):
+                if not anthropic_api_key:
                    await throw_error(
-                        "No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env"
+                        "No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env or in the settings dialog"
                    )
                    raise Exception("No Anthropic key")

@ -298,8 +312,9 @@ async def stream_code(websocket: WebSocket):

                completion = await stream_claude_response(
                    prompt_messages,  # type: ignore
-                    api_key=ANTHROPIC_API_KEY,
+                    api_key=anthropic_api_key,
                    callback=lambda x: process_chunk(x),
+                    model=code_generation_model,
                )
                exact_llm_version = code_generation_model
            else:
--- a/frontend/Dockerfile
+++ b/frontend/Dockerfile
@ -1,4 +1,4 @@
-FROM node:20.9-bullseye-slim
+FROM node:22-bullseye-slim

 # Set the working directory in the container
 WORKDIR /app
@ -6,6 +6,9 @@ WORKDIR /app
 # Copy package.json and yarn.lock
 COPY package.json yarn.lock /app/

+# Set the environment variable to skip Puppeteer download
+ENV PUPPETEER_SKIP_DOWNLOAD=true
+
 # Install dependencies
 RUN yarn install

--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@ -79,6 +79,7 @@ function App({ navbarComponent }: Props) {
    {
      openAiApiKey: null,
      openAiBaseURL: null,
+      anthropicApiKey: null,
      screenshotOneApiKey: null,
      isImageGenerationEnabled: true,
      editorTheme: EditorTheme.COBALT,
@ -109,8 +110,10 @@ function App({ navbarComponent }: Props) {
      CodeGenerationModel.GPT_4_TURBO_2024_04_09 &&
    settings.generatedCodeConfig === Stack.REACT_TAILWIND;

-  const showGpt4OMessage =
+  const showBetterModelMessage =
    selectedCodeGenerationModel !== CodeGenerationModel.GPT_4O_2024_05_13 &&
+    selectedCodeGenerationModel !==
+      CodeGenerationModel.CLAUDE_3_5_SONNET_2024_06_20 &&
    appState === AppState.INITIAL;

  const showFeedbackCallNote =
@ -471,11 +474,11 @@ function App({ navbarComponent }: Props) {
            </div>
          )}

-          {showGpt4OMessage && (
+          {showBetterModelMessage && (
            <div className="rounded-lg p-2 bg-fuchsia-200">
              <p className="text-gray-800 text-sm">
-                Now supporting GPT-4o. Higher quality and 2x faster. Give it a
-                try!
+                Now supporting GPT-4o and Claude Sonnet 3.5. Higher quality and
+                2x faster. Give it a try!
              </p>
            </div>
          )}
--- a/frontend/src/components/SettingsDialog.tsx
+++ b/frontend/src/components/SettingsDialog.tsx
@ -49,7 +49,7 @@ function SettingsDialog({ settings, setSettings }: Props) {
        <div className="flex items-center space-x-2">
          <Label htmlFor="image-generation">
            <div>DALL-E Placeholder Image Generation</div>
-            <div className="font-light mt-2">
+            <div className="font-light mt-2 text-xs">
              More fun with it but if you want to save money, turn it off.
            </div>
          </Label>
@ -64,29 +64,31 @@ function SettingsDialog({ settings, setSettings }: Props) {
            }
          />
        </div>
-        <div className="flex flex-col space-y-4">
-          <Label htmlFor="openai-api-key">
-            <div>OpenAI API key</div>
-            <div className="font-light mt-2 leading-relaxed">
-              Only stored in your browser. Never stored on servers. Overrides
-              your .env config.
-            </div>
-          </Label>
+        <div className="flex flex-col space-y-6">
+          <div>
+            <Label htmlFor="openai-api-key">
+              <div>OpenAI API key</div>
+              <div className="font-light mt-1 mb-2 text-xs leading-relaxed">
+                Only stored in your browser. Never stored on servers. Overrides
+                your .env config.
+              </div>
+            </Label>

-          <Input
-            id="openai-api-key"
-            placeholder="OpenAI API key"
-            value={settings.openAiApiKey || ""}
-            onChange={(e) =>
-              setSettings((s) => ({
-                ...s,
-                openAiApiKey: e.target.value,
-              }))
-            }
-          />
+            <Input
+              id="openai-api-key"
+              placeholder="OpenAI API key"
+              value={settings.openAiApiKey || ""}
+              onChange={(e) =>
+                setSettings((s) => ({
+                  ...s,
+                  openAiApiKey: e.target.value,
+                }))
+              }
+            />
+          </div>

          {!IS_RUNNING_ON_CLOUD && (
-            <>
+            <div>
              <Label htmlFor="openai-api-key">
                <div>OpenAI Base URL (optional)</div>
                <div className="font-light mt-2 leading-relaxed">
@ -105,9 +107,31 @@ function SettingsDialog({ settings, setSettings }: Props) {
                  }))
                }
              />
-            </>
+            </div>
          )}

+          <div>
+            <Label htmlFor="anthropic-api-key">
+              <div>Anthropic API key</div>
+              <div className="font-light mt-1 text-xs leading-relaxed">
+                Only stored in your browser. Never stored on servers. Overrides
+                your .env config.
+              </div>
+            </Label>
+
+            <Input
+              id="anthropic-api-key"
+              placeholder="Anthropic API key"
+              value={settings.anthropicApiKey || ""}
+              onChange={(e) =>
+                setSettings((s) => ({
+                  ...s,
+                  anthropicApiKey: e.target.value,
+                }))
+              }
+            />
+          </div>
+
          <Accordion type="single" collapsible className="w-full">
            <AccordionItem value="item-1">
              <AccordionTrigger>Screenshot by URL Config</AccordionTrigger>
--- a/frontend/src/components/select-and-edit/utils.ts
+++ b/frontend/src/components/select-and-edit/utils.ts
@ -18,7 +18,5 @@ export function getAdjustedCoordinates(
  const offsetX = rect ? rect.left : 0;
  const offsetY = rect ? rect.top : 0;

-  // Adjust for scale
-  const scale = 1; // the scale factor applied to the iframe
-  return { x: x / scale + offsetX, y: y / scale + offsetY };
+  return { x: x + offsetX, y: y + offsetY };
 }
--- a/frontend/src/lib/models.ts
+++ b/frontend/src/lib/models.ts
@ -2,6 +2,7 @@
 // Order here matches dropdown order
 export enum CodeGenerationModel {
  GPT_4O_2024_05_13 = "gpt-4o-2024-05-13",
+  CLAUDE_3_5_SONNET_2024_06_20 = "claude-3-5-sonnet-20240620",
  GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09",
  GPT_4_VISION = "gpt_4_vision",
  CLAUDE_3_SONNET = "claude_3_sonnet",
@ -16,6 +17,11 @@ export const CODE_GENERATION_MODEL_DESCRIPTIONS: {
  };
 } = {
  "gpt-4o-2024-05-13": { name: "GPT-4o 🌟", inBeta: false, isPaid: false },
+  "claude-3-5-sonnet-20240620": {
+    name: "Claude 3.5 Sonnet 🌟",
+    inBeta: false,
+    isPaid: false,
+  },
  "gpt-4-turbo-2024-04-09": {
    name: "GPT-4 Turbo (Apr 2024)",
    inBeta: false,
--- a/frontend/src/store/app-store.ts
+++ b/frontend/src/store/app-store.ts
@ -3,13 +3,11 @@ import { create } from "zustand";
 // Store for app-wide state
 interface AppStore {
  inSelectAndEditMode: boolean;
-  inputMode: "image" | "video";
  toggleInSelectAndEditMode: () => void;
  disableInSelectAndEditMode: () => void;
 }

 export const useAppStore = create<AppStore>((set) => ({
-  inputMode: "image",
  inSelectAndEditMode: false,
  toggleInSelectAndEditMode: () =>
    set((state) => ({ inSelectAndEditMode: !state.inSelectAndEditMode })),
--- a/frontend/src/types.ts
+++ b/frontend/src/types.ts
@ -16,6 +16,7 @@ export interface Settings {
  codeGenerationModel: CodeGenerationModel;
  // Only relevant for hosted version
  isTermOfServiceAccepted: boolean;
+  anthropicApiKey: string | null; // Added property for anthropic API key
 }

 export enum AppState {