Merge branch 'main' into hosted

2024-04-15 14:19:35 -04:00 · 2024-04-15 14:19:35 -04:00 · 522b7b8e23
commit 522b7b8e23
parent c51ff4d7ad 911c008fcb
9 changed files with 92 additions and 34 deletions
--- a/Evaluation.md
+++ b/Evaluation.md
@ -5,8 +5,8 @@ Evaluation dataset consists of 16 screenshots. A Python script for running scree
 ### Running evals
 - Input screenshots should be located at `backend/evals_data/inputs` and the outputs will be `backend/evals_data/outputs`. If you want to modify this, modify `EVALS_DIR` in `backend/evals/config.py`. You can download the input screenshot dataset here: TODO.
- Set a stack (`STACK` var) in `backend/run_evals.py`
+- Set a stack and model (`STACK` var, `MODEL` var) in `backend/run_evals.py`
- Run `python backend/run_evals.py` - this runs the screenshot-to-code on the input dataset in parallel but it will still take a few minutes to complete.
+- Run `OPENAI_API_KEY=sk-... python run_evals.py` - this runs the screenshot-to-code on the input dataset in parallel but it will still take a few minutes to complete.
 - Once the script is done, you can find the outputs in `backend/evals_data/outputs`.
 ### Rating evals
--- a/README.md
+++ b/README.md
@ -82,7 +82,7 @@ The app will be up and running at http://localhost:5173. Note that you can't dev
 - **I'm running into an error when setting up the backend. How can I fix it?** [Try this](https://github.com/abi/screenshot-to-code/issues/3#issuecomment-1814777959). If that still doesn't work, open an issue.
 - **How do I get an OpenAI API key?** See https://github.com/abi/screenshot-to-code/blob/main/Troubleshooting.md
- **How can I configure an OpenAI proxy?** - you can configure the OpenAI base URL if you need to use a proxy: Set OPENAI_BASE_URL in the `backend/.env` or directly in the UI in the settings dialog
+- **How can I configure an OpenAI proxy?** - If you can't access the OpenAI API directly (e.g. due to country restrictions), you can try a VPN, or you can configure the OpenAI base URL to point at a proxy: set OPENAI_BASE_URL in `backend/.env` or directly in the UI in the settings dialog. Make sure the URL includes "v1" in the path, so it looks like this: `https://xxx.xxxxx.xxx/v1`
 - **How can I update the backend host that my front-end connects to?** - Configure VITE_HTTP_BACKEND_URL and VITE_WS_BACKEND_URL in `frontend/.env.local`. For example, set VITE_HTTP_BACKEND_URL=http://124.10.20.1:7001
 - **Seeing UTF-8 errors when running the backend?** - On Windows, open the .env file with Notepad++, then go to Encoding and select UTF-8.
 - **How can I provide feedback?** For feedback, feature requests and bug reports, open an issue or ping me on [Twitter](https://twitter.com/_abi_).
--- a/Troubleshooting.md
+++ b/Troubleshooting.md
@ -11,7 +11,8 @@ You don't need a ChatGPT Pro account. Screenshot to code uses API keys from your
 5. Go to Settings > Limits and check at the bottom of the page, your current tier has to be "Tier 1" to have GPT4 access
 <img width="900" alt="285636973-da38bd4d-8a78-4904-8027-ca67d729b933" src="https://github.com/abi/screenshot-to-code/assets/23818/8d07cd84-0cf9-4f88-bc00-80eba492eadf">
-6. Go to Screenshot to code and paste it in the Settings dialog under OpenAI key (gear icon). Your key is only stored in your browser. Never stored on our servers.
+6. Navigate to the OpenAI [API keys](https://platform.openai.com/api-keys) page, then create and copy a new secret key.
 7. Go to Screenshot to code and paste it in the Settings dialog under OpenAI key (gear icon). Your key is only stored in your browser. Never stored on our servers.
 ## Still not working?
--- a/backend/evals/core.py
+++ b/backend/evals/core.py
@ -1,14 +1,12 @@
 import os
 from config import ANTHROPIC_API_KEY
-from llm import stream_claude_response, stream_openai_response
+from llm import Llm, stream_claude_response, stream_openai_response
 from prompts import assemble_prompt
 from prompts.types import Stack
-async def generate_code_core(image_url: str, stack: Stack) -> str:
+async def generate_code_core(image_url: str, stack: Stack, model: Llm) -> str:
    model = "CLAUDE"
    prompt_messages = assemble_prompt(image_url, stack)
    openai_api_key = os.environ.get("OPENAI_API_KEY")
    anthropic_api_key = ANTHROPIC_API_KEY
@ -17,7 +15,7 @@ async def generate_code_core(image_url: str, stack: Stack) -> str:
    async def process_chunk(content: str):
        pass
-    if model == "CLAUDE":
+    if model == Llm.CLAUDE_3_SONNET:
        if not anthropic_api_key:
            raise Exception("Anthropic API key not found")
@ -35,6 +33,7 @@ async def generate_code_core(image_url: str, stack: Stack) -> str:
            api_key=openai_api_key,
            base_url=openai_base_url,
            callback=lambda x: process_chunk(x),
            model=model,
        )
    return completion
--- a/backend/llm.py
+++ b/backend/llm.py
@ -13,18 +13,20 @@ from utils import pprint_prompt
 # Keep in sync with s2c-saas repo & DB column `llm_version`
 class Llm(Enum):
    """Exact model versions that code generation can run against.

    Each member's value is the provider's model identifier string. The OpenAI
    streaming path passes ``model.value`` as the request's ``model`` field,
    and ``Llm(<string>)`` looks a member up by that value.
    """
    # OpenAI GPT-4 models.
    GPT_4_VISION = "gpt-4-vision-preview"
    GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09"
    # Anthropic Claude 3 models.
    CLAUDE_3_SONNET = "claude-3-sonnet-20240229"
    CLAUDE_3_OPUS = "claude-3-opus-20240229"
    CLAUDE_3_HAIKU = "claude-3-haiku-20240307"
-# Keep in sync with frontend (lib/models.ts)
+# Raises ValueError if the string is neither a known frontend alias nor a valid Llm value
-# User-facing names for the models (for example, in the future, gpt_4_vision might
+def convert_frontend_str_to_llm(frontend_str: str) -> Llm:
-# be backed by a different model version)
+    if frontend_str == "gpt_4_vision":
-CODE_GENERATION_MODELS = [
+        return Llm.GPT_4_VISION
-    "gpt_4_vision",
+    elif frontend_str == "claude_3_sonnet":
-    "claude_3_sonnet",
+        return Llm.CLAUDE_3_SONNET
-]
+    else:
        return Llm(frontend_str)
 async def stream_openai_response(
@ -32,23 +34,22 @@ async def stream_openai_response(
    api_key: str,
    base_url: str | None,
    callback: Callable[[str], Awaitable[None]],
    model: Llm,
 ) -> str:
    client = AsyncOpenAI(api_key=api_key, base_url=base_url)
    model = Llm.GPT_4_VISION
    # Base parameters
    params = {
        "model": model.value,
        "messages": messages,
        "stream": True,
        "timeout": 600,
        "temperature": 0.0,
    }
-    # Add 'max_tokens' only if the model is a GPT4 vision model
+    # Add 'max_tokens' only if the model is a GPT4 vision or Turbo model
-    if model == Llm.GPT_4_VISION:
+    if model == Llm.GPT_4_VISION or model == Llm.GPT_4_TURBO_2024_04_09:
        params["max_tokens"] = 4096
        params["temperature"] = 0
    stream = await client.chat.completions.create(**params)  # type: ignore
    full_response = ""
--- a/backend/routes/generate_code.py
+++ b/backend/routes/generate_code.py
@ -5,8 +5,8 @@ import openai
 from config import ANTHROPIC_API_KEY, IS_PROD, SHOULD_MOCK_AI_RESPONSE
 from custom_types import InputMode
 from llm import (
    CODE_GENERATION_MODELS,
    Llm,
    convert_frontend_str_to_llm,
    stream_claude_response,
    stream_claude_response_native,
    stream_openai_response,
@ -84,10 +84,14 @@ async def stream_code(websocket: WebSocket):
    validated_input_mode = cast(InputMode, input_mode)
    # Read the model from the request. Fall back to default if not provided.
-    code_generation_model = params.get("codeGenerationModel", "gpt_4_vision")
+    code_generation_model_str = params.get(
-    if code_generation_model not in CODE_GENERATION_MODELS:
+        "codeGenerationModel", Llm.GPT_4_VISION.value
-        await throw_error(f"Invalid model: {code_generation_model}")
+    )
-        raise Exception(f"Invalid model: {code_generation_model}")
+    try:
        code_generation_model = convert_frontend_str_to_llm(code_generation_model_str)
    except:
        await throw_error(f"Invalid model: {code_generation_model_str}")
        raise Exception(f"Invalid model: {code_generation_model_str}")
    exact_llm_version = None
    print(
@ -139,8 +143,10 @@ async def stream_code(websocket: WebSocket):
        if openai_api_key:
            print("Using OpenAI API key from environment variable")
-    # If we still don't have an API key, throw an error
+    if not openai_api_key and (
-    if not openai_api_key and code_generation_model == "gpt_4_vision":
+        code_generation_model == Llm.GPT_4_VISION
        or code_generation_model == Llm.GPT_4_TURBO_2024_04_09
    ):
        print("OpenAI API key not found")
        await throw_error(
            "Please subscribe to a paid plan to generate code. If you are a subscriber and seeing this error, please contact support."
@ -264,7 +270,7 @@ async def stream_code(websocket: WebSocket):
                    include_thinking=True,
                )
                exact_llm_version = Llm.CLAUDE_3_OPUS
-            elif code_generation_model == "claude_3_sonnet":
+            elif code_generation_model == Llm.CLAUDE_3_SONNET:
                if not ANTHROPIC_API_KEY:
                    await throw_error(
                        "No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env"
@ -283,15 +289,16 @@ async def stream_code(websocket: WebSocket):
                    api_key=ANTHROPIC_API_KEY,
                    callback=lambda x: process_chunk(x),
                )
-                exact_llm_version = Llm.CLAUDE_3_SONNET
+                exact_llm_version = code_generation_model
            else:
                completion = await stream_openai_response(
                    prompt_messages,  # type: ignore
                    api_key=openai_api_key,
                    base_url=openai_base_url,
                    callback=lambda x: process_chunk(x),
                    model=code_generation_model,
                )
-                exact_llm_version = Llm.GPT_4_VISION
+                exact_llm_version = code_generation_model
        except openai.AuthenticationError as e:
            print("[GENERATE_CODE] Authentication failed", e)
            error_message = (
--- a/backend/run_evals.py
+++ b/backend/run_evals.py
@ -1,6 +1,8 @@
 # Load environment variables first
 from dotenv import load_dotenv
 from llm import Llm
 load_dotenv()
 import os
@ -12,6 +14,7 @@ from evals.core import generate_code_core
 from evals.utils import image_to_data_url
 STACK = "html_tailwind"
 MODEL = Llm.CLAUDE_3_SONNET
 async def main():
@ -25,7 +28,7 @@ async def main():
    for filename in evals:
        filepath = os.path.join(INPUT_DIR, filename)
        data_url = await image_to_data_url(filepath)
-        task = generate_code_core(data_url, STACK)
+        task = generate_code_core(image_url=data_url, stack=STACK, model=MODEL)
        tasks.append(task)
    results = await asyncio.gather(*tasks)
--- a/backend/test_llm.py
+++ b/backend/test_llm.py
@ -0,0 +1,36 @@
 import unittest
 from llm import convert_frontend_str_to_llm, Llm
 class TestConvertFrontendStrToLlm(unittest.TestCase):
    """Checks how convert_frontend_str_to_llm maps model strings to Llm members."""

    def test_convert_valid_strings(self):
        # Each row is (input string, expected member, failure message).
        # The first two rows are frontend aliases; the last two are exact
        # Llm values. Rows are checked in order.
        expectations = (
            (
                "gpt_4_vision",
                Llm.GPT_4_VISION,
                "Should convert 'gpt_4_vision' to Llm.GPT_4_VISION",
            ),
            (
                "claude_3_sonnet",
                Llm.CLAUDE_3_SONNET,
                "Should convert 'claude_3_sonnet' to Llm.CLAUDE_3_SONNET",
            ),
            (
                "claude-3-opus-20240229",
                Llm.CLAUDE_3_OPUS,
                "Should convert 'claude-3-opus-20240229' to Llm.CLAUDE_3_OPUS",
            ),
            (
                "gpt-4-turbo-2024-04-09",
                Llm.GPT_4_TURBO_2024_04_09,
                "Should convert 'gpt-4-turbo-2024-04-09' to Llm.GPT_4_TURBO_2024_04_09",
            ),
        )
        for frontend_str, expected_llm, failure_message in expectations:
            actual_llm = convert_frontend_str_to_llm(frontend_str)
            self.assertEqual(actual_llm, expected_llm, failure_message)

    def test_convert_invalid_string_raises_exception(self):
        # Strings that are neither an alias nor an Llm value must raise
        # ValueError.
        for unknown_str in ("invalid_string", "another_invalid_string"):
            with self.assertRaises(ValueError):
                convert_frontend_str_to_llm(unknown_str)
 # Run this module's tests when the file is executed directly as a script.
 if __name__ == "__main__":
    unittest.main()
--- a/frontend/src/lib/models.ts
+++ b/frontend/src/lib/models.ts
@ -1,9 +1,11 @@
 // Keep in sync with backend (llm.py)
 // Model identifiers selectable for code generation. The string values are
 // what the backend has to understand: its convert_frontend_str_to_llm (llm.py)
 // either translates a legacy alias or looks the string up directly as an
 // Llm value.
 export enum CodeGenerationModel {
  // Legacy alias; the backend maps it to Llm.GPT_4_VISION.
  GPT_4_VISION = "gpt_4_vision",
  // Same string as the backend's Llm.GPT_4_TURBO_2024_04_09 value (no alias).
  GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09",
  // Legacy alias; the backend maps it to Llm.CLAUDE_3_SONNET.
  CLAUDE_3_SONNET = "claude_3_sonnet",
 }
 // Will generate a static error if a model in the enum above is not in the descriptions
 export const CODE_GENERATION_MODEL_DESCRIPTIONS: {
  [key in CodeGenerationModel]: {
    name: string;
@ -11,6 +13,15 @@ export const CODE_GENERATION_MODEL_DESCRIPTIONS: {
    isPaid: boolean;
  };
 } = {
-  gpt_4_vision: { name: "GPT-4 Vision", inBeta: false, isPaid: false },
+  gpt_4_vision: {
-  claude_3_sonnet: { name: "Claude 3 Sonnet", inBeta: true, isPaid: true },
+    name: "GPT-4 Vision (Nov 2023)",
    inBeta: false,
    isPaid: false,
  },
  claude_3_sonnet: { name: "Claude 3 Sonnet", inBeta: false, isPaid: false },
  "gpt-4-turbo-2024-04-09": {
    name: "GPT-4 Turbo (Apr 2024)",
    inBeta: false,
    isPaid: false,
  },
 };