diff --git a/Evaluation.md b/Evaluation.md index 5fd5da8..e937b78 100644 --- a/Evaluation.md +++ b/Evaluation.md @@ -5,8 +5,8 @@ Evaluation dataset consists of 16 screenshots. A Python script for running scree ### Running evals - Input screenshots should be located at `backend/evals_data/inputs` and the outputs will be `backend/evals_data/outputs`. If you want to modify this, modify `EVALS_DIR` in `backend/evals/config.py`. You can download the input screenshot dataset here: TODO. -- Set a stack (`STACK` var) in `backend/run_evals.py` -- Run `python backend/run_evals.py` - this runs the screenshot-to-code on the input dataset in parallel but it will still take a few minutes to complete. +- Set a stack and model (`STACK` var, `MODEL` var) in `backend/run_evals.py` +- Run `OPENAI_API_KEY=sk-... python run_evals.py` from the `backend` directory - this runs the screenshot-to-code on the input dataset in parallel but it will still take a few minutes to complete. - Once the script is done, you can find the outputs in `backend/evals_data/outputs`. ### Rating evals diff --git a/README.md b/README.md index a56c528..9e73dd9 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,7 @@ The app will be up and running at http://localhost:5173. Note that you can't dev - **I'm running into an error when setting up the backend. How can I fix it?** [Try this](https://github.com/abi/screenshot-to-code/issues/3#issuecomment-1814777959). If that still doesn't work, open an issue. - **How do I get an OpenAI API key?** See https://github.com/abi/screenshot-to-code/blob/main/Troubleshooting.md -- **How can I configure an OpenAI proxy?** - you can configure the OpenAI base URL if you need to use a proxy: Set OPENAI_BASE_URL in the `backend/.env` or directly in the UI in the settings dialog +- **How can I configure an OpenAI proxy?** - If you're not able to access the OpenAI API directly (e.g. due to 
country restrictions), you can try a VPN or you can configure the OpenAI base URL to use a proxy: Set OPENAI_BASE_URL in the `backend/.env` or directly in the UI in the settings dialog. Make sure the URL has "v1" in the path so that it looks like this: `https://xxx.xxxxx.xxx/v1` - **How can I update the backend host that my front-end connects to?** - Configure VITE_HTTP_BACKEND_URL and VITE_WS_BACKEND_URL in front/.env.local For example, set VITE_HTTP_BACKEND_URL=http://124.10.20.1:7001 - **Seeing UTF-8 errors when running the backend?** - On windows, open the .env file with notepad++, then go to Encoding and select UTF-8. - **How can I provide feedback?** For feedback, feature requests and bug reports, open an issue or ping me on [Twitter](https://twitter.com/_abi_). diff --git a/Troubleshooting.md b/Troubleshooting.md index 3891db3..89aa3ba 100644 --- a/Troubleshooting.md +++ b/Troubleshooting.md @@ -11,7 +11,8 @@ You don't need a ChatGPT Pro account. Screenshot to code uses API keys from your 5. Go to Settings > Limits and check at the bottom of the page, your current tier has to be "Tier 1" to have GPT4 access 285636973-da38bd4d-8a78-4904-8027-ca67d729b933 -6. Go to Screenshot to code and paste it in the Settings dialog under OpenAI key (gear icon). Your key is only stored in your browser. Never stored on our servers. +6. Navigate to the OpenAI [API keys](https://platform.openai.com/api-keys) page, create a new secret key, and copy it. +7. Go to Screenshot to code and paste it in the Settings dialog under OpenAI key (gear icon). Your key is only stored in your browser. Never stored on our servers. ## Still not working? 
diff --git a/backend/evals/core.py b/backend/evals/core.py index 3438a7d..5e05362 100644 --- a/backend/evals/core.py +++ b/backend/evals/core.py @@ -1,14 +1,12 @@ import os from config import ANTHROPIC_API_KEY -from llm import stream_claude_response, stream_openai_response +from llm import Llm, stream_claude_response, stream_openai_response from prompts import assemble_prompt from prompts.types import Stack -async def generate_code_core(image_url: str, stack: Stack) -> str: - model = "CLAUDE" - +async def generate_code_core(image_url: str, stack: Stack, model: Llm) -> str: prompt_messages = assemble_prompt(image_url, stack) openai_api_key = os.environ.get("OPENAI_API_KEY") anthropic_api_key = ANTHROPIC_API_KEY @@ -17,7 +15,7 @@ async def generate_code_core(image_url: str, stack: Stack) -> str: async def process_chunk(content: str): pass - if model == "CLAUDE": + if model == Llm.CLAUDE_3_SONNET: if not anthropic_api_key: raise Exception("Anthropic API key not found") @@ -35,6 +33,7 @@ async def generate_code_core(image_url: str, stack: Stack) -> str: api_key=openai_api_key, base_url=openai_base_url, callback=lambda x: process_chunk(x), + model=model, ) return completion diff --git a/backend/llm.py b/backend/llm.py index 2a19e9b..35afac5 100644 --- a/backend/llm.py +++ b/backend/llm.py @@ -13,18 +13,20 @@ from utils import pprint_prompt # Keep in sync with s2c-saas repo & DB column `llm_version` class Llm(Enum): GPT_4_VISION = "gpt-4-vision-preview" + GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09" CLAUDE_3_SONNET = "claude-3-sonnet-20240229" CLAUDE_3_OPUS = "claude-3-opus-20240229" CLAUDE_3_HAIKU = "claude-3-haiku-20240307" -# Keep in sync with frontend (lib/models.ts) -# User-facing names for the models (for example, in the future, gpt_4_vision might -# be backed by a different model version) -CODE_GENERATION_MODELS = [ - "gpt_4_vision", - "claude_3_sonnet", -] +# Will throw errors if you send a garbage string +def convert_frontend_str_to_llm(frontend_str: str) 
-> Llm: + if frontend_str == "gpt_4_vision": + return Llm.GPT_4_VISION + elif frontend_str == "claude_3_sonnet": + return Llm.CLAUDE_3_SONNET + else: + return Llm(frontend_str) async def stream_openai_response( @@ -32,23 +34,22 @@ async def stream_openai_response( api_key: str, base_url: str | None, callback: Callable[[str], Awaitable[None]], + model: Llm, ) -> str: client = AsyncOpenAI(api_key=api_key, base_url=base_url) - model = Llm.GPT_4_VISION - # Base parameters params = { "model": model.value, "messages": messages, "stream": True, "timeout": 600, + "temperature": 0.0, } - # Add 'max_tokens' only if the model is a GPT4 vision model - if model == Llm.GPT_4_VISION: + # Add 'max_tokens' only if the model is a GPT4 vision or Turbo model + if model == Llm.GPT_4_VISION or model == Llm.GPT_4_TURBO_2024_04_09: params["max_tokens"] = 4096 - params["temperature"] = 0 stream = await client.chat.completions.create(**params) # type: ignore full_response = "" diff --git a/backend/routes/generate_code.py b/backend/routes/generate_code.py index cbd9250..0189dfb 100644 --- a/backend/routes/generate_code.py +++ b/backend/routes/generate_code.py @@ -5,8 +5,8 @@ import openai from config import ANTHROPIC_API_KEY, IS_PROD, SHOULD_MOCK_AI_RESPONSE from custom_types import InputMode from llm import ( - CODE_GENERATION_MODELS, Llm, + convert_frontend_str_to_llm, stream_claude_response, stream_claude_response_native, stream_openai_response, @@ -84,10 +84,14 @@ async def stream_code(websocket: WebSocket): validated_input_mode = cast(InputMode, input_mode) # Read the model from the request. Fall back to default if not provided. 
- code_generation_model = params.get("codeGenerationModel", "gpt_4_vision") - if code_generation_model not in CODE_GENERATION_MODELS: - await throw_error(f"Invalid model: {code_generation_model}") - raise Exception(f"Invalid model: {code_generation_model}") + code_generation_model_str = params.get( + "codeGenerationModel", Llm.GPT_4_VISION.value + ) + try: + code_generation_model = convert_frontend_str_to_llm(code_generation_model_str) + except: + await throw_error(f"Invalid model: {code_generation_model_str}") + raise Exception(f"Invalid model: {code_generation_model_str}") exact_llm_version = None print( @@ -139,8 +143,10 @@ async def stream_code(websocket: WebSocket): if openai_api_key: print("Using OpenAI API key from environment variable") - # If we still don't have an API key, throw an error - if not openai_api_key and code_generation_model == "gpt_4_vision": + if not openai_api_key and ( + code_generation_model == Llm.GPT_4_VISION + or code_generation_model == Llm.GPT_4_TURBO_2024_04_09 + ): print("OpenAI API key not found") await throw_error( "Please subscribe to a paid plan to generate code. If you are a subscriber and seeing this error, please contact support." @@ -264,7 +270,7 @@ async def stream_code(websocket: WebSocket): include_thinking=True, ) exact_llm_version = Llm.CLAUDE_3_OPUS - elif code_generation_model == "claude_3_sonnet": + elif code_generation_model == Llm.CLAUDE_3_SONNET: if not ANTHROPIC_API_KEY: await throw_error( "No Anthropic API key found. 
Please add the environment variable ANTHROPIC_API_KEY to backend/.env" @@ -283,15 +289,16 @@ async def stream_code(websocket: WebSocket): api_key=ANTHROPIC_API_KEY, callback=lambda x: process_chunk(x), ) - exact_llm_version = Llm.CLAUDE_3_SONNET + exact_llm_version = code_generation_model else: completion = await stream_openai_response( prompt_messages, # type: ignore api_key=openai_api_key, base_url=openai_base_url, callback=lambda x: process_chunk(x), + model=code_generation_model, ) - exact_llm_version = Llm.GPT_4_VISION + exact_llm_version = code_generation_model except openai.AuthenticationError as e: print("[GENERATE_CODE] Authentication failed", e) error_message = ( diff --git a/backend/run_evals.py b/backend/run_evals.py index a5cfefb..f26c708 100644 --- a/backend/run_evals.py +++ b/backend/run_evals.py @@ -1,6 +1,8 @@ # Load environment variables first from dotenv import load_dotenv +from llm import Llm + load_dotenv() import os @@ -12,6 +14,7 @@ from evals.core import generate_code_core from evals.utils import image_to_data_url STACK = "html_tailwind" +MODEL = Llm.CLAUDE_3_SONNET async def main(): @@ -25,7 +28,7 @@ async def main(): for filename in evals: filepath = os.path.join(INPUT_DIR, filename) data_url = await image_to_data_url(filepath) - task = generate_code_core(data_url, STACK) + task = generate_code_core(image_url=data_url, stack=STACK, model=MODEL) tasks.append(task) results = await asyncio.gather(*tasks) diff --git a/backend/test_llm.py b/backend/test_llm.py new file mode 100644 index 0000000..ec005a3 --- /dev/null +++ b/backend/test_llm.py @@ -0,0 +1,36 @@ +import unittest +from llm import convert_frontend_str_to_llm, Llm + + +class TestConvertFrontendStrToLlm(unittest.TestCase): + def test_convert_valid_strings(self): + self.assertEqual( + convert_frontend_str_to_llm("gpt_4_vision"), + Llm.GPT_4_VISION, + "Should convert 'gpt_4_vision' to Llm.GPT_4_VISION", + ) + self.assertEqual( + convert_frontend_str_to_llm("claude_3_sonnet"), + 
Llm.CLAUDE_3_SONNET, + "Should convert 'claude_3_sonnet' to Llm.CLAUDE_3_SONNET", + ) + self.assertEqual( + convert_frontend_str_to_llm("claude-3-opus-20240229"), + Llm.CLAUDE_3_OPUS, + "Should convert 'claude-3-opus-20240229' to Llm.CLAUDE_3_OPUS", + ) + self.assertEqual( + convert_frontend_str_to_llm("gpt-4-turbo-2024-04-09"), + Llm.GPT_4_TURBO_2024_04_09, + "Should convert 'gpt-4-turbo-2024-04-09' to Llm.GPT_4_TURBO_2024_04_09", + ) + + def test_convert_invalid_string_raises_exception(self): + with self.assertRaises(ValueError): + convert_frontend_str_to_llm("invalid_string") + with self.assertRaises(ValueError): + convert_frontend_str_to_llm("another_invalid_string") + + +if __name__ == "__main__": + unittest.main() diff --git a/frontend/src/lib/models.ts b/frontend/src/lib/models.ts index c76f42a..02e0d8f 100644 --- a/frontend/src/lib/models.ts +++ b/frontend/src/lib/models.ts @@ -1,9 +1,11 @@ // Keep in sync with backend (llm.py) export enum CodeGenerationModel { GPT_4_VISION = "gpt_4_vision", + GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09", CLAUDE_3_SONNET = "claude_3_sonnet", } +// Will generate a static error if a model in the enum above is not in the descriptions export const CODE_GENERATION_MODEL_DESCRIPTIONS: { [key in CodeGenerationModel]: { name: string; @@ -11,6 +13,15 @@ export const CODE_GENERATION_MODEL_DESCRIPTIONS: { isPaid: boolean; }; } = { - gpt_4_vision: { name: "GPT-4 Vision", inBeta: false, isPaid: false }, - claude_3_sonnet: { name: "Claude 3 Sonnet", inBeta: true, isPaid: true }, + gpt_4_vision: { + name: "GPT-4 Vision (Nov 2023)", + inBeta: false, + isPaid: false, + }, + claude_3_sonnet: { name: "Claude 3 Sonnet", inBeta: false, isPaid: false }, + "gpt-4-turbo-2024-04-09": { + name: "GPT-4 Turbo (Apr 2024)", + inBeta: false, + isPaid: false, + }, };