From f65fab073e6bdc6261a4965dccb102db4670e43b Mon Sep 17 00:00:00 2001 From: Abi Raja Date: Wed, 10 Apr 2024 16:51:36 -0400 Subject: [PATCH 1/4] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a56c528..9e73dd9 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,7 @@ The app will be up and running at http://localhost:5173. Note that you can't dev - **I'm running into an error when setting up the backend. How can I fix it?** [Try this](https://github.com/abi/screenshot-to-code/issues/3#issuecomment-1814777959). If that still doesn't work, open an issue. - **How do I get an OpenAI API key?** See https://github.com/abi/screenshot-to-code/blob/main/Troubleshooting.md -- **How can I configure an OpenAI proxy?** - you can configure the OpenAI base URL if you need to use a proxy: Set OPENAI_BASE_URL in the `backend/.env` or directly in the UI in the settings dialog +- **How can I configure an OpenAI proxy?** - If you're not able to access the OpenAI API directly (e.g. due to country restrictions), you can try a VPN, or you can configure the OpenAI base URL to use a proxy: Set OPENAI_BASE_URL in `backend/.env` or directly in the UI in the settings dialog. Make sure the URL has "v1" in the path, so that it looks like this: `https://xxx.xxxxx.xxx/v1` - **How can I update the backend host that my front-end connects to?** - Configure VITE_HTTP_BACKEND_URL and VITE_WS_BACKEND_URL in front/.env.local For example, set VITE_HTTP_BACKEND_URL=http://124.10.20.1:7001 - **Seeing UTF-8 errors when running the backend?** - On windows, open the .env file with notepad++, then go to Encoding and select UTF-8. - **How can I provide feedback?** For feedback, feature requests and bug reports, open an issue or ping me on [Twitter](https://twitter.com/_abi_). 
From 6587b626c5138a55d45a6b558d390f49103c3718 Mon Sep 17 00:00:00 2001 From: Abi Raja Date: Thu, 11 Apr 2024 09:55:55 -0400 Subject: [PATCH 2/4] clean up model strings and add support for GPT-4 Turbo (Apr 2024) --- backend/llm.py | 25 ++++++++++++----------- backend/routes/generate_code.py | 26 +++++++++++++++--------- backend/test_llm.py | 36 +++++++++++++++++++++++++++++++++ frontend/src/lib/models.ts | 7 +++++-- 4 files changed, 71 insertions(+), 23 deletions(-) create mode 100644 backend/test_llm.py diff --git a/backend/llm.py b/backend/llm.py index 3c2c853..3d653b2 100644 --- a/backend/llm.py +++ b/backend/llm.py @@ -12,18 +12,20 @@ from utils import pprint_prompt # Actual model versions that are passed to the LLMs and stored in our logs class Llm(Enum): GPT_4_VISION = "gpt-4-vision-preview" + GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09" CLAUDE_3_SONNET = "claude-3-sonnet-20240229" CLAUDE_3_OPUS = "claude-3-opus-20240229" CLAUDE_3_HAIKU = "claude-3-haiku-20240307" -# Keep in sync with frontend (lib/models.ts) -# User-facing names for the models (for example, in the future, gpt_4_vision might -# be backed by a different model version) -CODE_GENERATION_MODELS = [ - "gpt_4_vision", - "claude_3_sonnet", -] +# Will throw errors if you send a garbage string +def convert_frontend_str_to_llm(frontend_str: str) -> Llm: + if frontend_str == "gpt_4_vision": + return Llm.GPT_4_VISION + elif frontend_str == "claude_3_sonnet": + return Llm.CLAUDE_3_SONNET + else: + return Llm(frontend_str) async def stream_openai_response( @@ -31,23 +33,22 @@ async def stream_openai_response( api_key: str, base_url: str | None, callback: Callable[[str], Awaitable[None]], + model: Llm, ) -> str: client = AsyncOpenAI(api_key=api_key, base_url=base_url) - model = Llm.GPT_4_VISION - # Base parameters params = { "model": model.value, "messages": messages, "stream": True, "timeout": 600, + "temperature": 0.0, } - # Add 'max_tokens' only if the model is a GPT4 vision model - if model == 
Llm.GPT_4_VISION: + # Add 'max_tokens' only if the model is a GPT4 vision or Turbo model + if model == Llm.GPT_4_VISION or model == Llm.GPT_4_TURBO_2024_04_09: params["max_tokens"] = 4096 - params["temperature"] = 0 stream = await client.chat.completions.create(**params) # type: ignore full_response = "" diff --git a/backend/routes/generate_code.py b/backend/routes/generate_code.py index a7edb9b..fa5c7a5 100644 --- a/backend/routes/generate_code.py +++ b/backend/routes/generate_code.py @@ -5,8 +5,8 @@ import openai from config import ANTHROPIC_API_KEY, IS_PROD, SHOULD_MOCK_AI_RESPONSE from custom_types import InputMode from llm import ( - CODE_GENERATION_MODELS, Llm, + convert_frontend_str_to_llm, stream_claude_response, stream_claude_response_native, stream_openai_response, @@ -84,10 +84,14 @@ async def stream_code(websocket: WebSocket): validated_input_mode = cast(InputMode, input_mode) # Read the model from the request. Fall back to default if not provided. - code_generation_model = params.get("codeGenerationModel", "gpt_4_vision") - if code_generation_model not in CODE_GENERATION_MODELS: - await throw_error(f"Invalid model: {code_generation_model}") - raise Exception(f"Invalid model: {code_generation_model}") + code_generation_model_str = params.get( + "codeGenerationModel", Llm.GPT_4_VISION.value + ) + try: + code_generation_model = convert_frontend_str_to_llm(code_generation_model_str) + except: + await throw_error(f"Invalid model: {code_generation_model_str}") + raise Exception(f"Invalid model: {code_generation_model_str}") exact_llm_version = None print( @@ -105,7 +109,10 @@ async def stream_code(websocket: WebSocket): if openai_api_key: print("Using OpenAI API key from environment variable") - if not openai_api_key and code_generation_model == "gpt_4_vision": + if not openai_api_key and ( + code_generation_model == Llm.GPT_4_VISION + or code_generation_model == Llm.GPT_4_TURBO_2024_04_09 + ): print("OpenAI API key not found") await throw_error( "No OpenAI 
API key found. Please add your API key in the settings dialog or add it to backend/.env file. If you add it to .env, make sure to restart the backend server." @@ -226,7 +233,7 @@ async def stream_code(websocket: WebSocket): include_thinking=True, ) exact_llm_version = Llm.CLAUDE_3_OPUS - elif code_generation_model == "claude_3_sonnet": + elif code_generation_model == Llm.CLAUDE_3_SONNET: if not ANTHROPIC_API_KEY: await throw_error( "No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env" @@ -238,15 +245,16 @@ async def stream_code(websocket: WebSocket): api_key=ANTHROPIC_API_KEY, callback=lambda x: process_chunk(x), ) - exact_llm_version = Llm.CLAUDE_3_SONNET + exact_llm_version = code_generation_model else: completion = await stream_openai_response( prompt_messages, # type: ignore api_key=openai_api_key, base_url=openai_base_url, callback=lambda x: process_chunk(x), + model=code_generation_model, ) - exact_llm_version = Llm.GPT_4_VISION + exact_llm_version = code_generation_model except openai.AuthenticationError as e: print("[GENERATE_CODE] Authentication failed", e) error_message = ( diff --git a/backend/test_llm.py b/backend/test_llm.py new file mode 100644 index 0000000..ec005a3 --- /dev/null +++ b/backend/test_llm.py @@ -0,0 +1,36 @@ +import unittest +from llm import convert_frontend_str_to_llm, Llm + + +class TestConvertFrontendStrToLlm(unittest.TestCase): + def test_convert_valid_strings(self): + self.assertEqual( + convert_frontend_str_to_llm("gpt_4_vision"), + Llm.GPT_4_VISION, + "Should convert 'gpt_4_vision' to Llm.GPT_4_VISION", + ) + self.assertEqual( + convert_frontend_str_to_llm("claude_3_sonnet"), + Llm.CLAUDE_3_SONNET, + "Should convert 'claude_3_sonnet' to Llm.CLAUDE_3_SONNET", + ) + self.assertEqual( + convert_frontend_str_to_llm("claude-3-opus-20240229"), + Llm.CLAUDE_3_OPUS, + "Should convert 'claude-3-opus-20240229' to Llm.CLAUDE_3_OPUS", + ) + self.assertEqual( + 
convert_frontend_str_to_llm("gpt-4-turbo-2024-04-09"), + Llm.GPT_4_TURBO_2024_04_09, + "Should convert 'gpt-4-turbo-2024-04-09' to Llm.GPT_4_TURBO_2024_04_09", + ) + + def test_convert_invalid_string_raises_exception(self): + with self.assertRaises(ValueError): + convert_frontend_str_to_llm("invalid_string") + with self.assertRaises(ValueError): + convert_frontend_str_to_llm("another_invalid_string") + + +if __name__ == "__main__": + unittest.main() diff --git a/frontend/src/lib/models.ts b/frontend/src/lib/models.ts index a972f78..58b3e31 100644 --- a/frontend/src/lib/models.ts +++ b/frontend/src/lib/models.ts @@ -1,12 +1,15 @@ // Keep in sync with backend (llm.py) export enum CodeGenerationModel { GPT_4_VISION = "gpt_4_vision", + GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09", CLAUDE_3_SONNET = "claude_3_sonnet", } +// Will generate a static error if a model in the enum above is not in the descriptions export const CODE_GENERATION_MODEL_DESCRIPTIONS: { [key in CodeGenerationModel]: { name: string; inBeta: boolean }; } = { - gpt_4_vision: { name: "GPT-4 Vision", inBeta: false }, - claude_3_sonnet: { name: "Claude 3 Sonnet", inBeta: true }, + gpt_4_vision: { name: "GPT-4 Vision (Nov 2023)", inBeta: false }, + claude_3_sonnet: { name: "Claude 3 Sonnet", inBeta: false }, + "gpt-4-turbo-2024-04-09": { name: "GPT-4 Turbo (Apr 2024)", inBeta: false }, }; From bb642b320e212032a265db7f1b45e8030a9bf70f Mon Sep 17 00:00:00 2001 From: Abi Raja Date: Thu, 11 Apr 2024 10:52:25 -0400 Subject: [PATCH 3/4] improve evaluation docs and the way the model is passed into the evaluation script --- Evaluation.md | 4 ++-- backend/evals/core.py | 9 ++++----- backend/run_evals.py | 5 ++++- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/Evaluation.md b/Evaluation.md index 5fd5da8..e937b78 100644 --- a/Evaluation.md +++ b/Evaluation.md @@ -5,8 +5,8 @@ Evaluation dataset consists of 16 screenshots. 
A Python script for running scree ### Running evals - Input screenshots should be located at `backend/evals_data/inputs` and the outputs will be `backend/evals_data/outputs`. If you want to modify this, modify `EVALS_DIR` in `backend/evals/config.py`. You can download the input screenshot dataset here: TODO. -- Set a stack (`STACK` var) in `backend/run_evals.py` -- Run `python backend/run_evals.py` - this runs the screenshot-to-code on the input dataset in parallel but it will still take a few minutes to complete. +- Set a stack and model (`STACK` var, `MODEL` var) in `backend/run_evals.py` +- Run `OPENAI_API_KEY=sk-... python run_evals.py` - this runs the screenshot-to-code on the input dataset in parallel but it will still take a few minutes to complete. - Once the script is done, you can find the outputs in `backend/evals_data/outputs`. ### Rating evals diff --git a/backend/evals/core.py b/backend/evals/core.py index 3438a7d..5e05362 100644 --- a/backend/evals/core.py +++ b/backend/evals/core.py @@ -1,14 +1,12 @@ import os from config import ANTHROPIC_API_KEY -from llm import stream_claude_response, stream_openai_response +from llm import Llm, stream_claude_response, stream_openai_response from prompts import assemble_prompt from prompts.types import Stack -async def generate_code_core(image_url: str, stack: Stack) -> str: - model = "CLAUDE" - +async def generate_code_core(image_url: str, stack: Stack, model: Llm) -> str: prompt_messages = assemble_prompt(image_url, stack) openai_api_key = os.environ.get("OPENAI_API_KEY") anthropic_api_key = ANTHROPIC_API_KEY @@ -17,7 +15,7 @@ async def generate_code_core(image_url: str, stack: Stack) -> str: async def process_chunk(content: str): pass - if model == "CLAUDE": + if model == Llm.CLAUDE_3_SONNET: if not anthropic_api_key: raise Exception("Anthropic API key not found") @@ -35,6 +33,7 @@ async def generate_code_core(image_url: str, stack: Stack) -> str: api_key=openai_api_key, base_url=openai_base_url, 
callback=lambda x: process_chunk(x), + model=model, ) return completion diff --git a/backend/run_evals.py b/backend/run_evals.py index a5cfefb..f26c708 100644 --- a/backend/run_evals.py +++ b/backend/run_evals.py @@ -1,6 +1,8 @@ # Load environment variables first from dotenv import load_dotenv +from llm import Llm + load_dotenv() import os @@ -12,6 +14,7 @@ from evals.core import generate_code_core from evals.utils import image_to_data_url STACK = "html_tailwind" +MODEL = Llm.CLAUDE_3_SONNET async def main(): @@ -25,7 +28,7 @@ async def main(): for filename in evals: filepath = os.path.join(INPUT_DIR, filename) data_url = await image_to_data_url(filepath) - task = generate_code_core(data_url, STACK) + task = generate_code_core(image_url=data_url, stack=STACK, model=MODEL) tasks.append(task) results = await asyncio.gather(*tasks) From 2c4450db89811313b7ee64205103ba378b59fe2b Mon Sep 17 00:00:00 2001 From: Milton Date: Fri, 12 Apr 2024 00:47:51 -0300 Subject: [PATCH 4/4] Fix: step by step API key generation Fix the step-by-step guide by adding a reference to the specific page on which OpenAI API keys are generated --- Troubleshooting.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Troubleshooting.md b/Troubleshooting.md index 3891db3..89aa3ba 100644 --- a/Troubleshooting.md +++ b/Troubleshooting.md @@ -11,7 +11,8 @@ You don't need a ChatGPT Pro account. Screenshot to code uses API keys from your 5. Go to Settings > Limits and check at the bottom of the page, your current tier has to be "Tier 1" to have GPT4 access 285636973-da38bd4d-8a78-4904-8027-ca67d729b933 -6. Go to Screenshot to code and paste it in the Settings dialog under OpenAI key (gear icon). Your key is only stored in your browser. Never stored on our servers. +6. Navigate to the OpenAI [API keys](https://platform.openai.com/api-keys) page, then create and copy a new secret key. +7. Go to Screenshot to code and paste it in the Settings dialog under OpenAI key (gear icon). 
Your key is only stored in your browser; it is never stored on our servers. ## Still not working?