diff --git a/README.md b/README.md index 1762f91..8f01c7f 100644 --- a/README.md +++ b/README.md @@ -35,14 +35,18 @@ We also just added experimental support for taking a video/screen recording of a - -## 🚀 Try It Out without no install +## 🚀 Hosted Version [Try it live on the hosted version (paid)](https://screenshottocode.com). ## 🛠 Getting Started -The app has a React/Vite frontend and a FastAPI backend. You will need an OpenAI API key with access to the GPT-4 Vision API or an Anthropic key if you want to use Claude Sonnet, or for experimental video support. +The app has a React/Vite frontend and a FastAPI backend. + +Keys needed: + +* [OpenAI API key with access to GPT-4](https://github.com/abi/screenshot-to-code/blob/main/Troubleshooting.md) +* Anthropic key (optional) - only if you want to use Claude Sonnet, or for experimental video support. Run the backend (I use Poetry for package management - `pip install poetry` if you don't have it): @@ -54,7 +58,7 @@ poetry shell poetry run uvicorn main:app --reload --port 7001 ``` -If you want to use Anthropic, add the `ANTHROPIC_API_KEY` to `backend/.env` with your API key from Anthropic. +If you want to use Anthropic, add `ANTHROPIC_API_KEY` to `backend/.env`. You can also set up the keys using the settings dialog on the front-end (click the gear icon after loading the frontend). Run the frontend: @@ -113,5 +117,3 @@ https://github.com/abi/screenshot-to-code/assets/23818/3fec0f77-44e8-4fb3-a769-a ## 🌍 Hosted Version 🆕 [Try it here (paid)](https://screenshottocode.com). Or see [Getting Started](#-getting-started) for local install instructions to use with your own API keys. - -[!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://www.buymeacoffee.com/abiraja) diff --git a/Troubleshooting.md b/Troubleshooting.md index 89aa3ba..dbb1186 100644 --- a/Troubleshooting.md +++ b/Troubleshooting.md @@ -1,4 +1,4 @@ -### Getting an OpenAI API key with GPT4-Vision model access +### Getting an OpenAI API key with GPT-4 model access You don't need a ChatGPT Pro account. Screenshot to code uses API keys from your OpenAI developer account. In order to get access to the GPT4 Vision model, log into your OpenAI account and then, follow these instructions: diff --git a/backend/evals/core.py b/backend/evals/core.py index 5e05362..2fc0352 100644 --- a/backend/evals/core.py +++ b/backend/evals/core.py @@ -15,7 +15,7 @@ async def generate_code_core(image_url: str, stack: Stack, model: Llm) -> str: async def process_chunk(content: str): pass - if model == Llm.CLAUDE_3_SONNET: + if model == Llm.CLAUDE_3_SONNET or model == Llm.CLAUDE_3_5_SONNET_2024_06_20: if not anthropic_api_key: raise Exception("Anthropic API key not found") @@ -23,6 +23,7 @@ async def generate_code_core(image_url: str, stack: Stack, model: Llm) -> str: prompt_messages, api_key=anthropic_api_key, callback=lambda x: process_chunk(x), + model=model, ) else: if not openai_api_key: diff --git a/backend/image_generation.py b/backend/image_generation.py index b93792c..e3f609f 100644 --- a/backend/image_generation.py +++ b/backend/image_generation.py @@ -5,7 +5,7 @@ from openai import AsyncOpenAI from bs4 import BeautifulSoup -async def process_tasks(prompts: List[str], api_key: str, base_url: str): +async def process_tasks(prompts: List[str], api_key: str, base_url: str | None): tasks = [generate_image(prompt, api_key, base_url) for prompt in prompts] results = await asyncio.gather(*tasks, return_exceptions=True) @@ -15,22 +15,23 @@ async def process_tasks(prompts: List[str], api_key: str, base_url: str): print(f"An exception occurred: {result}") processed_results.append(None) else: - processed_results.append(result) # type: ignore + processed_results.append(result) return processed_results -async def generate_image(prompt: str, api_key: str, base_url: str): +async def generate_image( + prompt: str, api_key: str, base_url: str | None +) -> Union[str, None]: client = AsyncOpenAI(api_key=api_key, base_url=base_url) - image_params: Dict[str, Union[str, int]] = { - "model": "dall-e-3", - "quality": "standard", - "style": "natural", - "n": 1, - "size": "1024x1024", - "prompt": prompt, - } - res = await client.images.generate(**image_params) # type: ignore + res = await client.images.generate( + model="dall-e-3", + quality="standard", + style="natural", + n=1, + size="1024x1024", + prompt=prompt, + ) await client.close() return res.data[0].url @@ -63,13 +64,13 @@ def create_alt_url_mapping(code: str) -> Dict[str, str]: async def generate_images( code: str, api_key: str, base_url: Union[str, None], image_cache: Dict[str, str] -): +) -> str: # Find all images soup = BeautifulSoup(code, "html.parser") images = soup.find_all("img") # Extract alt texts as image prompts - alts = [] + alts: List[str | None] = [] for img in images: # Only include URL if the image starts with https://placehold.co # and it's not already in the image_cache @@ -77,26 +78,26 @@ async def generate_images( img["src"].startswith("https://placehold.co") and image_cache.get(img.get("alt")) is None ): - alts.append(img.get("alt", None)) # type: ignore + alts.append(img.get("alt", None)) # Exclude images with no alt text - alts = [alt for alt in alts if alt is not None] # type: ignore + filtered_alts: List[str] = [alt for alt in alts if alt is not None] # Remove duplicates - prompts = list(set(alts)) # type: ignore + prompts = list(set(filtered_alts)) # Return early if there are no images to replace - if len(prompts) == 0: # type: ignore + if len(prompts) == 0: return code # Generate images - results = await process_tasks(prompts, api_key, base_url) # type: ignore + results = await process_tasks(prompts, api_key, base_url) # Create a dict mapping alt text to image URL - mapped_image_urls = dict(zip(prompts, results)) # type: ignore + mapped_image_urls = dict(zip(prompts, results)) # Merge with image_cache - mapped_image_urls = {**mapped_image_urls, **image_cache} # type: ignore + mapped_image_urls = {**mapped_image_urls, **image_cache} # Replace old image URLs with the generated URLs for img in images: diff --git a/backend/llm.py b/backend/llm.py index 83ae94c..1ef6c2f 100644 --- a/backend/llm.py +++ b/backend/llm.py @@ -18,6 +18,7 @@ class Llm(Enum): CLAUDE_3_SONNET = "claude-3-sonnet-20240229" CLAUDE_3_OPUS = "claude-3-opus-20240229" CLAUDE_3_HAIKU = "claude-3-haiku-20240307" + CLAUDE_3_5_SONNET_2024_06_20 = "claude-3-5-sonnet-20240620" # Will throw errors if you send a garbage string @@ -60,9 +61,15 @@ async def stream_openai_response( full_response = "" async for chunk in stream: # type: ignore assert isinstance(chunk, ChatCompletionChunk) - content = chunk.choices[0].delta.content or "" - full_response += content - await callback(content) + if ( + chunk.choices + and len(chunk.choices) > 0 + and chunk.choices[0].delta + and chunk.choices[0].delta.content + ): + content = chunk.choices[0].delta.content or "" + full_response += content + await callback(content) await client.close() @@ -74,12 +81,12 @@ async def stream_claude_response( messages: List[ChatCompletionMessageParam], api_key: str, callback: Callable[[str], Awaitable[None]], + model: Llm, ) -> str: client = AsyncAnthropic(api_key=api_key) # Base parameters - model = Llm.CLAUDE_3_SONNET max_tokens = 4096 temperature = 0.0 diff --git a/backend/routes/generate_code.py b/backend/routes/generate_code.py index f77fcc7..cb6a549 100644 --- a/backend/routes/generate_code.py +++ b/backend/routes/generate_code.py @@ -14,7 +14,7 @@ from llm import ( ) from openai.types.chat import ChatCompletionMessageParam from mock_llm import mock_completion -from typing import Dict, List, cast, get_args +from typing import Dict, List, Union, cast, get_args from image_generation import create_alt_url_mapping, generate_images from prompts import assemble_imported_code_prompt, assemble_prompt from datetime import datetime @@ -24,7 +24,7 @@ from routes.saas_utils import does_user_have_subscription_credits from prompts.claude_prompts import VIDEO_PROMPT from prompts.types import Stack -from utils import pprint_prompt +# from utils import pprint_prompt from video.utils import extract_tag_content, assemble_claude_prompt_video from ws.constants import APP_ERROR_WEB_SOCKET_CODE # type: ignore @@ -161,8 +161,19 @@ async def stream_code(websocket: WebSocket): ) raise Exception("No OpenAI API key found") + # Get the Anthropic API key from the request. Fall back to environment variable if not provided. + # If neither is provided, we throw an error later only if Claude is used. + anthropic_api_key = None + if "anthropicApiKey" in params and params["anthropicApiKey"]: + anthropic_api_key = params["anthropicApiKey"] + print("Using Anthropic API key from client-side settings dialog") + else: + anthropic_api_key = ANTHROPIC_API_KEY + if anthropic_api_key: + print("Using Anthropic API key from environment variable") + # Get the OpenAI Base URL from the request. Fall back to environment variable if not provided. - openai_base_url = None + openai_base_url: Union[str, None] = None # Disable user-specified OpenAI Base URL in prod if not os.environ.get("IS_PROD"): if "openAiBaseURL" in params and params["openAiBaseURL"]: @@ -255,7 +266,7 @@ async def stream_code(websocket: WebSocket): video_data_url = params["image"] prompt_messages = await assemble_claude_prompt_video(video_data_url) - pprint_prompt(prompt_messages) # type: ignore + # pprint_prompt(prompt_messages) # type: ignore if SHOULD_MOCK_AI_RESPONSE: completion = await mock_completion( @@ -267,25 +278,28 @@ async def stream_code(websocket: WebSocket): if IS_PROD: raise Exception("Video mode is not supported in prod") - if not ANTHROPIC_API_KEY: + if not anthropic_api_key: await throw_error( - "Video only works with Anthropic models. No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env" + "Video only works with Anthropic models. No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env or in the settings dialog" ) raise Exception("No Anthropic key") completion = await stream_claude_response_native( system_prompt=VIDEO_PROMPT, messages=prompt_messages, # type: ignore - api_key=ANTHROPIC_API_KEY, + api_key=anthropic_api_key, callback=lambda x: process_chunk(x), model=Llm.CLAUDE_3_OPUS, include_thinking=True, ) exact_llm_version = Llm.CLAUDE_3_OPUS - elif code_generation_model == Llm.CLAUDE_3_SONNET: - if not ANTHROPIC_API_KEY: + elif ( + code_generation_model == Llm.CLAUDE_3_SONNET + or code_generation_model == Llm.CLAUDE_3_5_SONNET_2024_06_20 + ): + if not anthropic_api_key: await throw_error( - "No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env" + "No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env or in the settings dialog" ) raise Exception("No Anthropic key") @@ -298,8 +312,9 @@ async def stream_code(websocket: WebSocket): completion = await stream_claude_response( prompt_messages, # type: ignore - api_key=ANTHROPIC_API_KEY, + api_key=anthropic_api_key, callback=lambda x: process_chunk(x), + model=code_generation_model, ) exact_llm_version = code_generation_model else: diff --git a/frontend/Dockerfile b/frontend/Dockerfile index b176926..8579e17 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -1,4 +1,4 @@ -FROM node:20.9-bullseye-slim +FROM node:22-bullseye-slim # Set the working directory in the container WORKDIR /app @@ -6,6 +6,9 @@ WORKDIR /app # Copy package.json and yarn.lock COPY package.json yarn.lock /app/ +# Set the environment variable to skip Puppeteer download +ENV PUPPETEER_SKIP_DOWNLOAD=true + # Install dependencies RUN yarn install diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index d7f8258..2e1d177 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -79,6 +79,7 @@ function App({ navbarComponent }: Props) { { openAiApiKey: null, openAiBaseURL: null, + anthropicApiKey: null, screenshotOneApiKey: null, isImageGenerationEnabled: true, editorTheme: EditorTheme.COBALT, @@ -109,8 +110,10 @@ function App({ navbarComponent }: Props) { CodeGenerationModel.GPT_4_TURBO_2024_04_09 && settings.generatedCodeConfig === Stack.REACT_TAILWIND; - const showGpt4OMessage = + const showBetterModelMessage = selectedCodeGenerationModel !== CodeGenerationModel.GPT_4O_2024_05_13 && + selectedCodeGenerationModel !== + CodeGenerationModel.CLAUDE_3_5_SONNET_2024_06_20 && appState === AppState.INITIAL; const showFeedbackCallNote = @@ -471,11 +474,11 @@ function App({ navbarComponent }: Props) { )} - {showGpt4OMessage && ( + {showBetterModelMessage && (

- Now supporting GPT-4o. Higher quality and 2x faster. Give it a - try! + Now supporting GPT-4o and Claude Sonnet 3.5. Higher quality and + 2x faster. Give it a try!

)} diff --git a/frontend/src/components/SettingsDialog.tsx b/frontend/src/components/SettingsDialog.tsx index 2e7814b..97d8f38 100644 --- a/frontend/src/components/SettingsDialog.tsx +++ b/frontend/src/components/SettingsDialog.tsx @@ -49,7 +49,7 @@ function SettingsDialog({ settings, setSettings }: Props) {