diff --git a/README.md b/README.md
index 1762f91..8f01c7f 100644
--- a/README.md
+++ b/README.md
@@ -35,14 +35,18 @@ We also just added experimental support for taking a video/screen recording of a
-
-## 🚀 Try It Out without no install
+## 🚀 Hosted Version
[Try it live on the hosted version (paid)](https://screenshottocode.com).
## 🛠 Getting Started
-The app has a React/Vite frontend and a FastAPI backend. You will need an OpenAI API key with access to the GPT-4 Vision API or an Anthropic key if you want to use Claude Sonnet, or for experimental video support.
+The app has a React/Vite frontend and a FastAPI backend.
+
+Keys needed:
+
+* [OpenAI API key with access to GPT-4](https://github.com/abi/screenshot-to-code/blob/main/Troubleshooting.md)
+* Anthropic key (optional) - only needed if you want to use Claude Sonnet or the experimental video support.
Run the backend (I use Poetry for package management - `pip install poetry` if you don't have it):
@@ -54,7 +58,7 @@ poetry shell
poetry run uvicorn main:app --reload --port 7001
```
-If you want to use Anthropic, add the `ANTHROPIC_API_KEY` to `backend/.env` with your API key from Anthropic.
+If you want to use Anthropic, add `ANTHROPIC_API_KEY` to `backend/.env`. You can also set the API keys in the frontend's settings dialog (click the gear icon after the app loads).
Run the frontend:
@@ -113,5 +117,3 @@ https://github.com/abi/screenshot-to-code/assets/23818/3fec0f77-44e8-4fb3-a769-a
## 🌍 Hosted Version
🆕 [Try it here (paid)](https://screenshottocode.com). Or see [Getting Started](#-getting-started) for local install instructions to use with your own API keys.
-
-[](https://www.buymeacoffee.com/abiraja)
diff --git a/Troubleshooting.md b/Troubleshooting.md
index 89aa3ba..dbb1186 100644
--- a/Troubleshooting.md
+++ b/Troubleshooting.md
@@ -1,4 +1,4 @@
-### Getting an OpenAI API key with GPT4-Vision model access
+### Getting an OpenAI API key with GPT-4 model access
You don't need a ChatGPT Pro account. Screenshot to code uses API keys from your OpenAI developer account. In order to get access to the GPT4 Vision model, log into your OpenAI account and then, follow these instructions:
diff --git a/backend/evals/core.py b/backend/evals/core.py
index 5e05362..2fc0352 100644
--- a/backend/evals/core.py
+++ b/backend/evals/core.py
@@ -15,7 +15,7 @@ async def generate_code_core(image_url: str, stack: Stack, model: Llm) -> str:
async def process_chunk(content: str):
pass
- if model == Llm.CLAUDE_3_SONNET:
+ if model == Llm.CLAUDE_3_SONNET or model == Llm.CLAUDE_3_5_SONNET_2024_06_20:
if not anthropic_api_key:
raise Exception("Anthropic API key not found")
@@ -23,6 +23,7 @@ async def generate_code_core(image_url: str, stack: Stack, model: Llm) -> str:
prompt_messages,
api_key=anthropic_api_key,
callback=lambda x: process_chunk(x),
+ model=model,
)
else:
if not openai_api_key:
diff --git a/backend/image_generation.py b/backend/image_generation.py
index b93792c..e3f609f 100644
--- a/backend/image_generation.py
+++ b/backend/image_generation.py
@@ -5,7 +5,7 @@ from openai import AsyncOpenAI
from bs4 import BeautifulSoup
-async def process_tasks(prompts: List[str], api_key: str, base_url: str):
+async def process_tasks(prompts: List[str], api_key: str, base_url: str | None):
tasks = [generate_image(prompt, api_key, base_url) for prompt in prompts]
results = await asyncio.gather(*tasks, return_exceptions=True)
@@ -15,22 +15,23 @@ async def process_tasks(prompts: List[str], api_key: str, base_url: str):
print(f"An exception occurred: {result}")
processed_results.append(None)
else:
- processed_results.append(result) # type: ignore
+ processed_results.append(result)
return processed_results
-async def generate_image(prompt: str, api_key: str, base_url: str):
+async def generate_image(
+ prompt: str, api_key: str, base_url: str | None
+) -> Union[str, None]:
client = AsyncOpenAI(api_key=api_key, base_url=base_url)
- image_params: Dict[str, Union[str, int]] = {
- "model": "dall-e-3",
- "quality": "standard",
- "style": "natural",
- "n": 1,
- "size": "1024x1024",
- "prompt": prompt,
- }
- res = await client.images.generate(**image_params) # type: ignore
+ res = await client.images.generate(
+ model="dall-e-3",
+ quality="standard",
+ style="natural",
+ n=1,
+ size="1024x1024",
+ prompt=prompt,
+ )
await client.close()
return res.data[0].url
@@ -63,13 +64,13 @@ def create_alt_url_mapping(code: str) -> Dict[str, str]:
async def generate_images(
code: str, api_key: str, base_url: Union[str, None], image_cache: Dict[str, str]
-):
+) -> str:
# Find all images
soup = BeautifulSoup(code, "html.parser")
images = soup.find_all("img")
# Extract alt texts as image prompts
- alts = []
+ alts: List[str | None] = []
for img in images:
# Only include URL if the image starts with https://placehold.co
# and it's not already in the image_cache
@@ -77,26 +78,26 @@ async def generate_images(
img["src"].startswith("https://placehold.co")
and image_cache.get(img.get("alt")) is None
):
- alts.append(img.get("alt", None)) # type: ignore
+ alts.append(img.get("alt", None))
# Exclude images with no alt text
- alts = [alt for alt in alts if alt is not None] # type: ignore
+ filtered_alts: List[str] = [alt for alt in alts if alt is not None]
# Remove duplicates
- prompts = list(set(alts)) # type: ignore
+ prompts = list(set(filtered_alts))
# Return early if there are no images to replace
- if len(prompts) == 0: # type: ignore
+ if len(prompts) == 0:
return code
# Generate images
- results = await process_tasks(prompts, api_key, base_url) # type: ignore
+ results = await process_tasks(prompts, api_key, base_url)
# Create a dict mapping alt text to image URL
- mapped_image_urls = dict(zip(prompts, results)) # type: ignore
+ mapped_image_urls = dict(zip(prompts, results))
# Merge with image_cache
- mapped_image_urls = {**mapped_image_urls, **image_cache} # type: ignore
+ mapped_image_urls = {**mapped_image_urls, **image_cache}
# Replace old image URLs with the generated URLs
for img in images:
diff --git a/backend/llm.py b/backend/llm.py
index 83ae94c..1ef6c2f 100644
--- a/backend/llm.py
+++ b/backend/llm.py
@@ -18,6 +18,7 @@ class Llm(Enum):
CLAUDE_3_SONNET = "claude-3-sonnet-20240229"
CLAUDE_3_OPUS = "claude-3-opus-20240229"
CLAUDE_3_HAIKU = "claude-3-haiku-20240307"
+ CLAUDE_3_5_SONNET_2024_06_20 = "claude-3-5-sonnet-20240620"
# Will throw errors if you send a garbage string
@@ -60,9 +61,15 @@ async def stream_openai_response(
full_response = ""
async for chunk in stream: # type: ignore
assert isinstance(chunk, ChatCompletionChunk)
- content = chunk.choices[0].delta.content or ""
- full_response += content
- await callback(content)
+ if (
+ chunk.choices
+ and len(chunk.choices) > 0
+ and chunk.choices[0].delta
+ and chunk.choices[0].delta.content
+ ):
+ content = chunk.choices[0].delta.content or ""
+ full_response += content
+ await callback(content)
await client.close()
@@ -74,12 +81,12 @@ async def stream_claude_response(
messages: List[ChatCompletionMessageParam],
api_key: str,
callback: Callable[[str], Awaitable[None]],
+ model: Llm,
) -> str:
client = AsyncAnthropic(api_key=api_key)
# Base parameters
- model = Llm.CLAUDE_3_SONNET
max_tokens = 4096
temperature = 0.0
diff --git a/backend/routes/generate_code.py b/backend/routes/generate_code.py
index f77fcc7..cb6a549 100644
--- a/backend/routes/generate_code.py
+++ b/backend/routes/generate_code.py
@@ -14,7 +14,7 @@ from llm import (
)
from openai.types.chat import ChatCompletionMessageParam
from mock_llm import mock_completion
-from typing import Dict, List, cast, get_args
+from typing import Dict, List, Union, cast, get_args
from image_generation import create_alt_url_mapping, generate_images
from prompts import assemble_imported_code_prompt, assemble_prompt
from datetime import datetime
@@ -24,7 +24,7 @@ from routes.saas_utils import does_user_have_subscription_credits
from prompts.claude_prompts import VIDEO_PROMPT
from prompts.types import Stack
-from utils import pprint_prompt
+# from utils import pprint_prompt
from video.utils import extract_tag_content, assemble_claude_prompt_video
from ws.constants import APP_ERROR_WEB_SOCKET_CODE # type: ignore
@@ -161,8 +161,19 @@ async def stream_code(websocket: WebSocket):
)
raise Exception("No OpenAI API key found")
+ # Get the Anthropic API key from the request. Fall back to environment variable if not provided.
+ # If neither is provided, we throw an error later only if Claude is used.
+ anthropic_api_key = None
+ if "anthropicApiKey" in params and params["anthropicApiKey"]:
+ anthropic_api_key = params["anthropicApiKey"]
+ print("Using Anthropic API key from client-side settings dialog")
+ else:
+ anthropic_api_key = ANTHROPIC_API_KEY
+ if anthropic_api_key:
+ print("Using Anthropic API key from environment variable")
+
# Get the OpenAI Base URL from the request. Fall back to environment variable if not provided.
- openai_base_url = None
+ openai_base_url: Union[str, None] = None
# Disable user-specified OpenAI Base URL in prod
if not os.environ.get("IS_PROD"):
if "openAiBaseURL" in params and params["openAiBaseURL"]:
@@ -255,7 +266,7 @@ async def stream_code(websocket: WebSocket):
video_data_url = params["image"]
prompt_messages = await assemble_claude_prompt_video(video_data_url)
- pprint_prompt(prompt_messages) # type: ignore
+ # pprint_prompt(prompt_messages) # type: ignore
if SHOULD_MOCK_AI_RESPONSE:
completion = await mock_completion(
@@ -267,25 +278,28 @@ async def stream_code(websocket: WebSocket):
if IS_PROD:
raise Exception("Video mode is not supported in prod")
- if not ANTHROPIC_API_KEY:
+ if not anthropic_api_key:
await throw_error(
- "Video only works with Anthropic models. No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env"
+ "Video only works with Anthropic models. No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env or in the settings dialog"
)
raise Exception("No Anthropic key")
completion = await stream_claude_response_native(
system_prompt=VIDEO_PROMPT,
messages=prompt_messages, # type: ignore
- api_key=ANTHROPIC_API_KEY,
+ api_key=anthropic_api_key,
callback=lambda x: process_chunk(x),
model=Llm.CLAUDE_3_OPUS,
include_thinking=True,
)
exact_llm_version = Llm.CLAUDE_3_OPUS
- elif code_generation_model == Llm.CLAUDE_3_SONNET:
- if not ANTHROPIC_API_KEY:
+ elif (
+ code_generation_model == Llm.CLAUDE_3_SONNET
+ or code_generation_model == Llm.CLAUDE_3_5_SONNET_2024_06_20
+ ):
+ if not anthropic_api_key:
await throw_error(
- "No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env"
+ "No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env or in the settings dialog"
)
raise Exception("No Anthropic key")
@@ -298,8 +312,9 @@ async def stream_code(websocket: WebSocket):
completion = await stream_claude_response(
prompt_messages, # type: ignore
- api_key=ANTHROPIC_API_KEY,
+ api_key=anthropic_api_key,
callback=lambda x: process_chunk(x),
+ model=code_generation_model,
)
exact_llm_version = code_generation_model
else:
diff --git a/frontend/Dockerfile b/frontend/Dockerfile
index b176926..8579e17 100644
--- a/frontend/Dockerfile
+++ b/frontend/Dockerfile
@@ -1,4 +1,4 @@
-FROM node:20.9-bullseye-slim
+FROM node:22-bullseye-slim
# Set the working directory in the container
WORKDIR /app
@@ -6,6 +6,9 @@ WORKDIR /app
# Copy package.json and yarn.lock
COPY package.json yarn.lock /app/
+# Set the environment variable to skip Puppeteer download
+ENV PUPPETEER_SKIP_DOWNLOAD=true
+
# Install dependencies
RUN yarn install
diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx
index d7f8258..2e1d177 100644
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@@ -79,6 +79,7 @@ function App({ navbarComponent }: Props) {
{
openAiApiKey: null,
openAiBaseURL: null,
+ anthropicApiKey: null,
screenshotOneApiKey: null,
isImageGenerationEnabled: true,
editorTheme: EditorTheme.COBALT,
@@ -109,8 +110,10 @@ function App({ navbarComponent }: Props) {
CodeGenerationModel.GPT_4_TURBO_2024_04_09 &&
settings.generatedCodeConfig === Stack.REACT_TAILWIND;
- const showGpt4OMessage =
+ const showBetterModelMessage =
selectedCodeGenerationModel !== CodeGenerationModel.GPT_4O_2024_05_13 &&
+ selectedCodeGenerationModel !==
+ CodeGenerationModel.CLAUDE_3_5_SONNET_2024_06_20 &&
appState === AppState.INITIAL;
const showFeedbackCallNote =
@@ -471,11 +474,11 @@ function App({ navbarComponent }: Props) {
)}
- {showGpt4OMessage && (
+ {showBetterModelMessage && (
- Now supporting GPT-4o. Higher quality and 2x faster. Give it a - try! + Now supporting GPT-4o and Claude Sonnet 3.5. Higher quality and + 2x faster. Give it a try!