Merge branch 'main' into hosted

This commit is contained in:
Abi Raja 2024-06-22 19:43:52 +08:00
commit d2369cb0a0
13 changed files with 135 additions and 76 deletions

View File

@ -35,14 +35,18 @@ We also just added experimental support for taking a video/screen recording of a
<a href="https://konghq.com/products/kong-konnect/register?utm_medium=referral&utm_source=github&utm_campaign=platform&utm_content=screenshot-to-code" target="_blank" title="Kong - powering the API world"><img src="https://picoapps.xyz/s2c-sponsors/Kong-GitHub-240x100.png"></a>
## 🚀 Try It Out without no install
## 🚀 Hosted Version
[Try it live on the hosted version (paid)](https://screenshottocode.com).
## 🛠 Getting Started
The app has a React/Vite frontend and a FastAPI backend. You will need an OpenAI API key with access to the GPT-4 Vision API or an Anthropic key if you want to use Claude Sonnet, or for experimental video support.
The app has a React/Vite frontend and a FastAPI backend.
Keys needed:
* [OpenAI API key with access to GPT-4](https://github.com/abi/screenshot-to-code/blob/main/Troubleshooting.md)
* Anthropic key (optional) - only if you want to use Claude Sonnet, or for experimental video support.
Run the backend (I use Poetry for package management - `pip install poetry` if you don't have it):
@ -54,7 +58,7 @@ poetry shell
poetry run uvicorn main:app --reload --port 7001
```
If you want to use Anthropic, add the `ANTHROPIC_API_KEY` to `backend/.env` with your API key from Anthropic.
If you want to use Anthropic, add `ANTHROPIC_API_KEY` to `backend/.env`. You can also set up the keys using the settings dialog on the front-end (click the gear icon after loading the frontend).
Run the frontend:
@ -113,5 +117,3 @@ https://github.com/abi/screenshot-to-code/assets/23818/3fec0f77-44e8-4fb3-a769-a
## 🌍 Hosted Version
🆕 [Try it here (paid)](https://screenshottocode.com). Or see [Getting Started](#-getting-started) for local install instructions to use with your own API keys.
[!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://www.buymeacoffee.com/abiraja)

View File

@ -1,4 +1,4 @@
### Getting an OpenAI API key with GPT4-Vision model access
### Getting an OpenAI API key with GPT-4 model access
You don't need a ChatGPT Pro account. Screenshot to code uses API keys from your OpenAI developer account. In order to get access to the GPT4 Vision model, log into your OpenAI account and then follow these instructions:

View File

@ -15,7 +15,7 @@ async def generate_code_core(image_url: str, stack: Stack, model: Llm) -> str:
async def process_chunk(content: str):
pass
if model == Llm.CLAUDE_3_SONNET:
if model == Llm.CLAUDE_3_SONNET or model == Llm.CLAUDE_3_5_SONNET_2024_06_20:
if not anthropic_api_key:
raise Exception("Anthropic API key not found")
@ -23,6 +23,7 @@ async def generate_code_core(image_url: str, stack: Stack, model: Llm) -> str:
prompt_messages,
api_key=anthropic_api_key,
callback=lambda x: process_chunk(x),
model=model,
)
else:
if not openai_api_key:

View File

@ -5,7 +5,7 @@ from openai import AsyncOpenAI
from bs4 import BeautifulSoup
async def process_tasks(prompts: List[str], api_key: str, base_url: str):
async def process_tasks(prompts: List[str], api_key: str, base_url: str | None):
tasks = [generate_image(prompt, api_key, base_url) for prompt in prompts]
results = await asyncio.gather(*tasks, return_exceptions=True)
@ -15,22 +15,23 @@ async def process_tasks(prompts: List[str], api_key: str, base_url: str):
print(f"An exception occurred: {result}")
processed_results.append(None)
else:
processed_results.append(result) # type: ignore
processed_results.append(result)
return processed_results
async def generate_image(prompt: str, api_key: str, base_url: str):
async def generate_image(
prompt: str, api_key: str, base_url: str | None
) -> Union[str, None]:
client = AsyncOpenAI(api_key=api_key, base_url=base_url)
image_params: Dict[str, Union[str, int]] = {
"model": "dall-e-3",
"quality": "standard",
"style": "natural",
"n": 1,
"size": "1024x1024",
"prompt": prompt,
}
res = await client.images.generate(**image_params) # type: ignore
res = await client.images.generate(
model="dall-e-3",
quality="standard",
style="natural",
n=1,
size="1024x1024",
prompt=prompt,
)
await client.close()
return res.data[0].url
@ -63,13 +64,13 @@ def create_alt_url_mapping(code: str) -> Dict[str, str]:
async def generate_images(
code: str, api_key: str, base_url: Union[str, None], image_cache: Dict[str, str]
):
) -> str:
# Find all images
soup = BeautifulSoup(code, "html.parser")
images = soup.find_all("img")
# Extract alt texts as image prompts
alts = []
alts: List[str | None] = []
for img in images:
# Only include URL if the image starts with https://placehold.co
# and it's not already in the image_cache
@ -77,26 +78,26 @@ async def generate_images(
img["src"].startswith("https://placehold.co")
and image_cache.get(img.get("alt")) is None
):
alts.append(img.get("alt", None)) # type: ignore
alts.append(img.get("alt", None))
# Exclude images with no alt text
alts = [alt for alt in alts if alt is not None] # type: ignore
filtered_alts: List[str] = [alt for alt in alts if alt is not None]
# Remove duplicates
prompts = list(set(alts)) # type: ignore
prompts = list(set(filtered_alts))
# Return early if there are no images to replace
if len(prompts) == 0: # type: ignore
if len(prompts) == 0:
return code
# Generate images
results = await process_tasks(prompts, api_key, base_url) # type: ignore
results = await process_tasks(prompts, api_key, base_url)
# Create a dict mapping alt text to image URL
mapped_image_urls = dict(zip(prompts, results)) # type: ignore
mapped_image_urls = dict(zip(prompts, results))
# Merge with image_cache
mapped_image_urls = {**mapped_image_urls, **image_cache} # type: ignore
mapped_image_urls = {**mapped_image_urls, **image_cache}
# Replace old image URLs with the generated URLs
for img in images:

View File

@ -18,6 +18,7 @@ class Llm(Enum):
CLAUDE_3_SONNET = "claude-3-sonnet-20240229"
CLAUDE_3_OPUS = "claude-3-opus-20240229"
CLAUDE_3_HAIKU = "claude-3-haiku-20240307"
CLAUDE_3_5_SONNET_2024_06_20 = "claude-3-5-sonnet-20240620"
# Will throw errors if you send a garbage string
@ -60,9 +61,15 @@ async def stream_openai_response(
full_response = ""
async for chunk in stream: # type: ignore
assert isinstance(chunk, ChatCompletionChunk)
content = chunk.choices[0].delta.content or ""
full_response += content
await callback(content)
if (
chunk.choices
and len(chunk.choices) > 0
and chunk.choices[0].delta
and chunk.choices[0].delta.content
):
content = chunk.choices[0].delta.content or ""
full_response += content
await callback(content)
await client.close()
@ -74,12 +81,12 @@ async def stream_claude_response(
messages: List[ChatCompletionMessageParam],
api_key: str,
callback: Callable[[str], Awaitable[None]],
model: Llm,
) -> str:
client = AsyncAnthropic(api_key=api_key)
# Base parameters
model = Llm.CLAUDE_3_SONNET
max_tokens = 4096
temperature = 0.0

View File

@ -14,7 +14,7 @@ from llm import (
)
from openai.types.chat import ChatCompletionMessageParam
from mock_llm import mock_completion
from typing import Dict, List, cast, get_args
from typing import Dict, List, Union, cast, get_args
from image_generation import create_alt_url_mapping, generate_images
from prompts import assemble_imported_code_prompt, assemble_prompt
from datetime import datetime
@ -24,7 +24,7 @@ from routes.saas_utils import does_user_have_subscription_credits
from prompts.claude_prompts import VIDEO_PROMPT
from prompts.types import Stack
from utils import pprint_prompt
# from utils import pprint_prompt
from video.utils import extract_tag_content, assemble_claude_prompt_video
from ws.constants import APP_ERROR_WEB_SOCKET_CODE # type: ignore
@ -161,8 +161,19 @@ async def stream_code(websocket: WebSocket):
)
raise Exception("No OpenAI API key found")
# Get the Anthropic API key from the request. Fall back to environment variable if not provided.
# If neither is provided, we throw an error later only if Claude is used.
anthropic_api_key = None
if "anthropicApiKey" in params and params["anthropicApiKey"]:
anthropic_api_key = params["anthropicApiKey"]
print("Using Anthropic API key from client-side settings dialog")
else:
anthropic_api_key = ANTHROPIC_API_KEY
if anthropic_api_key:
print("Using Anthropic API key from environment variable")
# Get the OpenAI Base URL from the request. Fall back to environment variable if not provided.
openai_base_url = None
openai_base_url: Union[str, None] = None
# Disable user-specified OpenAI Base URL in prod
if not os.environ.get("IS_PROD"):
if "openAiBaseURL" in params and params["openAiBaseURL"]:
@ -255,7 +266,7 @@ async def stream_code(websocket: WebSocket):
video_data_url = params["image"]
prompt_messages = await assemble_claude_prompt_video(video_data_url)
pprint_prompt(prompt_messages) # type: ignore
# pprint_prompt(prompt_messages) # type: ignore
if SHOULD_MOCK_AI_RESPONSE:
completion = await mock_completion(
@ -267,25 +278,28 @@ async def stream_code(websocket: WebSocket):
if IS_PROD:
raise Exception("Video mode is not supported in prod")
if not ANTHROPIC_API_KEY:
if not anthropic_api_key:
await throw_error(
"Video only works with Anthropic models. No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env"
"Video only works with Anthropic models. No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env or in the settings dialog"
)
raise Exception("No Anthropic key")
completion = await stream_claude_response_native(
system_prompt=VIDEO_PROMPT,
messages=prompt_messages, # type: ignore
api_key=ANTHROPIC_API_KEY,
api_key=anthropic_api_key,
callback=lambda x: process_chunk(x),
model=Llm.CLAUDE_3_OPUS,
include_thinking=True,
)
exact_llm_version = Llm.CLAUDE_3_OPUS
elif code_generation_model == Llm.CLAUDE_3_SONNET:
if not ANTHROPIC_API_KEY:
elif (
code_generation_model == Llm.CLAUDE_3_SONNET
or code_generation_model == Llm.CLAUDE_3_5_SONNET_2024_06_20
):
if not anthropic_api_key:
await throw_error(
"No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env"
"No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env or in the settings dialog"
)
raise Exception("No Anthropic key")
@ -298,8 +312,9 @@ async def stream_code(websocket: WebSocket):
completion = await stream_claude_response(
prompt_messages, # type: ignore
api_key=ANTHROPIC_API_KEY,
api_key=anthropic_api_key,
callback=lambda x: process_chunk(x),
model=code_generation_model,
)
exact_llm_version = code_generation_model
else:

View File

@ -1,4 +1,4 @@
FROM node:20.9-bullseye-slim
FROM node:22-bullseye-slim
# Set the working directory in the container
WORKDIR /app
@ -6,6 +6,9 @@ WORKDIR /app
# Copy package.json and yarn.lock
COPY package.json yarn.lock /app/
# Set the environment variable to skip Puppeteer download
ENV PUPPETEER_SKIP_DOWNLOAD=true
# Install dependencies
RUN yarn install

View File

@ -79,6 +79,7 @@ function App({ navbarComponent }: Props) {
{
openAiApiKey: null,
openAiBaseURL: null,
anthropicApiKey: null,
screenshotOneApiKey: null,
isImageGenerationEnabled: true,
editorTheme: EditorTheme.COBALT,
@ -109,8 +110,10 @@ function App({ navbarComponent }: Props) {
CodeGenerationModel.GPT_4_TURBO_2024_04_09 &&
settings.generatedCodeConfig === Stack.REACT_TAILWIND;
const showGpt4OMessage =
const showBetterModelMessage =
selectedCodeGenerationModel !== CodeGenerationModel.GPT_4O_2024_05_13 &&
selectedCodeGenerationModel !==
CodeGenerationModel.CLAUDE_3_5_SONNET_2024_06_20 &&
appState === AppState.INITIAL;
const showFeedbackCallNote =
@ -471,11 +474,11 @@ function App({ navbarComponent }: Props) {
</div>
)}
{showGpt4OMessage && (
{showBetterModelMessage && (
<div className="rounded-lg p-2 bg-fuchsia-200">
<p className="text-gray-800 text-sm">
Now supporting GPT-4o. Higher quality and 2x faster. Give it a
try!
Now supporting GPT-4o and Claude Sonnet 3.5. Higher quality and
2x faster. Give it a try!
</p>
</div>
)}

View File

@ -49,7 +49,7 @@ function SettingsDialog({ settings, setSettings }: Props) {
<div className="flex items-center space-x-2">
<Label htmlFor="image-generation">
<div>DALL-E Placeholder Image Generation</div>
<div className="font-light mt-2">
<div className="font-light mt-2 text-xs">
More fun with it but if you want to save money, turn it off.
</div>
</Label>
@ -64,29 +64,31 @@ function SettingsDialog({ settings, setSettings }: Props) {
}
/>
</div>
<div className="flex flex-col space-y-4">
<Label htmlFor="openai-api-key">
<div>OpenAI API key</div>
<div className="font-light mt-2 leading-relaxed">
Only stored in your browser. Never stored on servers. Overrides
your .env config.
</div>
</Label>
<div className="flex flex-col space-y-6">
<div>
<Label htmlFor="openai-api-key">
<div>OpenAI API key</div>
<div className="font-light mt-1 mb-2 text-xs leading-relaxed">
Only stored in your browser. Never stored on servers. Overrides
your .env config.
</div>
</Label>
<Input
id="openai-api-key"
placeholder="OpenAI API key"
value={settings.openAiApiKey || ""}
onChange={(e) =>
setSettings((s) => ({
...s,
openAiApiKey: e.target.value,
}))
}
/>
<Input
id="openai-api-key"
placeholder="OpenAI API key"
value={settings.openAiApiKey || ""}
onChange={(e) =>
setSettings((s) => ({
...s,
openAiApiKey: e.target.value,
}))
}
/>
</div>
{!IS_RUNNING_ON_CLOUD && (
<>
<div>
<Label htmlFor="openai-api-key">
<div>OpenAI Base URL (optional)</div>
<div className="font-light mt-2 leading-relaxed">
@ -105,9 +107,31 @@ function SettingsDialog({ settings, setSettings }: Props) {
}))
}
/>
</>
</div>
)}
<div>
<Label htmlFor="anthropic-api-key">
<div>Anthropic API key</div>
<div className="font-light mt-1 text-xs leading-relaxed">
Only stored in your browser. Never stored on servers. Overrides
your .env config.
</div>
</Label>
<Input
id="anthropic-api-key"
placeholder="Anthropic API key"
value={settings.anthropicApiKey || ""}
onChange={(e) =>
setSettings((s) => ({
...s,
anthropicApiKey: e.target.value,
}))
}
/>
</div>
<Accordion type="single" collapsible className="w-full">
<AccordionItem value="item-1">
<AccordionTrigger>Screenshot by URL Config</AccordionTrigger>

View File

@ -18,7 +18,5 @@ export function getAdjustedCoordinates(
const offsetX = rect ? rect.left : 0;
const offsetY = rect ? rect.top : 0;
// Adjust for scale
const scale = 1; // the scale factor applied to the iframe
return { x: x / scale + offsetX, y: y / scale + offsetY };
return { x: x + offsetX, y: y + offsetY };
}

View File

@ -2,6 +2,7 @@
// Order here matches dropdown order
export enum CodeGenerationModel {
GPT_4O_2024_05_13 = "gpt-4o-2024-05-13",
CLAUDE_3_5_SONNET_2024_06_20 = "claude-3-5-sonnet-20240620",
GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09",
GPT_4_VISION = "gpt_4_vision",
CLAUDE_3_SONNET = "claude_3_sonnet",
@ -16,6 +17,11 @@ export const CODE_GENERATION_MODEL_DESCRIPTIONS: {
};
} = {
"gpt-4o-2024-05-13": { name: "GPT-4o 🌟", inBeta: false, isPaid: false },
"claude-3-5-sonnet-20240620": {
name: "Claude 3.5 Sonnet 🌟",
inBeta: false,
isPaid: false,
},
"gpt-4-turbo-2024-04-09": {
name: "GPT-4 Turbo (Apr 2024)",
inBeta: false,

View File

@ -3,13 +3,11 @@ import { create } from "zustand";
// Store for app-wide state
interface AppStore {
inSelectAndEditMode: boolean;
inputMode: "image" | "video";
toggleInSelectAndEditMode: () => void;
disableInSelectAndEditMode: () => void;
}
export const useAppStore = create<AppStore>((set) => ({
inputMode: "image",
inSelectAndEditMode: false,
toggleInSelectAndEditMode: () =>
set((state) => ({ inSelectAndEditMode: !state.inSelectAndEditMode })),

View File

@ -16,6 +16,7 @@ export interface Settings {
codeGenerationModel: CodeGenerationModel;
// Only relevant for hosted version
isTermOfServiceAccepted: boolean;
anthropicApiKey: string | null; // Added property for anthropic API key
}
export enum AppState {