diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..f3c0ced --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1 @@ +github: [abi] diff --git a/README.md b/README.md index 8c520b3..839bf99 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # screenshot-to-code -A simple tool to convert screenshots, mockups and Figma designs into clean, functional code using AI. **Now supporting GPT-4O!** +A simple tool to convert screenshots, mockups and Figma designs into clean, functional code using AI. **Now supporting Claude Sonnet 3.5 and GPT-4O!** https://github.com/abi/screenshot-to-code/assets/23818/6cebadae-2fe3-4986-ac6a-8fb9db030045 @@ -15,7 +15,8 @@ Supported stacks: Supported AI models: -- GPT-4O - Best model! +- Claude Sonnet 3.5 - Best model! +- GPT-4O - also recommended! - GPT-4 Turbo (Apr 2024) - GPT-4 Vision (Nov 2023) - Claude 3 Sonnet @@ -31,7 +32,11 @@ We also just added experimental support for taking a video/screen recording of a [Follow me on Twitter for updates](https://twitter.com/_abi_). -## 🚀 Try It Out without installing +## Sponsors + + + +## 🚀 Hosted Version [Try it live on the hosted version (paid)](https://screenshottocode.com). @@ -41,9 +46,9 @@ We also just added experimental support for taking a video/screen recording of a - If you use Bedrock Claude 3/3.5, install the AWS CLI (https://aws.amazon.com/cn/cli/) on the machine running the app, configure the access key and secret key of an AWS IAM account, and enable Bedrock Claude 3 model access for that account. - If you use Bedrock Claude 3/3.5, you do not need to set OPENAI_API_KEY or ANTHROPIC_API_KEY in .env. - The app has a React/Vite frontend and a FastAPI backend. You will need an OpenAI API key with access to the GPT-4 Vision API, or an Anthropic key if you want to use Claude Sonnet or the experimental video support. + Run the backend (I use Poetry for package management - `pip install poetry` if you don't have it): ```bash poetry shell poetry run uvicorn main:app --reload --port 7001 ``` -If you want to use Anthropic, add the `ANTHROPIC_API_KEY` to `backend/.env` with your API key from Anthropic. +If you want to use Anthropic, add `ANTHROPIC_API_KEY` to `backend/.env`. You can also set up the keys using the settings dialog on the front-end (click the gear icon after loading the frontend). Run the frontend: @@ -114,5 +119,3 @@ https://github.com/abi/screenshot-to-code/assets/23818/3fec0f77-44e8-4fb3-a769-a ## 🌍 Hosted Version 🆕 [Try it here (paid)](https://screenshottocode.com). Or see [Getting Started](#-getting-started) for local install instructions to use with your own API keys. - -[!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://www.buymeacoffee.com/abiraja) diff --git a/Troubleshooting.md b/Troubleshooting.md index 89aa3ba..dbb1186 100644 --- a/Troubleshooting.md +++ b/Troubleshooting.md @@ -1,4 +1,4 @@ -### Getting an OpenAI API key with GPT4-Vision model access +### Getting an OpenAI API key with GPT-4 model access You don't need a ChatGPT Pro account. Screenshot to code uses API keys from your OpenAI developer account. 
To get access to the GPT-4 Vision model, log into your OpenAI account and follow these instructions: diff --git a/backend/Dockerfile b/backend/Dockerfile index c520517..5eaf95d 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.12-slim-bullseye +FROM python:3.12.3-slim-bullseye ENV POETRY_VERSION 1.4.1 diff --git a/backend/evals/core.py b/backend/evals/core.py index 5e05362..2fc0352 100644 --- a/backend/evals/core.py +++ b/backend/evals/core.py @@ -15,7 +15,7 @@ async def generate_code_core(image_url: str, stack: Stack, model: Llm) -> str: async def process_chunk(content: str): pass - if model == Llm.CLAUDE_3_SONNET: + if model == Llm.CLAUDE_3_SONNET or model == Llm.CLAUDE_3_5_SONNET_2024_06_20: if not anthropic_api_key: raise Exception("Anthropic API key not found") @@ -23,6 +23,7 @@ async def generate_code_core(image_url: str, stack: Stack, model: Llm) -> str: prompt_messages, api_key=anthropic_api_key, callback=lambda x: process_chunk(x), + model=model, ) else: if not openai_api_key: diff --git a/backend/image_generation.py b/backend/image_generation.py index b93792c..e3f609f 100644 --- a/backend/image_generation.py +++ b/backend/image_generation.py @@ -5,7 +5,7 @@ from openai import AsyncOpenAI from bs4 import BeautifulSoup -async def process_tasks(prompts: List[str], api_key: str, base_url: str): +async def process_tasks(prompts: List[str], api_key: str, base_url: str | None): tasks = [generate_image(prompt, api_key, base_url) for prompt in prompts] results = await asyncio.gather(*tasks, return_exceptions=True) @@ -15,22 +15,23 @@ async def process_tasks(prompts: List[str], api_key: str, base_url: str): print(f"An exception occurred: {result}") processed_results.append(None) else: - processed_results.append(result) # type: ignore + processed_results.append(result) return processed_results -async def generate_image(prompt: str, api_key: str, base_url: str): +async def generate_image( + prompt: str, api_key: str, base_url: str | None +) -> Union[str, None]: client = AsyncOpenAI(api_key=api_key, base_url=base_url) - image_params: Dict[str, Union[str, int]] = { - "model": "dall-e-3", - "quality": "standard", - "style": "natural", - "n": 1, - "size": "1024x1024", - "prompt": prompt, - } - res = await client.images.generate(**image_params) # type: ignore + res = await client.images.generate( + model="dall-e-3", + quality="standard", + style="natural", + n=1, + size="1024x1024", + prompt=prompt, + ) await client.close() return res.data[0].url @@ -63,13 +64,13 @@ def create_alt_url_mapping(code: str) -> Dict[str, str]: async def generate_images( code: str, api_key: str, base_url: Union[str, None], image_cache: Dict[str, str] -): +) -> str: # Find all images soup = BeautifulSoup(code, "html.parser") images = soup.find_all("img") # Extract alt texts as image prompts - alts = [] + alts: List[str | None] = [] for img in images: # Only include URL if the image starts with https://placehold.co # and it's not already in the image_cache if ( img["src"].startswith("https://placehold.co") and image_cache.get(img.get("alt")) is None ): - alts.append(img.get("alt", None)) # type: ignore + alts.append(img.get("alt", None)) # Exclude images with no alt text - alts = [alt for alt in alts if alt is not None] # type: ignore + filtered_alts: List[str] = [alt for alt in alts if alt is not None] # Remove duplicates - prompts = list(set(alts)) # type: ignore + prompts = list(set(filtered_alts)) # Return early if there are 
no images to replace - if len(prompts) == 0: # type: ignore + if len(prompts) == 0: return code # Generate images - results = await process_tasks(prompts, api_key, base_url) # type: ignore + results = await process_tasks(prompts, api_key, base_url) # Create a dict mapping alt text to image URL - mapped_image_urls = dict(zip(prompts, results)) # type: ignore + mapped_image_urls = dict(zip(prompts, results)) # Merge with image_cache - mapped_image_urls = {**mapped_image_urls, **image_cache} # type: ignore + mapped_image_urls = {**mapped_image_urls, **image_cache} # Replace old image URLs with the generated URLs for img in images: diff --git a/backend/llm.py b/backend/llm.py index 039e7eb..c4dc0f2 100644 --- a/backend/llm.py +++ b/backend/llm.py @@ -36,6 +36,7 @@ class Llm(Enum): CLAUDE_3_5_SONNET = "claude-3-5-sonnet-20240620" CLAUDE_3_OPUS = "claude-3-opus-20240229" CLAUDE_3_HAIKU = "claude-3-haiku-20240307" + CLAUDE_3_5_SONNET_2024_06_20 = "claude-3-5-sonnet-20240620" BEDROCK_LLM_MODELID_LIST = {Llm.CLAUDE_3_5_SONNET: 'anthropic.claude-3-sonnet-20240229-v1:0', @@ -83,9 +84,15 @@ async def stream_openai_response( full_response = "" async for chunk in stream: # type: ignore assert isinstance(chunk, ChatCompletionChunk) - content = chunk.choices[0].delta.content or "" - full_response += content - await callback(content) + if ( + chunk.choices + and len(chunk.choices) > 0 + and chunk.choices[0].delta + and chunk.choices[0].delta.content + ): + content = chunk.choices[0].delta.content or "" + full_response += content + await callback(content) await client.close() @@ -96,6 +103,7 @@ async def stream_claude_response( messages: List[ChatCompletionMessageParam], api_key: str, callback: Callable[[str], Awaitable[None]], + model: Llm, ) -> str: # client = AsyncAnthropic(api_key=api_key) diff --git a/backend/routes/generate_code.py b/backend/routes/generate_code.py index a0460b1..554d994 100644 --- a/backend/routes/generate_code.py +++ b/backend/routes/generate_code.py @@ -13,7 +13,7 @@ from llm import ( ) from openai.types.chat import ChatCompletionMessageParam from mock_llm import mock_completion -from typing import Dict, List, cast, get_args +from typing import Dict, List, Union, cast, get_args from image_generation import create_alt_url_mapping, generate_images from prompts import assemble_imported_code_prompt, assemble_prompt from datetime import datetime @@ -120,8 +120,19 @@ async def stream_code(websocket: WebSocket): ) return + # Get the Anthropic API key from the request. Fall back to environment variable if not provided. + # If neither is provided, we throw an error later only if Claude is used. + anthropic_api_key = None + if "anthropicApiKey" in params and params["anthropicApiKey"]: + anthropic_api_key = params["anthropicApiKey"] + print("Using Anthropic API key from client-side settings dialog") + else: + anthropic_api_key = ANTHROPIC_API_KEY + if anthropic_api_key: + print("Using Anthropic API key from environment variable") + # Get the OpenAI Base URL from the request. Fall back to environment variable if not provided. - openai_base_url = None + openai_base_url: Union[str, None] = None # Disable user-specified OpenAI Base URL in prod if not os.environ.get("IS_PROD"): if "openAiBaseURL" in params and params["openAiBaseURL"]: @@ -219,6 +230,7 @@ async def stream_code(websocket: WebSocket): else: try: if validated_input_mode == "video": + # if not ANTHROPIC_API_KEY: # await throw_error( # "Video only works with Anthropic models. No Anthropic API key found. 
Please add the environment variable ANTHROPIC_API_KEY to backend/.env" @@ -228,24 +240,27 @@ async def stream_code(websocket: WebSocket): completion = await stream_claude_response_native( system_prompt=VIDEO_PROMPT, messages=prompt_messages, # type: ignore - api_key=ANTHROPIC_API_KEY, + api_key=anthropic_api_key, callback=lambda x: process_chunk(x), - model=Llm.CLAUDE_3_SONNET, + model=Llm.CLAUDE_3_5_SONNET, include_thinking=True, ) - # exact_llm_version = Llm.CLAUDE_3_OPUS - exact_llm_version = Llm.CLAUDE_3_SONNET - elif code_generation_model == Llm.CLAUDE_3_SONNET: - # if not ANTHROPIC_API_KEY: - # await throw_error( - # "No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env" - # ) - # raise Exception("No Anthropic key") + exact_llm_version = Llm.CLAUDE_3_5_SONNET + elif ( + code_generation_model == Llm.CLAUDE_3_SONNET + or code_generation_model == Llm.CLAUDE_3_5_SONNET_2024_06_20 + ): +# if not anthropic_api_key: +# await throw_error( +# "No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env or in the settings dialog" +# ) +# raise Exception("No Anthropic key") completion = await stream_claude_response( prompt_messages, # type: ignore - api_key=ANTHROPIC_API_KEY, + api_key=anthropic_api_key, callback=lambda x: process_chunk(x), + model=code_generation_model, ) exact_llm_version = code_generation_model else: diff --git a/frontend/.gitignore b/frontend/.gitignore index 17ceca3..a0d3702 100644 --- a/frontend/.gitignore +++ b/frontend/.gitignore @@ -25,3 +25,6 @@ dist-ssr # Env files .env* + +# Test files +src/tests/results/ diff --git a/frontend/Dockerfile b/frontend/Dockerfile index b176926..8579e17 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -1,4 +1,4 @@ -FROM node:20.9-bullseye-slim +FROM node:22-bullseye-slim # Set the working directory in the container WORKDIR /app @@ -6,6 +6,9 @@ WORKDIR /app # Copy package.json and yarn.lock COPY package.json yarn.lock /app/ +# Set the environment variable to skip Puppeteer download +ENV PUPPETEER_SKIP_DOWNLOAD=true + # Install dependencies RUN yarn install diff --git a/frontend/jest.config.js b/frontend/jest.config.js new file mode 100644 index 0000000..310efb5 --- /dev/null +++ b/frontend/jest.config.js @@ -0,0 +1,9 @@ +export default { + preset: "ts-jest", + testEnvironment: "node", + setupFiles: ["<rootDir>/src/setupTests.ts"], + transform: { + "^.+\\.tsx?$": "ts-jest", + }, + testTimeout: 30000, +}; diff --git a/frontend/package.json b/frontend/package.json index 7109443..4652dc7 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -10,7 +10,7 @@ "build-hosted": "tsc && vite build --mode prod", "lint": "eslint . 
--ext ts,tsx --report-unused-disable-directives --max-warnings 0", "preview": "vite preview", - "test": "vitest" + "test": "jest" }, "dependencies": { "@codemirror/lang-html": "^6.4.6", @@ -46,21 +46,28 @@ "tailwindcss-animate": "^1.0.7", "thememirror": "^2.0.1", "vite-plugin-checker": "^0.6.2", - "webm-duration-fix": "^1.0.4" + "webm-duration-fix": "^1.0.4", + "zustand": "^4.5.2" }, "devDependencies": { + "@types/jest": "^29.5.12", "@types/node": "^20.9.0", + "@types/puppeteer": "^7.0.4", "@types/react": "^18.2.15", "@types/react-dom": "^18.2.7", "@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/parser": "^6.0.0", "@vitejs/plugin-react": "^4.0.3", "autoprefixer": "^10.4.16", + "dotenv": "^16.4.5", "eslint": "^8.45.0", "eslint-plugin-react-hooks": "^4.6.0", "eslint-plugin-react-refresh": "^0.4.3", + "jest": "^29.7.0", "postcss": "^8.4.31", + "puppeteer": "^22.6.4", "tailwindcss": "^3.3.5", + "ts-jest": "^29.1.2", "typescript": "^5.0.2", "vite": "^4.4.5", "vite-plugin-html": "^3.2.0", diff --git a/frontend/src/.env.jest.example b/frontend/src/.env.jest.example new file mode 100644 index 0000000..59bc657 --- /dev/null +++ b/frontend/src/.env.jest.example @@ -0,0 +1,2 @@ +TEST_SCREENSHOTONE_API_KEY= +TEST_ROOT_PATH= diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index f1e366b..78ee29f 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -40,6 +40,8 @@ import ModelSettingsSection from "./components/ModelSettingsSection"; import { extractHtml } from "./components/preview/extractHtml"; import useBrowserTabIndicator from "./hooks/useBrowserTabIndicator"; import TipLink from "./components/core/TipLink"; +import SelectAndEditModeToggleButton from "./components/select-and-edit/SelectAndEditModeToggleButton"; +import { useAppStore } from "./store/app-store"; const IS_OPENAI_DOWN = false; @@ -54,11 +56,14 @@ function App() { const [updateInstruction, setUpdateInstruction] = useState(""); const [isImportedFromCode, setIsImportedFromCode] = useState(false); + const { disableInSelectAndEditMode } = useAppStore(); + // Settings const [settings, setSettings] = usePersistedState( { openAiApiKey: null, openAiBaseURL: null, + anthropicApiKey: null, screenshotOneApiKey: null, isImageGenerationEnabled: true, editorTheme: EditorTheme.COBALT, @@ -89,9 +94,15 @@ function App() { CodeGenerationModel.GPT_4_TURBO_2024_04_09 && settings.generatedCodeConfig === Stack.REACT_TAILWIND; - // const showGpt4OMessage = - // selectedCodeGenerationModel !== CodeGenerationModel.GPT_4O_2024_05_13 && - // appState === AppState.INITIAL; + const showBetterModelMessage = + selectedCodeGenerationModel !== CodeGenerationModel.GPT_4O_2024_05_13 && + selectedCodeGenerationModel !== + CodeGenerationModel.CLAUDE_3_5_SONNET_2024_06_20 && + appState === AppState.INITIAL; + + const showSelectAndEditFeature = + selectedCodeGenerationModel === CodeGenerationModel.GPT_4O_2024_05_13 && + settings.generatedCodeConfig === Stack.HTML_TAILWIND; // Indicate coding state using the browser tab's favicon and title useBrowserTabIndicator(appState === AppState.CODING); @@ -148,6 +159,7 @@ function App() { setAppHistory([]); setCurrentVersion(null); setShouldIncludeResultImage(false); + disableInSelectAndEditMode(); }; const regenerate = () => { @@ -236,7 +248,9 @@ function App() { parentIndex: parentVersion, code, inputs: { - prompt: updateInstruction, + prompt: params.history + ? 
params.history[params.history.length - 1] + : updateInstruction, }, }, ]; @@ -278,7 +292,10 @@ function App() { } // Subsequent updates - async function doUpdate() { + async function doUpdate( + updateInstruction: string, + selectedElement?: HTMLElement + ) { if (currentVersion === null) { toast.error( "No current version set. Contact support or open a Github issue." @@ -296,7 +313,17 @@ function App() { return; } - const updatedHistory = [...historyTree, updateInstruction]; + let modifiedUpdateInstruction = updateInstruction; + + // Send in a reference to the selected element if it exists + if (selectedElement) { + modifiedUpdateInstruction = + updateInstruction + + " referring to this element specifically: " + + selectedElement.outerHTML; + } + + const updatedHistory = [...historyTree, modifiedUpdateInstruction]; if (shouldIncludeResultImage) { const resultImage = await takeScreenshot(); @@ -407,11 +434,11 @@ function App() { )} - {/* {showGpt4OMessage && ( + {showBetterModelMessage && (

- Now supporting GPT-4o. Higher quality and 2x faster. Give it a - try! + Now supporting Claude Sonnet 3.5. Higher quality and + 2x faster. Give it a try!

-          )} */} +          )} @@ -481,8 +508,8 @@ function App() { @@ -490,10 +517,13 @@ function App() {
+          {showSelectAndEditFeature && ( +            <SelectAndEditModeToggleButton /> +          )}
@@ -599,7 +629,7 @@ function App() { @@ -622,10 +652,18 @@ function App() {

Drag & drop a screenshot here,
or click to upload diff --git a/frontend/src/components/ImportCodeSection.tsx b/frontend/src/components/ImportCodeSection.tsx index b320a97..c31e753 100644 --- a/frontend/src/components/ImportCodeSection.tsx +++ b/frontend/src/components/ImportCodeSection.tsx @@ -38,7 +38,9 @@ function ImportCodeSection({ importFromCode }: Props) { return (

@@ -62,7 +64,7 @@ function ImportCodeSection({ importFromCode }: Props) { diff --git a/frontend/src/components/Preview.tsx b/frontend/src/components/Preview.tsx index eb9ea6d..d601f78 100644 --- a/frontend/src/components/Preview.tsx +++ b/frontend/src/components/Preview.tsx @@ -1,21 +1,35 @@ -import { useEffect, useRef } from "react"; +import { useEffect, useRef, useState } from "react"; import classNames from "classnames"; import useThrottle from "../hooks/useThrottle"; +import EditPopup from "./select-and-edit/EditPopup"; interface Props { code: string; device: "mobile" | "desktop"; + doUpdate: (updateInstruction: string, selectedElement?: HTMLElement) => void; } -function Preview({ code, device }: Props) { +function Preview({ code, device, doUpdate }: Props) { const iframeRef = useRef<HTMLIFrameElement>(null); // Don't update code more often than every 200ms. const throttledCode = useThrottle(code, 200); + // Select and edit functionality + const [clickEvent, setClickEvent] = useState<MouseEvent | null>(null); + useEffect(() => { - if (iframeRef.current) { - iframeRef.current.srcdoc = throttledCode; + const iframe = iframeRef.current; + if (iframe) { + iframe.srcdoc = throttledCode; + + // Set up click handler for select and edit functionality + iframe.addEventListener("load", function () { + iframe.contentWindow?.document.body.addEventListener( + "click", + setClickEvent ); + }); } }, [throttledCode]); @@ -34,6 +48,7 @@ function Preview({ code, device }: Props) { } )} > + ); } diff --git a/frontend/src/components/SettingsDialog.tsx b/frontend/src/components/SettingsDialog.tsx index 2e7814b..97d8f38 100644 --- a/frontend/src/components/SettingsDialog.tsx +++ b/frontend/src/components/SettingsDialog.tsx @@ -49,7 +49,7 @@ function SettingsDialog({ settings, setSettings }: Props) {
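
A note on `backend/image_generation.py`: `process_tasks` fans out one DALL·E 3 request per prompt and relies on `asyncio.gather(..., return_exceptions=True)` so that a single failed generation degrades to `None` instead of aborting the whole batch. Below is a minimal standalone sketch of that pattern; the model parameters mirror the diff, but the helper names are illustrative, not the project's exact code:

```python
import asyncio
from typing import List, Optional

from openai import AsyncOpenAI


async def generate_image(client: AsyncOpenAI, prompt: str) -> Optional[str]:
    res = await client.images.generate(
        model="dall-e-3", quality="standard", n=1, size="1024x1024", prompt=prompt
    )
    return res.data[0].url


async def generate_all(prompts: List[str]) -> List[Optional[str]]:
    client = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment
    tasks = [generate_image(client, p) for p in prompts]
    # return_exceptions=True keeps one bad prompt from cancelling the rest;
    # failures come back as Exception objects instead of propagating.
    results = await asyncio.gather(*tasks, return_exceptions=True)
    await client.close()
    # Collapse failures to None so callers can keep the placeholder image.
    return [r if isinstance(r, str) else None for r in results]
```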
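
The new `filtered_alts` variable in `generate_images` exists to give the alt-text pipeline honest types: collect alt texts only from `https://placehold.co` placeholders that are not already cached, drop empty ones, and de-duplicate. A sketch of that filtering logic, with a hypothetical function name:

```python
from typing import Dict, List

from bs4 import BeautifulSoup


def collect_image_prompts(code: str, image_cache: Dict[str, str]) -> List[str]:
    soup = BeautifulSoup(code, "html.parser")
    alts: List[str | None] = []
    for img in soup.find_all("img"):
        # Only placeholder images we haven't already generated qualify.
        if (
            img.get("src", "").startswith("https://placehold.co")
            and image_cache.get(img.get("alt")) is None
        ):
            alts.append(img.get("alt", None))
    # Exclude images with no alt text, then de-duplicate identical prompts.
    filtered_alts: List[str] = [alt for alt in alts if alt is not None]
    return list(set(filtered_alts))
```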
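
One subtlety in `backend/llm.py`: the new `CLAUDE_3_5_SONNET_2024_06_20` member has the same string value as the existing `CLAUDE_3_5_SONNET`, and Python's `Enum` treats a duplicate value as an alias rather than a distinct member. The comparisons in `evals/core.py` and `routes/generate_code.py` therefore match either spelling. A self-contained demonstration:

```python
from enum import Enum


class Llm(Enum):
    CLAUDE_3_5_SONNET = "claude-3-5-sonnet-20240620"
    # Same value as above: Enum makes this an alias, not a second member.
    CLAUDE_3_5_SONNET_2024_06_20 = "claude-3-5-sonnet-20240620"


# Aliases are the canonical member, so both spellings compare equal...
assert Llm.CLAUDE_3_5_SONNET_2024_06_20 is Llm.CLAUDE_3_5_SONNET
# ...and lookup by value resolves to the canonical name.
assert Llm("claude-3-5-sonnet-20240620") is Llm.CLAUDE_3_5_SONNET
```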
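
The reworked loop in `stream_openai_response` guards against chunks whose `choices` list is empty or whose delta carries no `content`; some OpenAI-compatible backends (Azure content-filter frames, certain proxies) emit such chunks, and unconditional `chunk.choices[0]` indexing would raise `IndexError` mid-stream. A minimal sketch of the same defensive pattern, assuming `OPENAI_API_KEY` is set in the environment:

```python
import asyncio

from openai import AsyncOpenAI


async def stream_completion(prompt: str) -> str:
    client = AsyncOpenAI()
    stream = await client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}],
        stream=True,
    )
    full_response = ""
    async for chunk in stream:
        # Guard: skip chunks with no choices (e.g. filter or keep-alive
        # frames) and deltas without content, instead of crashing on them.
        if chunk.choices and chunk.choices[0].delta.content:
            full_response += chunk.choices[0].delta.content
    await client.close()
    return full_response


# print(asyncio.run(stream_completion("Hello")))
```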
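
Threading `model` through `stream_claude_response`, with `model=model` passed at the call site in `evals/core.py`, is what lets one helper serve both Claude 3 Sonnet and Claude 3.5 Sonnet. A sketch of parameterized streaming with the Anthropic SDK, assuming `ANTHROPIC_API_KEY` is set; this is an illustration, not the project's exact function:

```python
import asyncio

from anthropic import AsyncAnthropic


async def stream_claude(prompt: str, model: str) -> str:
    client = AsyncAnthropic()
    full_response = ""
    # The model id is a parameter, so Claude 3 and 3.5 share one code path.
    async with client.messages.stream(
        model=model,
        max_tokens=4096,
        messages=[{"role": "user", "content": prompt}],
    ) as stream:
        async for text in stream.text_stream:
            full_response += text
    return full_response


# asyncio.run(stream_claude("Hello", "claude-3-5-sonnet-20240620"))
```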
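
Finally, the Anthropic key resolution in `routes/generate_code.py` mirrors the existing OpenAI handling: a key from the client-side settings dialog wins, the backend environment is the fallback, and a missing key only becomes an error later, and only if a Claude model is actually selected. The same precedence as a compact sketch (the `anthropicApiKey` parameter name follows the diff):

```python
import os
from typing import Any, Dict, Optional


def resolve_anthropic_key(params: Dict[str, Any]) -> Optional[str]:
    # 1. Prefer a non-empty key from the client-side settings dialog.
    if params.get("anthropicApiKey"):
        return params["anthropicApiKey"]
    # 2. Fall back to the backend environment (backend/.env).
    # 3. May still be None: only raise later, and only if Claude is used.
    return os.environ.get("ANTHROPIC_API_KEY")
```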