import base64
import io
import time

# Claude rejects any single image whose base64 encoding exceeds 5 MB.
CLAUDE_IMAGE_MAX_SIZE = 5 * 1024 * 1024


# Process image so it meets Claude requirements
def process_image(image_data_url: str) -> tuple[str, str]:
    """Shrink an image so its base64 payload fits Claude's 5 MB limit.

    Args:
        image_data_url: a data URL, e.g. ``data:image/png;base64,iVBOR...``.

    Returns:
        ``(media_type, base64_data)``. Returned unchanged when the payload is
        already under the limit; otherwise the image is re-encoded as JPEG,
        stepping the quality down from 95 in steps of 5 (floor near 10)
        until the encoded size fits, and ``("image/jpeg", <base64>)`` is
        returned.
    """
    media_type = image_data_url.split(";")[0].split(":")[1]
    base64_data = image_data_url.split(",")[1]

    # If image is already under max size, return as is
    if len(base64_data) <= CLAUDE_IMAGE_MAX_SIZE:
        print("[CLAUDE IMAGE PROCESSING] no processing needed")
        return (media_type, base64_data)

    # Imported lazily so the common small-image path never touches Pillow.
    from PIL import Image

    # Time image processing
    start_time = time.time()

    image_bytes = base64.b64decode(base64_data)
    img = Image.open(io.BytesIO(image_bytes))

    # Convert and compress as JPEG
    quality = 95
    output = io.BytesIO()
    img = img.convert("RGB")  # Ensure image is in RGB mode for JPEG conversion
    img.save(output, format="JPEG", quality=quality)

    # Reduce quality until image is under max size.
    # Fix: decrement the quality BEFORE re-encoding, so every iteration tries
    # a strictly lower quality. (Decrementing after the save re-encoded the
    # first iteration at the same quality, 95, that was already tried above.)
    while (
        len(base64.b64encode(output.getvalue())) > CLAUDE_IMAGE_MAX_SIZE
        and quality > 10
    ):
        quality -= 5
        output = io.BytesIO()
        img.save(output, format="JPEG", quality=quality)

    # Log so we know it was modified
    old_size = len(base64_data)
    new_size = len(base64.b64encode(output.getvalue()))
    print(
        f"[CLAUDE IMAGE PROCESSING] image size updated: old size = {old_size} bytes, new size = {new_size} bytes"
    )

    end_time = time.time()
    processing_time = end_time - start_time
    print(f"[CLAUDE IMAGE PROCESSING] processing time: {processing_time:.2f} seconds")

    return ("image/jpeg", base64.b64encode(output.getvalue()).decode("utf-8"))
image_data_url = cast(str, content["image_url"]["url"]) - media_type = image_data_url.split(";")[0].split(":")[1] - base64_data = image_data_url.split(",")[1] + + # Process image and split media type and data + # so it works with Claude (under 5mb in base64 encoding) + (media_type, base64_data) = process_image(image_data_url) # Remove OpenAI parameter del content["image_url"] diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 8030244..7b96b3b 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -18,6 +18,8 @@ pre-commit = "^3.6.2" anthropic = "^0.18.0" moviepy = "^1.0.3" sentry-sdk = {extras = ["fastapi"], version = "^1.38.0"} +pillow = "^10.3.0" +types-pillow = "^10.2.0.20240520" [tool.poetry.group.dev.dependencies] pytest = "^7.4.3" diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 2e1d177..6c8fbd2 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -120,7 +120,9 @@ function App({ navbarComponent }: Props) { subscriberTier !== "free" && SHOULD_SHOW_FEEDBACK_CALL_NOTE; const showSelectAndEditFeature = - selectedCodeGenerationModel === CodeGenerationModel.GPT_4O_2024_05_13 && + (selectedCodeGenerationModel === CodeGenerationModel.GPT_4O_2024_05_13 || + selectedCodeGenerationModel === + CodeGenerationModel.CLAUDE_3_5_SONNET_2024_06_20) && settings.generatedCodeConfig === Stack.HTML_TAILWIND; // Indicate coding state using the browser tab's favicon and title