Merge branch 'main' into dev

This commit is contained in:
xiehust 2024-06-25 18:25:40 +08:00 committed by GitHub
commit e9c6756c2e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
31 changed files with 2964 additions and 252 deletions

1
.github/FUNDING.yml vendored Normal file
View File

@ -0,0 +1 @@
github: [abi]

View File

@ -1,6 +1,6 @@
# screenshot-to-code # screenshot-to-code
A simple tool to convert screenshots, mockups and Figma designs into clean, functional code using AI. **Now supporting GPT-4O!** A simple tool to convert screenshots, mockups and Figma designs into clean, functional code using AI. **Now supporting Claude Sonnet 3.5 and GPT-4O!**
https://github.com/abi/screenshot-to-code/assets/23818/6cebadae-2fe3-4986-ac6a-8fb9db030045 https://github.com/abi/screenshot-to-code/assets/23818/6cebadae-2fe3-4986-ac6a-8fb9db030045
@ -15,7 +15,8 @@ Supported stacks:
Supported AI models: Supported AI models:
- GPT-4O - Best model! - Claude Sonnet 3.5 - Best model!
- GPT-4O - also recommended!
- GPT-4 Turbo (Apr 2024) - GPT-4 Turbo (Apr 2024)
- GPT-4 Vision (Nov 2023) - GPT-4 Vision (Nov 2023)
- Claude 3 Sonnet - Claude 3 Sonnet
@ -31,7 +32,11 @@ We also just added experimental support for taking a video/screen recording of a
[Follow me on Twitter for updates](https://twitter.com/_abi_). [Follow me on Twitter for updates](https://twitter.com/_abi_).
## 🚀 Try It Out without no install ## Sponsors
<a href="https://konghq.com/products/kong-konnect/register?utm_medium=referral&utm_source=github&utm_campaign=platform&utm_content=screenshot-to-code" target="_blank" title="Kong - powering the API world"><img src="https://picoapps.xyz/s2c-sponsors/Kong-GitHub-240x100.png"></a>
## 🚀 Hosted Version
[Try it live on the hosted version (paid)](https://screenshottocode.com). [Try it live on the hosted version (paid)](https://screenshottocode.com).
@ -41,9 +46,9 @@ We also just added experimental support for taking a video/screen recording of a
- 如果使用Bedrock Claude 3/3.5需要在运行机器上安装 https://aws.amazon.com/cn/cli/ 并配置aws iam 账号的ak sk另外还需要开通该账号Bedrock Claude 3 访问的权限。 - 如果使用Bedrock Claude 3/3.5需要在运行机器上安装 https://aws.amazon.com/cn/cli/ 并配置aws iam 账号的ak sk另外还需要开通该账号Bedrock Claude 3 访问的权限。
- 如果使用Bedrock Claude 3/3.5则无须配置OPENAI_API_KEY 或者 ANTHROPIC_API_KEY 到.env中 - 如果使用Bedrock Claude 3/3.5则无须配置OPENAI_API_KEY 或者 ANTHROPIC_API_KEY 到.env中
The app has a React/Vite frontend and a FastAPI backend. You will need an OpenAI API key with access to the GPT-4 Vision API or an Anthropic key if you want to use Claude Sonnet, or for experimental video support. The app has a React/Vite frontend and a FastAPI backend. You will need an OpenAI API key with access to the GPT-4 Vision API or an Anthropic key if you want to use Claude Sonnet, or for experimental video support.
Run the backend (I use Poetry for package management - `pip install poetry` if you don't have it): Run the backend (I use Poetry for package management - `pip install poetry` if you don't have it):
```bash ```bash
@ -55,7 +60,7 @@ poetry shell
poetry run uvicorn main:app --reload --port 7001 poetry run uvicorn main:app --reload --port 7001
``` ```
If you want to use Anthropic, add the `ANTHROPIC_API_KEY` to `backend/.env` with your API key from Anthropic. If you want to use Anthropic, add `ANTHROPIC_API_KEY` to `backend/.env`. You can also set up the keys using the settings dialog on the front-end (click the gear icon after loading the frontend).
Run the frontend: Run the frontend:
@ -114,5 +119,3 @@ https://github.com/abi/screenshot-to-code/assets/23818/3fec0f77-44e8-4fb3-a769-a
## 🌍 Hosted Version ## 🌍 Hosted Version
🆕 [Try it here (paid)](https://screenshottocode.com). Or see [Getting Started](#-getting-started) for local install instructions to use with your own API keys. 🆕 [Try it here (paid)](https://screenshottocode.com). Or see [Getting Started](#-getting-started) for local install instructions to use with your own API keys.
[!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://www.buymeacoffee.com/abiraja)

View File

@ -1,4 +1,4 @@
### Getting an OpenAI API key with GPT4-Vision model access ### Getting an OpenAI API key with GPT-4 model access
You don't need a ChatGPT Pro account. Screenshot to code uses API keys from your OpenAI developer account. In order to get access to the GPT4 Vision model, log into your OpenAI account and then, follow these instructions: You don't need a ChatGPT Pro account. Screenshot to code uses API keys from your OpenAI developer account. In order to get access to the GPT4 Vision model, log into your OpenAI account and then, follow these instructions:

View File

@ -1,4 +1,4 @@
FROM python:3.12-slim-bullseye FROM python:3.12.3-slim-bullseye
ENV POETRY_VERSION 1.4.1 ENV POETRY_VERSION 1.4.1

View File

@ -15,7 +15,7 @@ async def generate_code_core(image_url: str, stack: Stack, model: Llm) -> str:
async def process_chunk(content: str): async def process_chunk(content: str):
pass pass
if model == Llm.CLAUDE_3_SONNET: if model == Llm.CLAUDE_3_SONNET or model == Llm.CLAUDE_3_5_SONNET_2024_06_20:
if not anthropic_api_key: if not anthropic_api_key:
raise Exception("Anthropic API key not found") raise Exception("Anthropic API key not found")
@ -23,6 +23,7 @@ async def generate_code_core(image_url: str, stack: Stack, model: Llm) -> str:
prompt_messages, prompt_messages,
api_key=anthropic_api_key, api_key=anthropic_api_key,
callback=lambda x: process_chunk(x), callback=lambda x: process_chunk(x),
model=model,
) )
else: else:
if not openai_api_key: if not openai_api_key:

View File

@ -5,7 +5,7 @@ from openai import AsyncOpenAI
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
async def process_tasks(prompts: List[str], api_key: str, base_url: str): async def process_tasks(prompts: List[str], api_key: str, base_url: str | None):
tasks = [generate_image(prompt, api_key, base_url) for prompt in prompts] tasks = [generate_image(prompt, api_key, base_url) for prompt in prompts]
results = await asyncio.gather(*tasks, return_exceptions=True) results = await asyncio.gather(*tasks, return_exceptions=True)
@ -15,22 +15,23 @@ async def process_tasks(prompts: List[str], api_key: str, base_url: str):
print(f"An exception occurred: {result}") print(f"An exception occurred: {result}")
processed_results.append(None) processed_results.append(None)
else: else:
processed_results.append(result) # type: ignore processed_results.append(result)
return processed_results return processed_results
async def generate_image(prompt: str, api_key: str, base_url: str): async def generate_image(
prompt: str, api_key: str, base_url: str | None
) -> Union[str, None]:
client = AsyncOpenAI(api_key=api_key, base_url=base_url) client = AsyncOpenAI(api_key=api_key, base_url=base_url)
image_params: Dict[str, Union[str, int]] = { res = await client.images.generate(
"model": "dall-e-3", model="dall-e-3",
"quality": "standard", quality="standard",
"style": "natural", style="natural",
"n": 1, n=1,
"size": "1024x1024", size="1024x1024",
"prompt": prompt, prompt=prompt,
} )
res = await client.images.generate(**image_params) # type: ignore
await client.close() await client.close()
return res.data[0].url return res.data[0].url
@ -63,13 +64,13 @@ def create_alt_url_mapping(code: str) -> Dict[str, str]:
async def generate_images( async def generate_images(
code: str, api_key: str, base_url: Union[str, None], image_cache: Dict[str, str] code: str, api_key: str, base_url: Union[str, None], image_cache: Dict[str, str]
): ) -> str:
# Find all images # Find all images
soup = BeautifulSoup(code, "html.parser") soup = BeautifulSoup(code, "html.parser")
images = soup.find_all("img") images = soup.find_all("img")
# Extract alt texts as image prompts # Extract alt texts as image prompts
alts = [] alts: List[str | None] = []
for img in images: for img in images:
# Only include URL if the image starts with https://placehold.co # Only include URL if the image starts with https://placehold.co
# and it's not already in the image_cache # and it's not already in the image_cache
@ -77,26 +78,26 @@ async def generate_images(
img["src"].startswith("https://placehold.co") img["src"].startswith("https://placehold.co")
and image_cache.get(img.get("alt")) is None and image_cache.get(img.get("alt")) is None
): ):
alts.append(img.get("alt", None)) # type: ignore alts.append(img.get("alt", None))
# Exclude images with no alt text # Exclude images with no alt text
alts = [alt for alt in alts if alt is not None] # type: ignore filtered_alts: List[str] = [alt for alt in alts if alt is not None]
# Remove duplicates # Remove duplicates
prompts = list(set(alts)) # type: ignore prompts = list(set(filtered_alts))
# Return early if there are no images to replace # Return early if there are no images to replace
if len(prompts) == 0: # type: ignore if len(prompts) == 0:
return code return code
# Generate images # Generate images
results = await process_tasks(prompts, api_key, base_url) # type: ignore results = await process_tasks(prompts, api_key, base_url)
# Create a dict mapping alt text to image URL # Create a dict mapping alt text to image URL
mapped_image_urls = dict(zip(prompts, results)) # type: ignore mapped_image_urls = dict(zip(prompts, results))
# Merge with image_cache # Merge with image_cache
mapped_image_urls = {**mapped_image_urls, **image_cache} # type: ignore mapped_image_urls = {**mapped_image_urls, **image_cache}
# Replace old image URLs with the generated URLs # Replace old image URLs with the generated URLs
for img in images: for img in images:

View File

@ -36,6 +36,7 @@ class Llm(Enum):
CLAUDE_3_5_SONNET = "claude-3-5-sonnet-20240620" CLAUDE_3_5_SONNET = "claude-3-5-sonnet-20240620"
CLAUDE_3_OPUS = "claude-3-opus-20240229" CLAUDE_3_OPUS = "claude-3-opus-20240229"
CLAUDE_3_HAIKU = "claude-3-haiku-20240307" CLAUDE_3_HAIKU = "claude-3-haiku-20240307"
CLAUDE_3_5_SONNET_2024_06_20 = "claude-3-5-sonnet-20240620"
BEDROCK_LLM_MODELID_LIST = {Llm.CLAUDE_3_5_SONNET: 'anthropic.claude-3-sonnet-20240229-v1:0', BEDROCK_LLM_MODELID_LIST = {Llm.CLAUDE_3_5_SONNET: 'anthropic.claude-3-sonnet-20240229-v1:0',
@ -83,6 +84,12 @@ async def stream_openai_response(
full_response = "" full_response = ""
async for chunk in stream: # type: ignore async for chunk in stream: # type: ignore
assert isinstance(chunk, ChatCompletionChunk) assert isinstance(chunk, ChatCompletionChunk)
if (
chunk.choices
and len(chunk.choices) > 0
and chunk.choices[0].delta
and chunk.choices[0].delta.content
):
content = chunk.choices[0].delta.content or "" content = chunk.choices[0].delta.content or ""
full_response += content full_response += content
await callback(content) await callback(content)
@ -96,6 +103,7 @@ async def stream_claude_response(
messages: List[ChatCompletionMessageParam], messages: List[ChatCompletionMessageParam],
api_key: str, api_key: str,
callback: Callable[[str], Awaitable[None]], callback: Callable[[str], Awaitable[None]],
model: Llm,
) -> str: ) -> str:
# client = AsyncAnthropic(api_key=api_key) # client = AsyncAnthropic(api_key=api_key)

View File

@ -13,7 +13,7 @@ from llm import (
) )
from openai.types.chat import ChatCompletionMessageParam from openai.types.chat import ChatCompletionMessageParam
from mock_llm import mock_completion from mock_llm import mock_completion
from typing import Dict, List, cast, get_args from typing import Dict, List, Union, cast, get_args
from image_generation import create_alt_url_mapping, generate_images from image_generation import create_alt_url_mapping, generate_images
from prompts import assemble_imported_code_prompt, assemble_prompt from prompts import assemble_imported_code_prompt, assemble_prompt
from datetime import datetime from datetime import datetime
@ -120,8 +120,19 @@ async def stream_code(websocket: WebSocket):
) )
return return
# Get the Anthropic API key from the request. Fall back to environment variable if not provided.
# If neither is provided, we throw an error later only if Claude is used.
anthropic_api_key = None
if "anthropicApiKey" in params and params["anthropicApiKey"]:
anthropic_api_key = params["anthropicApiKey"]
print("Using Anthropic API key from client-side settings dialog")
else:
anthropic_api_key = ANTHROPIC_API_KEY
if anthropic_api_key:
print("Using Anthropic API key from environment variable")
# Get the OpenAI Base URL from the request. Fall back to environment variable if not provided. # Get the OpenAI Base URL from the request. Fall back to environment variable if not provided.
openai_base_url = None openai_base_url: Union[str, None] = None
# Disable user-specified OpenAI Base URL in prod # Disable user-specified OpenAI Base URL in prod
if not os.environ.get("IS_PROD"): if not os.environ.get("IS_PROD"):
if "openAiBaseURL" in params and params["openAiBaseURL"]: if "openAiBaseURL" in params and params["openAiBaseURL"]:
@ -219,6 +230,7 @@ async def stream_code(websocket: WebSocket):
else: else:
try: try:
if validated_input_mode == "video": if validated_input_mode == "video":
# if not ANTHROPIC_API_KEY: # if not ANTHROPIC_API_KEY:
# await throw_error( # await throw_error(
# "Video only works with Anthropic models. No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env" # "Video only works with Anthropic models. No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env"
@ -228,24 +240,27 @@ async def stream_code(websocket: WebSocket):
completion = await stream_claude_response_native( completion = await stream_claude_response_native(
system_prompt=VIDEO_PROMPT, system_prompt=VIDEO_PROMPT,
messages=prompt_messages, # type: ignore messages=prompt_messages, # type: ignore
api_key=ANTHROPIC_API_KEY, api_key=anthropic_api_key,
callback=lambda x: process_chunk(x), callback=lambda x: process_chunk(x),
model=Llm.CLAUDE_3_SONNET, model=Llm.CLAUDE_3_5_SONNET,
include_thinking=True, include_thinking=True,
) )
# exact_llm_version = Llm.CLAUDE_3_OPUS exact_llm_version = Llm.CLAUDE_3_5_SONNET
exact_llm_version = Llm.CLAUDE_3_SONNET elif (
elif code_generation_model == Llm.CLAUDE_3_SONNET: code_generation_model == Llm.CLAUDE_3_SONNET
# if not ANTHROPIC_API_KEY: or code_generation_model == Llm.CLAUDE_3_5_SONNET_2024_06_20
):
# if not anthropic_api_key:
# await throw_error( # await throw_error(
# "No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env" # "No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env or in the settings dialog"
# ) # )
# raise Exception("No Anthropic key") # raise Exception("No Anthropic key")
completion = await stream_claude_response( completion = await stream_claude_response(
prompt_messages, # type: ignore prompt_messages, # type: ignore
api_key=ANTHROPIC_API_KEY, api_key=anthropic_api_key,
callback=lambda x: process_chunk(x), callback=lambda x: process_chunk(x),
model=code_generation_model,
) )
exact_llm_version = code_generation_model exact_llm_version = code_generation_model
else: else:

3
frontend/.gitignore vendored
View File

@ -25,3 +25,6 @@ dist-ssr
# Env files # Env files
.env* .env*
# Test files
src/tests/results/

View File

@ -1,4 +1,4 @@
FROM node:20.9-bullseye-slim FROM node:22-bullseye-slim
# Set the working directory in the container # Set the working directory in the container
WORKDIR /app WORKDIR /app
@ -6,6 +6,9 @@ WORKDIR /app
# Copy package.json and yarn.lock # Copy package.json and yarn.lock
COPY package.json yarn.lock /app/ COPY package.json yarn.lock /app/
# Set the environment variable to skip Puppeteer download
ENV PUPPETEER_SKIP_DOWNLOAD=true
# Install dependencies # Install dependencies
RUN yarn install RUN yarn install

9
frontend/jest.config.js Normal file
View File

@ -0,0 +1,9 @@
/**
 * Jest configuration for the frontend test suite.
 *
 * @type {import('jest').Config}
 */
const jestConfig = {
  // Use the ts-jest preset so TypeScript test files can be run by Jest.
  preset: "ts-jest",
  // Run tests in Jest's Node environment.
  testEnvironment: "node",
  // Module(s) executed before each test file to prepare the environment.
  setupFiles: ["<rootDir>/src/setupTests.ts"],
  // Route .ts / .tsx files through ts-jest.
  transform: {
    "^.+\\.tsx?$": "ts-jest",
  },
  // Per-test timeout in milliseconds.
  // NOTE(review): presumably raised for slow browser-driven tests — confirm.
  testTimeout: 30000,
};

export default jestConfig;

View File

@ -10,7 +10,7 @@
"build-hosted": "tsc && vite build --mode prod", "build-hosted": "tsc && vite build --mode prod",
"lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0", "lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0",
"preview": "vite preview", "preview": "vite preview",
"test": "vitest" "test": "jest"
}, },
"dependencies": { "dependencies": {
"@codemirror/lang-html": "^6.4.6", "@codemirror/lang-html": "^6.4.6",
@ -46,21 +46,28 @@
"tailwindcss-animate": "^1.0.7", "tailwindcss-animate": "^1.0.7",
"thememirror": "^2.0.1", "thememirror": "^2.0.1",
"vite-plugin-checker": "^0.6.2", "vite-plugin-checker": "^0.6.2",
"webm-duration-fix": "^1.0.4" "webm-duration-fix": "^1.0.4",
"zustand": "^4.5.2"
}, },
"devDependencies": { "devDependencies": {
"@types/jest": "^29.5.12",
"@types/node": "^20.9.0", "@types/node": "^20.9.0",
"@types/puppeteer": "^7.0.4",
"@types/react": "^18.2.15", "@types/react": "^18.2.15",
"@types/react-dom": "^18.2.7", "@types/react-dom": "^18.2.7",
"@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0", "@typescript-eslint/parser": "^6.0.0",
"@vitejs/plugin-react": "^4.0.3", "@vitejs/plugin-react": "^4.0.3",
"autoprefixer": "^10.4.16", "autoprefixer": "^10.4.16",
"dotenv": "^16.4.5",
"eslint": "^8.45.0", "eslint": "^8.45.0",
"eslint-plugin-react-hooks": "^4.6.0", "eslint-plugin-react-hooks": "^4.6.0",
"eslint-plugin-react-refresh": "^0.4.3", "eslint-plugin-react-refresh": "^0.4.3",
"jest": "^29.7.0",
"postcss": "^8.4.31", "postcss": "^8.4.31",
"puppeteer": "^22.6.4",
"tailwindcss": "^3.3.5", "tailwindcss": "^3.3.5",
"ts-jest": "^29.1.2",
"typescript": "^5.0.2", "typescript": "^5.0.2",
"vite": "^4.4.5", "vite": "^4.4.5",
"vite-plugin-html": "^3.2.0", "vite-plugin-html": "^3.2.0",

View File

@ -0,0 +1,2 @@
TEST_SCREENSHOTONE_API_KEY=
TEST_ROOT_PATH=

View File

@ -40,6 +40,8 @@ import ModelSettingsSection from "./components/ModelSettingsSection";
import { extractHtml } from "./components/preview/extractHtml"; import { extractHtml } from "./components/preview/extractHtml";
import useBrowserTabIndicator from "./hooks/useBrowserTabIndicator"; import useBrowserTabIndicator from "./hooks/useBrowserTabIndicator";
import TipLink from "./components/core/TipLink"; import TipLink from "./components/core/TipLink";
import SelectAndEditModeToggleButton from "./components/select-and-edit/SelectAndEditModeToggleButton";
import { useAppStore } from "./store/app-store";
const IS_OPENAI_DOWN = false; const IS_OPENAI_DOWN = false;
@ -54,11 +56,14 @@ function App() {
const [updateInstruction, setUpdateInstruction] = useState(""); const [updateInstruction, setUpdateInstruction] = useState("");
const [isImportedFromCode, setIsImportedFromCode] = useState<boolean>(false); const [isImportedFromCode, setIsImportedFromCode] = useState<boolean>(false);
const { disableInSelectAndEditMode } = useAppStore();
// Settings // Settings
const [settings, setSettings] = usePersistedState<Settings>( const [settings, setSettings] = usePersistedState<Settings>(
{ {
openAiApiKey: null, openAiApiKey: null,
openAiBaseURL: null, openAiBaseURL: null,
anthropicApiKey: null,
screenshotOneApiKey: null, screenshotOneApiKey: null,
isImageGenerationEnabled: true, isImageGenerationEnabled: true,
editorTheme: EditorTheme.COBALT, editorTheme: EditorTheme.COBALT,
@ -89,9 +94,15 @@ function App() {
CodeGenerationModel.GPT_4_TURBO_2024_04_09 && CodeGenerationModel.GPT_4_TURBO_2024_04_09 &&
settings.generatedCodeConfig === Stack.REACT_TAILWIND; settings.generatedCodeConfig === Stack.REACT_TAILWIND;
// const showGpt4OMessage = const showBetterModelMessage =
// selectedCodeGenerationModel !== CodeGenerationModel.GPT_4O_2024_05_13 && selectedCodeGenerationModel !== CodeGenerationModel.GPT_4O_2024_05_13 &&
// appState === AppState.INITIAL; selectedCodeGenerationModel !==
CodeGenerationModel.CLAUDE_3_5_SONNET_2024_06_20 &&
appState === AppState.INITIAL;
const showSelectAndEditFeature =
selectedCodeGenerationModel === CodeGenerationModel.GPT_4O_2024_05_13 &&
settings.generatedCodeConfig === Stack.HTML_TAILWIND;
// Indicate coding state using the browser tab's favicon and title // Indicate coding state using the browser tab's favicon and title
useBrowserTabIndicator(appState === AppState.CODING); useBrowserTabIndicator(appState === AppState.CODING);
@ -148,6 +159,7 @@ function App() {
setAppHistory([]); setAppHistory([]);
setCurrentVersion(null); setCurrentVersion(null);
setShouldIncludeResultImage(false); setShouldIncludeResultImage(false);
disableInSelectAndEditMode();
}; };
const regenerate = () => { const regenerate = () => {
@ -236,7 +248,9 @@ function App() {
parentIndex: parentVersion, parentIndex: parentVersion,
code, code,
inputs: { inputs: {
prompt: updateInstruction, prompt: params.history
? params.history[params.history.length - 1]
: updateInstruction,
}, },
}, },
]; ];
@ -278,7 +292,10 @@ function App() {
} }
// Subsequent updates // Subsequent updates
async function doUpdate() { async function doUpdate(
updateInstruction: string,
selectedElement?: HTMLElement
) {
if (currentVersion === null) { if (currentVersion === null) {
toast.error( toast.error(
"No current version set. Contact support or open a Github issue." "No current version set. Contact support or open a Github issue."
@ -296,7 +313,17 @@ function App() {
return; return;
} }
const updatedHistory = [...historyTree, updateInstruction]; let modifiedUpdateInstruction = updateInstruction;
// Send in a reference to the selected element if it exists
if (selectedElement) {
modifiedUpdateInstruction =
updateInstruction +
" referring to this element specifically: " +
selectedElement.outerHTML;
}
const updatedHistory = [...historyTree, modifiedUpdateInstruction];
if (shouldIncludeResultImage) { if (shouldIncludeResultImage) {
const resultImage = await takeScreenshot(); const resultImage = await takeScreenshot();
@ -407,11 +434,11 @@ function App() {
</div> </div>
)} )}
{/* {showGpt4OMessage && ( {showBetterModelMessage && (
<div className="rounded-lg p-2 bg-fuchsia-200"> <div className="rounded-lg p-2 bg-fuchsia-200">
<p className="text-gray-800 text-sm"> <p className="text-gray-800 text-sm">
Now supporting GPT-4o. Higher quality and 2x faster. Give it a Now supporting Claude Sonnet 3.5. Higher quality and
try! 2x faster. Give it a try!
</p> </p>
</div> </div>
)} */} )} */}
@ -481,8 +508,8 @@ function App() {
/> />
</div> </div>
<Button <Button
onClick={doUpdate} onClick={() => doUpdate(updateInstruction)}
className="dark:text-white dark:bg-gray-700" className="dark:text-white dark:bg-gray-700 update-btn"
> >
Update Update
</Button> </Button>
@ -490,10 +517,13 @@ function App() {
<div className="flex items-center justify-end gap-x-2 mt-2"> <div className="flex items-center justify-end gap-x-2 mt-2">
<Button <Button
onClick={regenerate} onClick={regenerate}
className="flex items-center gap-x-2 dark:text-white dark:bg-gray-700" className="flex items-center gap-x-2 dark:text-white dark:bg-gray-700 regenerate-btn"
> >
🔄 Regenerate 🔄 Regenerate
</Button> </Button>
{showSelectAndEditFeature && (
<SelectAndEditModeToggleButton />
)}
</div> </div>
<div className="flex justify-end items-center mt-2"> <div className="flex justify-end items-center mt-2">
<TipLink /> <TipLink />
@ -599,7 +629,7 @@ function App() {
<Button <Button
onClick={downloadCode} onClick={downloadCode}
variant="secondary" variant="secondary"
className="flex items-center gap-x-2 mr-4 dark:text-white dark:bg-gray-700" className="flex items-center gap-x-2 mr-4 dark:text-white dark:bg-gray-700 download-btn"
> >
<FaDownload /> Download <FaDownload /> Download
</Button> </Button>
@ -622,10 +652,18 @@ function App() {
</div> </div>
</div> </div>
<TabsContent value="desktop"> <TabsContent value="desktop">
<Preview code={previewCode} device="desktop" /> <Preview
code={previewCode}
device="desktop"
doUpdate={doUpdate}
/>
</TabsContent> </TabsContent>
<TabsContent value="mobile"> <TabsContent value="mobile">
<Preview code={previewCode} device="mobile" /> <Preview
code={previewCode}
device="mobile"
doUpdate={doUpdate}
/>
</TabsContent> </TabsContent>
<TabsContent value="code"> <TabsContent value="code">
<CodeTab <CodeTab

View File

@ -166,7 +166,7 @@ function ImageUpload({ setReferenceImages }: Props) {
{screenRecorderState === ScreenRecorderState.INITIAL && ( {screenRecorderState === ScreenRecorderState.INITIAL && (
/* eslint-disable-next-line @typescript-eslint/no-explicit-any */ /* eslint-disable-next-line @typescript-eslint/no-explicit-any */
<div {...getRootProps({ style: style as any })}> <div {...getRootProps({ style: style as any })}>
<input {...getInputProps()} /> <input {...getInputProps()} className="file-input" />
<p className="text-slate-700 text-lg"> <p className="text-slate-700 text-lg">
Drag & drop a screenshot here, <br /> Drag & drop a screenshot here, <br />
or click to upload or click to upload

View File

@ -38,7 +38,9 @@ function ImportCodeSection({ importFromCode }: Props) {
return ( return (
<Dialog> <Dialog>
<DialogTrigger asChild> <DialogTrigger asChild>
<Button variant="secondary">Import from Code</Button> <Button className="import-from-code-btn" variant="secondary">
Import from Code
</Button>
</DialogTrigger> </DialogTrigger>
<DialogContent className="sm:max-w-[425px]"> <DialogContent className="sm:max-w-[425px]">
<DialogHeader> <DialogHeader>
@ -62,7 +64,7 @@ function ImportCodeSection({ importFromCode }: Props) {
/> />
<DialogFooter> <DialogFooter>
<Button type="submit" onClick={doImport}> <Button className="import-btn" type="submit" onClick={doImport}>
Import Import
</Button> </Button>
</DialogFooter> </DialogFooter>

View File

@ -1,21 +1,35 @@
import { useEffect, useRef } from "react"; import { useEffect, useRef, useState } from "react";
import classNames from "classnames"; import classNames from "classnames";
import useThrottle from "../hooks/useThrottle"; import useThrottle from "../hooks/useThrottle";
import EditPopup from "./select-and-edit/EditPopup";
interface Props { interface Props {
code: string; code: string;
device: "mobile" | "desktop"; device: "mobile" | "desktop";
doUpdate: (updateInstruction: string, selectedElement?: HTMLElement) => void;
} }
function Preview({ code, device }: Props) { function Preview({ code, device, doUpdate }: Props) {
const iframeRef = useRef<HTMLIFrameElement | null>(null); const iframeRef = useRef<HTMLIFrameElement | null>(null);
// Don't update code more often than every 200ms. // Don't update code more often than every 200ms.
const throttledCode = useThrottle(code, 200); const throttledCode = useThrottle(code, 200);
// Select and edit functionality
const [clickEvent, setClickEvent] = useState<MouseEvent | null>(null);
useEffect(() => { useEffect(() => {
if (iframeRef.current) { const iframe = iframeRef.current;
iframeRef.current.srcdoc = throttledCode; if (iframe) {
iframe.srcdoc = throttledCode;
// Set up click handler for select and edit functionality
iframe.addEventListener("load", function () {
iframe.contentWindow?.document.body.addEventListener(
"click",
setClickEvent
);
});
} }
}, [throttledCode]); }, [throttledCode]);
@ -34,6 +48,7 @@ function Preview({ code, device }: Props) {
} }
)} )}
></iframe> ></iframe>
<EditPopup event={clickEvent} iframeRef={iframeRef} doUpdate={doUpdate} />
</div> </div>
); );
} }

View File

@ -49,7 +49,7 @@ function SettingsDialog({ settings, setSettings }: Props) {
<div className="flex items-center space-x-2"> <div className="flex items-center space-x-2">
<Label htmlFor="image-generation"> <Label htmlFor="image-generation">
<div>DALL-E Placeholder Image Generation</div> <div>DALL-E Placeholder Image Generation</div>
<div className="font-light mt-2"> <div className="font-light mt-2 text-xs">
More fun with it but if you want to save money, turn it off. More fun with it but if you want to save money, turn it off.
</div> </div>
</Label> </Label>
@ -64,10 +64,11 @@ function SettingsDialog({ settings, setSettings }: Props) {
} }
/> />
</div> </div>
<div className="flex flex-col space-y-4"> <div className="flex flex-col space-y-6">
<div>
<Label htmlFor="openai-api-key"> <Label htmlFor="openai-api-key">
<div>OpenAI API key</div> <div>OpenAI API key</div>
<div className="font-light mt-2 leading-relaxed"> <div className="font-light mt-1 mb-2 text-xs leading-relaxed">
Only stored in your browser. Never stored on servers. Overrides Only stored in your browser. Never stored on servers. Overrides
your .env config. your .env config.
</div> </div>
@ -84,9 +85,10 @@ function SettingsDialog({ settings, setSettings }: Props) {
})) }))
} }
/> />
</div>
{!IS_RUNNING_ON_CLOUD && ( {!IS_RUNNING_ON_CLOUD && (
<> <div>
<Label htmlFor="openai-api-key"> <Label htmlFor="openai-api-key">
<div>OpenAI Base URL (optional)</div> <div>OpenAI Base URL (optional)</div>
<div className="font-light mt-2 leading-relaxed"> <div className="font-light mt-2 leading-relaxed">
@ -105,9 +107,31 @@ function SettingsDialog({ settings, setSettings }: Props) {
})) }))
} }
/> />
</> </div>
)} )}
<div>
<Label htmlFor="anthropic-api-key">
<div>Anthropic API key</div>
<div className="font-light mt-1 text-xs leading-relaxed">
Only stored in your browser. Never stored on servers. Overrides
your .env config.
</div>
</Label>
<Input
id="anthropic-api-key"
placeholder="Anthropic API key"
value={settings.anthropicApiKey || ""}
onChange={(e) =>
setSettings((s) => ({
...s,
anthropicApiKey: e.target.value,
}))
}
/>
</div>
<Accordion type="single" collapsible className="w-full"> <Accordion type="single" collapsible className="w-full">
<AccordionItem value="item-1"> <AccordionItem value="item-1">
<AccordionTrigger>Screenshot by URL Config</AccordionTrigger> <AccordionTrigger>Screenshot by URL Config</AccordionTrigger>

View File

@ -69,7 +69,7 @@ export function UrlInputSection({ doCreate, screenshotOneApiKey }: Props) {
<Button <Button
onClick={takeScreenshot} onClick={takeScreenshot}
disabled={isLoading} disabled={isLoading}
className="bg-slate-400" className="bg-slate-400 capture-btn"
> >
{isLoading ? "Capturing..." : "Capture"} {isLoading ? "Capturing..." : "Capture"}
</Button> </Button>

View File

@ -1,4 +1,3 @@
import { expect, test } from "vitest";
import { extractHistoryTree, renderHistory } from "./utils"; import { extractHistoryTree, renderHistory } from "./utils";
import type { History } from "./history_types"; import type { History } from "./history_types";
@ -84,6 +83,7 @@ const basicBadHistory: History = [
}, },
]; ];
describe("History Utils", () => {
test("should correctly extract the history tree", () => { test("should correctly extract the history tree", () => {
expect(extractHistoryTree(basicLinearHistory, 2)).toEqual([ expect(extractHistoryTree(basicLinearHistory, 2)).toEqual([
"<html>1. create</html>", "<html>1. create</html>",
@ -228,3 +228,4 @@ test("should correctly render the history tree", () => {
}, },
]); ]);
}); });
});

View File

@ -0,0 +1,143 @@
import React, { useEffect, useRef, useState } from "react";
import { Textarea } from "../ui/textarea";
import { Button } from "../ui/button";
import { addHighlight, getAdjustedCoordinates, removeHighlight } from "./utils";
import { useAppStore } from "../../store/app-store";
/**
 * Props accepted by the EditPopup component.
 */
interface EditPopupProps {
/**
 * Click event to react to, or null when there is none. The component's
 * click-handling effect returns early while this is null.
 */
event: MouseEvent | null;
/**
 * Ref to the iframe the click came from; its bounding client rect is passed
 * to getAdjustedCoordinates when positioning the popup.
 */
iframeRef: React.RefObject<HTMLIFrameElement>;
/**
 * Callback invoked when the user submits an edit. Receives the typed
 * instruction and, if an element is selected, the result of calling
 * removeHighlight on that element.
 */
doUpdate: (updateInstruction: string, selectedElement?: HTMLElement) => void;
}
const EditPopup: React.FC<EditPopupProps> = ({
event,
iframeRef,
doUpdate,
}) => {
// App state
const { inSelectAndEditMode } = useAppStore();
// Create a wrapper ref to store inSelectAndEditMode so the value is not stale
// in an event listener
const inSelectAndEditModeRef = useRef(inSelectAndEditMode);
// Update the ref whenever the state changes
useEffect(() => {
inSelectAndEditModeRef.current = inSelectAndEditMode;
}, [inSelectAndEditMode]);
// Popup state
const [popupVisible, setPopupVisible] = useState(false);
const [popupPosition, setPopupPosition] = useState({ x: 0, y: 0 });
// Edit state
const [selectedElement, setSelectedElement] = useState<
HTMLElement | undefined
>(undefined);
const [updateText, setUpdateText] = useState("");
// Textarea ref for focusing
const textareaRef = useRef<HTMLTextAreaElement | null>(null);
function onUpdate(updateText: string) {
// Perform the update
doUpdate(
updateText,
selectedElement ? removeHighlight(selectedElement) : selectedElement
);
// Unselect the element
setSelectedElement(undefined);
// Hide the popup
setPopupVisible(false);
}
// Remove highlight and reset state when not in select and edit mode
useEffect(() => {
if (!inSelectAndEditMode) {
if (selectedElement) removeHighlight(selectedElement);
setSelectedElement(undefined);
setPopupVisible(false);
}
}, [inSelectAndEditMode, selectedElement]);
// Handle the click event
useEffect(() => {
// Return if not in select and edit mode
if (!inSelectAndEditModeRef.current || !event) {
return;
}
// Prevent default to avoid issues like label clicks triggering textareas, etc.
event.preventDefault();
const targetElement = event.target as HTMLElement;
// Return if no target element
if (!targetElement) return;
// Highlight and set the selected element
setSelectedElement((prev) => {
// Remove style from previous element
if (prev) {
removeHighlight(prev);
}
return addHighlight(targetElement);
});
// Calculate adjusted coordinates
const adjustedCoordinates = getAdjustedCoordinates(
event.clientX,
event.clientY,
iframeRef.current?.getBoundingClientRect()
);
// Show the popup at the click position
setPopupVisible(true);
setPopupPosition({ x: adjustedCoordinates.x, y: adjustedCoordinates.y });
// Reset the update text
setUpdateText("");
// Focus the textarea
textareaRef.current?.focus();
}, [event, iframeRef]);
// Focus the textarea when the popup is visible (we can't do this only when handling the click event
// because the textarea is not rendered yet)
// We need to also do it in the click event because popupVisible doesn't change values in that event
useEffect(() => {
if (popupVisible) {
textareaRef.current?.focus();
}
}, [popupVisible]);
if (!popupVisible) return;
return (
<div
className="absolute bg-white p-4 border border-gray-300 rounded shadow-lg w-60"
style={{ top: popupPosition.y, left: popupPosition.x }}
>
<Textarea
ref={textareaRef}
value={updateText}
onChange={(e) => setUpdateText(e.target.value)}
placeholder="Tell the AI what to change about this element..."
onKeyDown={(e) => {
if (e.key === "Enter") {
e.preventDefault();
onUpdate(updateText);
}
}}
/>
<div className="flex justify-end mt-2">
<Button onClick={() => onUpdate(updateText)}>Update</Button>
</div>
</div>
);
};
export default EditPopup;

View File

@ -0,0 +1,22 @@
import { GiClick } from "react-icons/gi";
import { useAppStore } from "../../store/app-store";
import { Button } from "../ui/button";
/**
 * Button that switches the app-wide "select and edit" mode on and off.
 * Its label and variant reflect whether the mode is currently active.
 */
function SelectAndEditModeToggleButton() {
  const {
    inSelectAndEditMode: isSelecting,
    toggleInSelectAndEditMode: toggleMode,
  } = useAppStore();

  // The destructive variant is used while the mode is active
  const buttonVariant = isSelecting ? "destructive" : "default";
  const label = isSelecting ? "Exit selection mode" : "Select and update";

  // NOTE(review): this button carries the `regenerate-btn` class, which the e2e
  // suite uses as its selector for the regenerate action - confirm this is intended.
  return (
    <Button
      onClick={toggleMode}
      className="flex items-center gap-x-2 dark:text-white dark:bg-gray-700 regenerate-btn"
      variant={buttonVariant}
    >
      <GiClick className="text-lg" />
      <span>{label}</span>
    </Button>
  );
}
export default SelectAndEditModeToggleButton;

View File

@ -0,0 +1,22 @@
/**
 * Clears the inline outline and background colour of an element.
 *
 * @param element - Element whose inline highlight styles are reset.
 * @returns The same element, to allow chaining.
 */
export function removeHighlight(element: HTMLElement) {
  Object.assign(element.style, { outline: "", backgroundColor: "" });
  return element;
}
/**
 * Marks an element as selected by giving it a dashed blue outline and a
 * light blue background through inline styles.
 *
 * @param element - Element to highlight.
 * @returns The same element, to allow chaining.
 */
export function addHighlight(element: HTMLElement) {
  Object.assign(element.style, {
    outline: "2px dashed #1846db",
    backgroundColor: "#bfcbf5",
  });
  return element;
}
/**
 * Shifts a point by the top-left corner of a rectangle.
 *
 * @param x - Horizontal coordinate of the point.
 * @param y - Vertical coordinate of the point.
 * @param rect - Rectangle whose `left`/`top` are added; no offset is applied when undefined.
 * @returns The shifted `{ x, y }` pair.
 */
export function getAdjustedCoordinates(
  x: number,
  y: number,
  rect: DOMRect | undefined
) {
  // Fall back to a zero origin when no rectangle is available
  const origin = rect ?? { left: 0, top: 0 };
  return { x: x + origin.left, y: y + origin.top };
}

View File

@ -4,6 +4,7 @@ export enum CodeGenerationModel {
CLAUDE_3_5_SONNET = "claude_3_5_sonnet", CLAUDE_3_5_SONNET = "claude_3_5_sonnet",
CLAUDE_3_SONNET = "claude_3_sonnet", CLAUDE_3_SONNET = "claude_3_sonnet",
GPT_4O_2024_05_13 = "gpt-4o-2024-05-13", GPT_4O_2024_05_13 = "gpt-4o-2024-05-13",
CLAUDE_3_5_SONNET_2024_06_20 = "claude-3-5-sonnet-20240620",
GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09", GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09",
GPT_4_VISION = "gpt_4_vision", GPT_4_VISION = "gpt_4_vision",
} }
@ -13,6 +14,7 @@ export const CODE_GENERATION_MODEL_DESCRIPTIONS: {
[key in CodeGenerationModel]: { name: string; inBeta: boolean }; [key in CodeGenerationModel]: { name: string; inBeta: boolean };
} = { } = {
"gpt-4o-2024-05-13": { name: "GPT-4o 🌟", inBeta: false }, "gpt-4o-2024-05-13": { name: "GPT-4o 🌟", inBeta: false },
"claude-3-5-sonnet-20240620": { name: "Claude 3.5 Sonnet 🌟", inBeta: false },
"gpt-4-turbo-2024-04-09": { name: "GPT-4 Turbo (Apr 2024)", inBeta: false }, "gpt-4-turbo-2024-04-09": { name: "GPT-4 Turbo (Apr 2024)", inBeta: false },
gpt_4_vision: { name: "GPT-4 Vision (Nov 2023)", inBeta: false }, gpt_4_vision: { name: "GPT-4 Vision (Nov 2023)", inBeta: false },
claude_3_sonnet: { name: "Claude 3 Sonnet", inBeta: false }, claude_3_sonnet: { name: "Claude 3 Sonnet", inBeta: false },

View File

@ -0,0 +1,3 @@
// Loads environment variables from the `.env.jest` file (via dotenv) so that
// the jest test runner can read them.
import { config } from "dotenv";
config({ path: ".env.jest" });

View File

@ -0,0 +1,15 @@
import { create } from "zustand";
// Store for app-wide state
/** Shape of the app-wide store. */
interface AppStore {
  /** Whether "select and edit" mode is currently active. */
  inSelectAndEditMode: boolean;
  /** Flips `inSelectAndEditMode`. */
  toggleInSelectAndEditMode: () => void;
  /** Sets `inSelectAndEditMode` to false. */
  disableInSelectAndEditMode: () => void;
}

/** Zustand hook exposing the app-wide state; the mode starts out disabled. */
export const useAppStore = create<AppStore>((set) => {
  const toggleInSelectAndEditMode = () => {
    set(({ inSelectAndEditMode }) => ({
      inSelectAndEditMode: !inSelectAndEditMode,
    }));
  };

  const disableInSelectAndEditMode = () => {
    set({ inSelectAndEditMode: false });
  };

  return {
    inSelectAndEditMode: false,
    toggleInSelectAndEditMode,
    disableInSelectAndEditMode,
  };
});

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

View File

@ -0,0 +1,274 @@
import puppeteer, { Browser, Page, ElementHandle } from "puppeteer";
import { Stack } from "../lib/stacks";
import { CodeGenerationModel } from "../lib/models";
// Root directory of the e2e test assets, read from the TEST_ROOT_PATH env var
const TESTS_ROOT_PATH = process.env.TEST_ROOT_PATH;

// Input fixtures
const FIXTURES_PATH = TESTS_ROOT_PATH + "/fixtures";
const SIMPLE_SCREENSHOT = `${FIXTURES_PATH}/simple_button.png`;
const SCREENSHOT_WITH_IMAGES = FIXTURES_PATH + "/simple_ui_with_image.png";

// Directory the result screenshots are written to
const RESULTS_DIR = TESTS_ROOT_PATH + "/results";
// End-to-end suite: drives the app running at http://localhost:5173/ through
// puppeteer for every model / stack combination.
describe("e2e tests", () => {
  let browser: Browser;
  let page: Page;

  // With DEBUG on, only the first stack and the first model are exercised
  const DEBUG = false;
  // Passed to puppeteer.launch as the `headless` option
  const IS_HEADLESS = true;

  const sampleSize = DEBUG ? 1 : undefined;
  const stacks = Object.values(Stack).slice(0, sampleSize);
  const models = Object.values(CodeGenerationModel).slice(0, sampleSize);

  // Per-test timeouts
  const CREATE_TIMEOUT_MS = 60 * 1000;
  const UPDATE_TIMEOUT_MS = 90 * 1000;

  // The update / start-from-code tests are pinned to a single stack
  const UPDATE_STACK = "html_tailwind";

  beforeAll(async () => {
    browser = await puppeteer.launch({ headless: IS_HEADLESS });
    page = await browser.newPage();
    await page.goto("http://localhost:5173/");

    // Set screen size
    await page.setViewport({ width: 1080, height: 1024 });

    // TODO: Does this need to be moved?
    // const client = await page.createCDPSession();
    // Set download behavior path
    // await client.send("Page.setDownloadBehavior", {
    //   behavior: "allow",
    //   downloadPath: DOWNLOAD_PATH,
    // });
  });

  afterAll(async () => {
    await browser.close();
  });

  // Create tests - one pair per model / stack combination
  for (const model of models) {
    for (const stack of stacks) {
      it(
        `Create for : ${model} & ${stack}`,
        async () => {
          const testId = `create_screenshot_${model}_${stack}`;
          const app = new App(page, stack, model, testId);
          await app.init();

          // Generate from an uploaded screenshot
          await app.uploadImage(SCREENSHOT_WITH_IMAGES);
        },
        CREATE_TIMEOUT_MS
      );

      it(
        `Create from URL for : ${model} & ${stack}`,
        async () => {
          const testId = `create_url_${model}_${stack}`;
          const app = new App(page, stack, model, testId);
          await app.init();

          // Generate from a URL
          await app.generateFromUrl("https://a.picoapps.xyz/design-fear");
        },
        CREATE_TIMEOUT_MS
      );
    }
  }

  // Update tests - for every model (no need to repeat them for each stack,
  // so they are fixed to HTML + Tailwind)
  for (const model of models) {
    it(
      `update: ${model}`,
      async () => {
        const app = new App(
          page,
          UPDATE_STACK,
          model,
          `update_${model}_${UPDATE_STACK}`
        );
        await app.init();

        // Generate from screenshot
        await app.uploadImage(SIMPLE_SCREENSHOT);

        // Regenerate works for v1
        await app.regenerate();

        // Make an update
        await app.edit("make the button background blue", "v2");

        // Make another update
        await app.edit("make the text italic", "v3");

        // Branch off v2 and make an update
        await app.clickVersion("v2");
        await app.edit("make the text yellow", "v4");
      },
      UPDATE_TIMEOUT_MS
    );
  }

  // Start from code tests - for every model (currently skipped)
  for (const model of models) {
    it.skip(
      `Start from code: ${model}`,
      async () => {
        const app = new App(
          page,
          UPDATE_STACK,
          model,
          `start_from_code_${model}_${UPDATE_STACK}`
        );
        await app.init();

        await app.importFromCode();

        // Regenerate works for v1
        // await app.regenerate();
        // // Make an update
        // await app.edit("make the header blue", "v2");
        // // Make another update
        // await app.edit("make all text italic", "v3");
        // // Branch off v2 and make an update
        // await app.clickVersion("v2");
        // await app.edit("make all text red", "v4");
      },
      UPDATE_TIMEOUT_MS
    );
  }
});
/**
 * Page-object style helper that drives the app through a puppeteer page and
 * saves a screenshot after each step under RESULTS_DIR, prefixed by the test id.
 */
class App {
  private screenshotPathPrefix: string;

  constructor(
    private page: Page,
    private stack: string,
    private model: string,
    testId: string
  ) {
    this.screenshotPathPrefix = `${RESULTS_DIR}/${testId}`;
  }

  /** Prepares the page for a test run. */
  async init() {
    await this.setupLocalStorage();
  }

  /** Writes the settings for this stack/model into localStorage and reloads the page. */
  async setupLocalStorage() {
    const setting = {
      openAiApiKey: null,
      openAiBaseURL: null,
      screenshotOneApiKey: process.env.TEST_SCREENSHOTONE_API_KEY,
      isImageGenerationEnabled: true,
      editorTheme: "cobalt",
      generatedCodeConfig: this.stack,
      codeGenerationModel: this.model,
      isTermOfServiceAccepted: false,
      accessCode: null,
    };

    await this.page.evaluate((value) => {
      localStorage.setItem("setting", JSON.stringify(value));
    }, setting);

    // Reload so the app picks up the stored settings
    await this.page.reload();
  }

  /** Saves a screenshot of the page named after the given step. */
  async _screenshot(step: string) {
    const path = `${this.screenshotPathPrefix}_${step}.png`;
    await this.page.screenshot({ path });
  }

  /** Waits until the network is idle and the version label shows up in the page text. */
  async _waitUntilVersionIsReady(version: string) {
    await this.page.waitForNetworkIdle();

    await this.page.waitForFunction(
      (label) => document.body.innerText.includes(label),
      { timeout: 30000 },
      version
    );

    // Give the HTML and JS 3s to render before a screenshot is taken
    const renderDelayMs = 3000;
    await new Promise((resolve) => setTimeout(resolve, renderDelayMs));
  }

  /** Types a URL, clicks the capture button and waits for v1. */
  async generateFromUrl(url: string) {
    // Type in the URL
    await this.page.type('input[placeholder="Enter URL"]', url);
    await this._screenshot("typed_url");

    // Click the capture button and wait for the code to be generated
    await this.page.click("button.capture-btn");
    await this._waitUntilVersionIsReady("v1");
    await this._screenshot("url_result");
  }

  /** Uploads a screenshot through the file input and waits for v1. */
  async uploadImage(screenshotPath: string) {
    const handle = await this.page.$(".file-input");
    if (!handle) {
      throw new Error("File input element not found");
    }

    // Upload file
    const fileInput = handle as ElementHandle<HTMLInputElement>;
    await fileInput.uploadFile(screenshotPath);
    await this._screenshot("image_uploaded");

    // No button is clicked here; just wait until the first version shows up
    await this._waitUntilVersionIsReady("v1");
    await this._screenshot("image_results");
  }

  /** Types an edit instruction, submits it and waits for the given version. */
  async edit(edit: string, version: string) {
    // Type in the edit
    const editBox = 'textarea[placeholder="Tell the AI what to change..."]';
    await this.page.type(editBox, edit);
    await this._screenshot(`typed_${version}`);

    // Click the update button and wait for the code to be generated
    await this.page.click(".update-btn");
    await this._waitUntilVersionIsReady(version);
    await this._screenshot(`done_${version}`);
  }

  /** Clicks every div whose text contains the given version label. */
  async clickVersion(version: string) {
    await this.page.evaluate((label) => {
      document.querySelectorAll("div").forEach((div) => {
        if (!div.innerText.includes(label)) return;
        div.click();
      });
    }, version);
  }

  /** Clicks the regenerate button and waits for v1. */
  async regenerate() {
    await this.page.click(".regenerate-btn");
    await this._waitUntilVersionIsReady("v1");
    await this._screenshot("regenerate_results");
  }

  // Work in progress
  /** Imports a small HTML snippet through the import-from-code flow and waits for v1. */
  async importFromCode() {
    await this.page.click(".import-from-code-btn");

    await this.page.type("textarea", "<html>hello world</html>");

    await this.page.select("#output-settings-js", "HTML + Tailwind");

    await this._screenshot("typed_code");

    await this.page.click(".import-btn");

    await this._waitUntilVersionIsReady("v1");
  }
}

View File

@ -16,6 +16,7 @@ export interface Settings {
codeGenerationModel: CodeGenerationModel; codeGenerationModel: CodeGenerationModel;
// Only relevant for hosted version // Only relevant for hosted version
isTermOfServiceAccepted: boolean; isTermOfServiceAccepted: boolean;
anthropicApiKey: string | null; // Added property for anthropic API key
} }
export enum AppState { export enum AppState {

File diff suppressed because it is too large Load Diff