Merge branch 'main' into main

This commit is contained in:
xia ning 2024-06-11 09:43:03 +08:00 committed by GitHub
commit 0b3b492c98
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
36 changed files with 3111 additions and 313 deletions

1
.github/FUNDING.yml vendored Normal file
View File

@ -0,0 +1 @@
github: [abi]

21
.github/ISSUE_TEMPLATE/bug_report.md vendored Normal file
View File

@ -0,0 +1,21 @@
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: ''
assignees: ''
---
**Describe the bug**
A clear and concise description of what the bug is.
**To Reproduce**
Steps to reproduce the behavior:
1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error
**Screenshots of backend AND frontend terminal logs**
If applicable, add screenshots to help explain your problem.

10
.github/ISSUE_TEMPLATE/custom.md vendored Normal file
View File

@ -0,0 +1,10 @@
---
name: Custom issue template
about: Describe this issue template's purpose here.
title: ''
labels: ''
assignees: ''
---

View File

@ -0,0 +1,20 @@
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: ''
assignees: ''
---
**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
**Describe the solution you'd like**
A clear and concise description of what you want to happen.
**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.
**Additional context**
Add any other context or screenshots about the feature request here.

View File

@ -1,6 +1,6 @@
# screenshot-to-code
A simple tool to convert screenshots, mockups and Figma designs into clean, functional code using AI.
A simple tool to convert screenshots, mockups and Figma designs into clean, functional code using AI. **Now supporting GPT-4O!**
https://github.com/abi/screenshot-to-code/assets/23818/6cebadae-2fe3-4986-ac6a-8fb9db030045
@ -15,9 +15,10 @@ Supported stacks:
Supported AI models:
- GPT-4 Turbo (Apr 2024) - Best model
- GPT-4 Vision (Nov 2023) - Good model that's better than GPT-4 Turbo on some inputs
- Claude 3 Sonnet - Faster, and on par or better than GPT-4 vision for many inputs
- GPT-4O - Best model!
- GPT-4 Turbo (Apr 2024)
- GPT-4 Vision (Nov 2023)
- Claude 3 Sonnet
- DALL-E 3 for image generation
See the [Examples](#-examples) section below for more demos.
@ -30,13 +31,22 @@ We also just added experimental support for taking a video/screen recording of a
[Follow me on Twitter for updates](https://twitter.com/_abi_).
## 🚀 Try It Out with no install
## Sponsors
<a href="https://konghq.com/products/kong-konnect/register?utm_medium=referral&utm_source=github&utm_campaign=platform&utm_content=screenshot-to-code" target="_blank" title="Kong - powering the API world"><img src="https://picoapps.xyz/s2c-sponsors/Kong-GitHub-240x100.png"></a>
## 🚀 Hosted Version
[Try it live on the hosted version (paid)](https://screenshottocode.com).
## 🛠 Getting Started
The app has a React/Vite frontend and a FastAPI backend. You will need an OpenAI API key with access to the GPT-4 Vision API or an Anthropic key if you want to use Claude Sonnet, or for experimental video support.
The app has a React/Vite frontend and a FastAPI backend.
Keys needed:
* [OpenAI API key with access to GPT-4](https://github.com/abi/screenshot-to-code/blob/main/Troubleshooting.md)
* Anthropic key (optional) - only if you want to use Claude Sonnet, or for experimental video support.
Run the backend (I use Poetry for package management - `pip install poetry` if you don't have it):
@ -48,7 +58,7 @@ poetry shell
poetry run uvicorn main:app --reload --port 7001
```
If you want to use Anthropic, add the `ANTHROPIC_API_KEY` to `backend/.env` with your API key from Anthropic.
If you want to use Anthropic, add `ANTHROPIC_API_KEY` to `backend/.env`. You can also set up the keys using the settings dialog on the front-end (click the gear icon after loading the frontend).
Run the frontend:
@ -107,5 +117,3 @@ https://github.com/abi/screenshot-to-code/assets/23818/3fec0f77-44e8-4fb3-a769-a
## 🌍 Hosted Version
🆕 [Try it here (paid)](https://screenshottocode.com). Or see [Getting Started](#-getting-started) for local install instructions to use with your own API keys.
[!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://www.buymeacoffee.com/abiraja)

View File

@ -1,4 +1,4 @@
### Getting an OpenAI API key with GPT4-Vision model access
### Getting an OpenAI API key with GPT-4 model access
You don't need a ChatGPT Pro account. Screenshot to code uses API keys from your OpenAI developer account. In order to get access to the GPT-4 model, log into your OpenAI account and then follow these instructions:

View File

@ -5,7 +5,7 @@ from openai import AsyncOpenAI
from bs4 import BeautifulSoup
async def process_tasks(prompts: List[str], api_key: str, base_url: str):
async def process_tasks(prompts: List[str], api_key: str, base_url: str | None):
tasks = [generate_image(prompt, api_key, base_url) for prompt in prompts]
results = await asyncio.gather(*tasks, return_exceptions=True)
@ -15,22 +15,23 @@ async def process_tasks(prompts: List[str], api_key: str, base_url: str):
print(f"An exception occurred: {result}")
processed_results.append(None)
else:
processed_results.append(result) # type: ignore
processed_results.append(result)
return processed_results
async def generate_image(prompt: str, api_key: str, base_url: str):
async def generate_image(
prompt: str, api_key: str, base_url: str | None
) -> Union[str, None]:
client = AsyncOpenAI(api_key=api_key, base_url=base_url)
image_params: Dict[str, Union[str, int]] = {
"model": "dall-e-3",
"quality": "standard",
"style": "natural",
"n": 1,
"size": "1024x1024",
"prompt": prompt,
}
res = await client.images.generate(**image_params) # type: ignore
res = await client.images.generate(
model="dall-e-3",
quality="standard",
style="natural",
n=1,
size="1024x1024",
prompt=prompt,
)
await client.close()
return res.data[0].url
@ -63,13 +64,13 @@ def create_alt_url_mapping(code: str) -> Dict[str, str]:
async def generate_images(
code: str, api_key: str, base_url: Union[str, None], image_cache: Dict[str, str]
):
) -> str:
# Find all images
soup = BeautifulSoup(code, "html.parser")
images = soup.find_all("img")
# Extract alt texts as image prompts
alts = []
alts: List[str | None] = []
for img in images:
# Only include URL if the image starts with https://placehold.co
# and it's not already in the image_cache
@ -77,26 +78,26 @@ async def generate_images(
img["src"].startswith("https://placehold.co")
and image_cache.get(img.get("alt")) is None
):
alts.append(img.get("alt", None)) # type: ignore
alts.append(img.get("alt", None))
# Exclude images with no alt text
alts = [alt for alt in alts if alt is not None] # type: ignore
filtered_alts: List[str] = [alt for alt in alts if alt is not None]
# Remove duplicates
prompts = list(set(alts)) # type: ignore
prompts = list(set(filtered_alts))
# Return early if there are no images to replace
if len(prompts) == 0: # type: ignore
if len(prompts) == 0:
return code
# Generate images
results = await process_tasks(prompts, api_key, base_url) # type: ignore
results = await process_tasks(prompts, api_key, base_url)
# Create a dict mapping alt text to image URL
mapped_image_urls = dict(zip(prompts, results)) # type: ignore
mapped_image_urls = dict(zip(prompts, results))
# Merge with image_cache
mapped_image_urls = {**mapped_image_urls, **image_cache} # type: ignore
mapped_image_urls = {**mapped_image_urls, **image_cache}
# Replace old image URLs with the generated URLs
for img in images:

View File

@ -16,6 +16,7 @@ from utils import pprint_prompt
class Llm(Enum):
GPT_4_VISION = "gpt-4-vision-preview"
GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09"
GPT_4O_2024_05_13 = "gpt-4o-2024-05-13"
CLAUDE_3_SONNET = "claude-3-sonnet-20240229"
CLAUDE_3_SONNET_BEDROCK = "anthropic.claude-3-sonnet-20240229-v1:0"
CLAUDE_3_OPUS = "claude-3-opus-20240229"
@ -51,13 +52,18 @@ async def stream_openai_response(
}
# Add 'max_tokens' only if the model is a GPT4 vision or Turbo model
if model == Llm.GPT_4_VISION or model == Llm.GPT_4_TURBO_2024_04_09:
if (
model == Llm.GPT_4_VISION
or model == Llm.GPT_4_TURBO_2024_04_09
or model == Llm.GPT_4O_2024_05_13
):
params["max_tokens"] = 4096
stream = await client.chat.completions.create(**params) # type: ignore
full_response = ""
async for chunk in stream: # type: ignore
assert isinstance(chunk, ChatCompletionChunk)
if chunk.choices and len(chunk.choices) > 0 and chunk.choices[0].delta and chunk.choices[0].delta.content:
content = chunk.choices[0].delta.content or ""
full_response += content
await callback(content)

View File

@ -7,10 +7,13 @@ from evals.config import EVALS_DIR
router = APIRouter()
# Update this if the number of outputs generated per input changes
N = 1
class Eval(BaseModel):
input: str
output: str
outputs: list[str]
@router.get("/evals")
@ -25,21 +28,27 @@ async def get_evals():
input_file_path = os.path.join(input_dir, file)
input_file = await image_to_data_url(input_file_path)
# Construct the corresponding output file name
output_file_name = file.replace(".png", ".html")
output_file_path = os.path.join(output_dir, output_file_name)
# Construct the corresponding output file names
output_file_names = [
file.replace(".png", f"_{i}.html") for i in range(0, N)
] # N outputs are expected for each input (see the N constant above)
output_files_data: list[str] = []
for output_file_name in output_file_names:
output_file_path = os.path.join(output_dir, output_file_name)
# Check if the output file exists
if os.path.exists(output_file_path):
with open(output_file_path, "r") as f:
output_file_data = f.read()
output_files_data.append(f.read())
else:
output_file_data = "Output file not found."
output_files_data.append(
"<html><h1>Output file not found.</h1></html>"
)
evals.append(
Eval(
input=input_file,
output=output_file_data,
outputs=output_files_data,
)
)

View File

@ -13,7 +13,7 @@ from llm import (
)
from openai.types.chat import ChatCompletionMessageParam
from mock_llm import mock_completion
from typing import Dict, List, cast, get_args
from typing import Dict, List, Union, cast, get_args
from image_generation import create_alt_url_mapping, generate_images
from prompts import assemble_imported_code_prompt, assemble_prompt
from datetime import datetime
@ -84,7 +84,7 @@ async def stream_code(websocket: WebSocket):
# Read the model from the request. Fall back to default if not provided.
code_generation_model_str = params.get(
"codeGenerationModel", Llm.GPT_4_VISION.value
"codeGenerationModel", Llm.GPT_4O_2024_05_13.value
)
try:
code_generation_model = convert_frontend_str_to_llm(code_generation_model_str)
@ -111,6 +111,7 @@ async def stream_code(websocket: WebSocket):
if not openai_api_key and (
code_generation_model == Llm.GPT_4_VISION
or code_generation_model == Llm.GPT_4_TURBO_2024_04_09
or code_generation_model == Llm.GPT_4O_2024_05_13
):
print("OpenAI API key not found")
await throw_error(
@ -118,8 +119,19 @@ async def stream_code(websocket: WebSocket):
)
return
# Get the Anthropic API key from the request. Fall back to environment variable if not provided.
# If neither is provided, we throw an error later only if Claude is used.
anthropic_api_key = None
if "anthropicApiKey" in params and params["anthropicApiKey"]:
anthropic_api_key = params["anthropicApiKey"]
print("Using Anthropic API key from client-side settings dialog")
else:
anthropic_api_key = ANTHROPIC_API_KEY
if anthropic_api_key:
print("Using Anthropic API key from environment variable")
# Get the OpenAI Base URL from the request. Fall back to environment variable if not provided.
openai_base_url = None
openai_base_url: Union[str, None] = None
# Disable user-specified OpenAI Base URL in prod
if not os.environ.get("IS_PROD"):
if "openAiBaseURL" in params and params["openAiBaseURL"]:
@ -217,17 +229,17 @@ async def stream_code(websocket: WebSocket):
else:
try:
if validated_input_mode == "video":
if not ANTHROPIC_API_KEY and not AWS_ACCESS_KEY and not AWS_SECRET_ACCESS_KEY:
if not anthropic_api_key and not AWS_ACCESS_KEY and not AWS_SECRET_ACCESS_KEY:
await throw_error(
"Video only works with Anthropic models. Neither Anthropic API key or AWS Access Key found. Please add the environment variable ANTHROPIC_API_KEY or AWS_ACCESS_KEY/AWS_SECRET_ACCESS_KEY to backend/.env"
"Video only works with Anthropic models. Neither Anthropic API key or AWS Access Key found. Please add the environment variable ANTHROPIC_API_KEY or AWS_ACCESS_KEY/AWS_SECRET_ACCESS_KEY to backend/.env or in the settings dialog"
)
raise Exception("No Anthropic key")
if ANTHROPIC_API_KEY:
if anthropic_api_key:
completion = await stream_claude_response_native(
system_prompt=VIDEO_PROMPT,
messages=prompt_messages, # type: ignore
api_key=ANTHROPIC_API_KEY,
api_key=anthropic_api_key,
callback=lambda x: process_chunk(x),
model=Llm.CLAUDE_3_OPUS,
include_thinking=True,
@ -245,15 +257,15 @@ async def stream_code(websocket: WebSocket):
)
exact_llm_version = Llm.CLAUDE_3_OPUS
elif code_generation_model == Llm.CLAUDE_3_SONNET:
if not ANTHROPIC_API_KEY and not AWS_ACCESS_KEY and not AWS_SECRET_ACCESS_KEY:
if not anthropic_api_key and not AWS_ACCESS_KEY and not AWS_SECRET_ACCESS_KEY:
await throw_error(
"No Anthropic API key or AWS Access Key found. Please add the environment variable ANTHROPIC_API_KEY or AWS_ACCESS_KEY/AWS_SECRET_ACCESS_KEY to backend/.env"
"No Anthropic API key or AWS Access Key found. Please add the environment variable ANTHROPIC_API_KEY or AWS_ACCESS_KEY/AWS_SECRET_ACCESS_KEY to backend/.env or in the settings dialog"
)
raise Exception("No Anthropic key")
if ANTHROPIC_API_KEY:
if anthropic_api_key:
completion = await stream_claude_response(
prompt_messages, # type: ignore
api_key=ANTHROPIC_API_KEY,
api_key=anthropic_api_key,
callback=lambda x: process_chunk(x),
)
else:

View File

@ -13,8 +13,9 @@ from evals.config import EVALS_DIR
from evals.core import generate_code_core
from evals.utils import image_to_data_url
STACK = "html_tailwind"
MODEL = Llm.CLAUDE_3_SONNET
STACK = "ionic_tailwind"
MODEL = Llm.GPT_4O_2024_05_13
N = 1 # Number of outputs to generate
async def main():
@ -28,6 +29,7 @@ async def main():
for filename in evals:
filepath = os.path.join(INPUT_DIR, filename)
data_url = await image_to_data_url(filepath)
for _ in range(N): # Generate N tasks for each input
task = generate_code_core(image_url=data_url, stack=STACK, model=MODEL)
tasks.append(task)
@ -35,9 +37,13 @@ async def main():
os.makedirs(OUTPUT_DIR, exist_ok=True)
for filename, content in zip(evals, results):
# File name is derived from the original filename in evals
output_filename = f"{os.path.splitext(filename)[0]}.html"
for i, content in enumerate(results):
# Calculate index for filename and output number
eval_index = i // N
output_number = i % N
filename = evals[eval_index]
# File name is derived from the original filename in evals with an added output number
output_filename = f"{os.path.splitext(filename)[0]}_{output_number}.html"
output_filepath = os.path.join(OUTPUT_DIR, output_filename)
with open(output_filepath, "w") as file:
file.write(content)

View File

@ -24,6 +24,11 @@ class TestConvertFrontendStrToLlm(unittest.TestCase):
Llm.GPT_4_TURBO_2024_04_09,
"Should convert 'gpt-4-turbo-2024-04-09' to Llm.GPT_4_TURBO_2024_04_09",
)
self.assertEqual(
convert_frontend_str_to_llm("gpt-4o-2024-05-13"),
Llm.GPT_4O_2024_05_13,
"Should convert 'gpt-4o-2024-05-13' to Llm.GPT_4O_2024_05_13",
)
def test_convert_invalid_string_raises_exception(self):
with self.assertRaises(ValueError):

3
frontend/.gitignore vendored
View File

@ -25,3 +25,6 @@ dist-ssr
# Env files
.env*
# Test files
src/tests/results/

View File

@ -1,4 +1,4 @@
FROM node:20.9-bullseye-slim
FROM node:22-bullseye-slim
# Set the working directory in the container
WORKDIR /app
@ -6,6 +6,9 @@ WORKDIR /app
# Copy package.json and yarn.lock
COPY package.json yarn.lock /app/
# Set the environment variable to skip Puppeteer download
ENV PUPPETEER_SKIP_DOWNLOAD=true
# Install dependencies
RUN yarn install

9
frontend/jest.config.js Normal file
View File

@ -0,0 +1,9 @@
// Jest configuration for the frontend test suite.
export default {
  // Compile TypeScript test files on the fly via ts-jest.
  preset: "ts-jest",
  testEnvironment: "node",
  // Runs before tests; loads environment variables for the suite.
  setupFiles: ["<rootDir>/src/setupTests.ts"],
  transform: {
    "^.+\\.tsx?$": "ts-jest",
  },
  // Generous 30s per-test timeout — presumably for Puppeteer-driven
  // browser tests (see devDependencies); confirm before lowering.
  testTimeout: 30000,
};

View File

@ -10,7 +10,7 @@
"build-hosted": "tsc && vite build --mode prod",
"lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0",
"preview": "vite preview",
"test": "vitest"
"test": "jest"
},
"dependencies": {
"@codemirror/lang-html": "^6.4.6",
@ -46,21 +46,28 @@
"tailwindcss-animate": "^1.0.7",
"thememirror": "^2.0.1",
"vite-plugin-checker": "^0.6.2",
"webm-duration-fix": "^1.0.4"
"webm-duration-fix": "^1.0.4",
"zustand": "^4.5.2"
},
"devDependencies": {
"@types/jest": "^29.5.12",
"@types/node": "^20.9.0",
"@types/puppeteer": "^7.0.4",
"@types/react": "^18.2.15",
"@types/react-dom": "^18.2.7",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"@vitejs/plugin-react": "^4.0.3",
"autoprefixer": "^10.4.16",
"dotenv": "^16.4.5",
"eslint": "^8.45.0",
"eslint-plugin-react-hooks": "^4.6.0",
"eslint-plugin-react-refresh": "^0.4.3",
"jest": "^29.7.0",
"postcss": "^8.4.31",
"puppeteer": "^22.6.4",
"tailwindcss": "^3.3.5",
"ts-jest": "^29.1.2",
"typescript": "^5.0.2",
"vite": "^4.4.5",
"vite-plugin-html": "^3.2.0",

View File

@ -0,0 +1,2 @@
TEST_SCREENSHOTONE_API_KEY=
TEST_ROOT_PATH=

View File

@ -40,6 +40,8 @@ import ModelSettingsSection from "./components/ModelSettingsSection";
import { extractHtml } from "./components/preview/extractHtml";
import useBrowserTabIndicator from "./hooks/useBrowserTabIndicator";
import TipLink from "./components/core/TipLink";
import SelectAndEditModeToggleButton from "./components/select-and-edit/SelectAndEditModeToggleButton";
import { useAppStore } from "./store/app-store";
const IS_OPENAI_DOWN = false;
@ -54,16 +56,19 @@ function App() {
const [updateInstruction, setUpdateInstruction] = useState("");
const [isImportedFromCode, setIsImportedFromCode] = useState<boolean>(false);
const { disableInSelectAndEditMode } = useAppStore();
// Settings
const [settings, setSettings] = usePersistedState<Settings>(
{
openAiApiKey: null,
openAiBaseURL: null,
anthropicApiKey: null,
screenshotOneApiKey: null,
isImageGenerationEnabled: true,
editorTheme: EditorTheme.COBALT,
generatedCodeConfig: Stack.HTML_TAILWIND,
codeGenerationModel: CodeGenerationModel.GPT_4_TURBO_2024_04_09,
codeGenerationModel: CodeGenerationModel.GPT_4O_2024_05_13,
// Only relevant for hosted version
isTermOfServiceAccepted: false,
},
@ -89,6 +94,14 @@ function App() {
CodeGenerationModel.GPT_4_TURBO_2024_04_09 &&
settings.generatedCodeConfig === Stack.REACT_TAILWIND;
const showGpt4OMessage =
selectedCodeGenerationModel !== CodeGenerationModel.GPT_4O_2024_05_13 &&
appState === AppState.INITIAL;
const showSelectAndEditFeature =
selectedCodeGenerationModel === CodeGenerationModel.GPT_4O_2024_05_13 &&
settings.generatedCodeConfig === Stack.HTML_TAILWIND;
// Indicate coding state using the browser tab's favicon and title
useBrowserTabIndicator(appState === AppState.CODING);
@ -144,6 +157,7 @@ function App() {
setAppHistory([]);
setCurrentVersion(null);
setShouldIncludeResultImage(false);
disableInSelectAndEditMode();
};
const regenerate = () => {
@ -232,7 +246,9 @@ function App() {
parentIndex: parentVersion,
code,
inputs: {
prompt: updateInstruction,
prompt: params.history
? params.history[params.history.length - 1]
: updateInstruction,
},
},
];
@ -274,7 +290,10 @@ function App() {
}
// Subsequent updates
async function doUpdate() {
async function doUpdate(
updateInstruction: string,
selectedElement?: HTMLElement
) {
if (currentVersion === null) {
toast.error(
"No current version set. Contact support or open a Github issue."
@ -292,7 +311,17 @@ function App() {
return;
}
const updatedHistory = [...historyTree, updateInstruction];
let modifiedUpdateInstruction = updateInstruction;
// Send in a reference to the selected element if it exists
if (selectedElement) {
modifiedUpdateInstruction =
updateInstruction +
" referring to this element specifically: " +
selectedElement.outerHTML;
}
const updatedHistory = [...historyTree, modifiedUpdateInstruction];
if (shouldIncludeResultImage) {
const resultImage = await takeScreenshot();
@ -403,6 +432,15 @@ function App() {
</div>
)}
{showGpt4OMessage && (
<div className="rounded-lg p-2 bg-fuchsia-200">
<p className="text-gray-800 text-sm">
Now supporting GPT-4o. Higher quality and 2x faster. Give it a
try!
</p>
</div>
)}
{appState !== AppState.CODE_READY && <TipLink />}
{IS_RUNNING_ON_CLOUD && !settings.openAiApiKey && <OnboardingNote />}
@ -468,8 +506,8 @@ function App() {
/>
</div>
<Button
onClick={doUpdate}
className="dark:text-white dark:bg-gray-700"
onClick={() => doUpdate(updateInstruction)}
className="dark:text-white dark:bg-gray-700 update-btn"
>
Update
</Button>
@ -477,10 +515,13 @@ function App() {
<div className="flex items-center justify-end gap-x-2 mt-2">
<Button
onClick={regenerate}
className="flex items-center gap-x-2 dark:text-white dark:bg-gray-700"
className="flex items-center gap-x-2 dark:text-white dark:bg-gray-700 regenerate-btn"
>
🔄 Regenerate
</Button>
{showSelectAndEditFeature && (
<SelectAndEditModeToggleButton />
)}
</div>
<div className="flex justify-end items-center mt-2">
<TipLink />
@ -586,7 +627,7 @@ function App() {
<Button
onClick={downloadCode}
variant="secondary"
className="flex items-center gap-x-2 mr-4 dark:text-white dark:bg-gray-700"
className="flex items-center gap-x-2 mr-4 dark:text-white dark:bg-gray-700 download-btn"
>
<FaDownload /> Download
</Button>
@ -609,10 +650,18 @@ function App() {
</div>
</div>
<TabsContent value="desktop">
<Preview code={previewCode} device="desktop" />
<Preview
code={previewCode}
device="desktop"
doUpdate={doUpdate}
/>
</TabsContent>
<TabsContent value="mobile">
<Preview code={previewCode} device="mobile" />
<Preview
code={previewCode}
device="mobile"
doUpdate={doUpdate}
/>
</TabsContent>
<TabsContent value="code">
<CodeTab

View File

@ -166,7 +166,7 @@ function ImageUpload({ setReferenceImages }: Props) {
{screenRecorderState === ScreenRecorderState.INITIAL && (
/* eslint-disable-next-line @typescript-eslint/no-explicit-any */
<div {...getRootProps({ style: style as any })}>
<input {...getInputProps()} />
<input {...getInputProps()} className="file-input" />
<p className="text-slate-700 text-lg">
Drag & drop a screenshot here, <br />
or click to upload

View File

@ -38,7 +38,9 @@ function ImportCodeSection({ importFromCode }: Props) {
return (
<Dialog>
<DialogTrigger asChild>
<Button variant="secondary">Import from Code</Button>
<Button className="import-from-code-btn" variant="secondary">
Import from Code
</Button>
</DialogTrigger>
<DialogContent className="sm:max-w-[425px]">
<DialogHeader>
@ -62,7 +64,7 @@ function ImportCodeSection({ importFromCode }: Props) {
/>
<DialogFooter>
<Button type="submit" onClick={doImport}>
<Button className="import-btn" type="submit" onClick={doImport}>
Import
</Button>
</DialogFooter>

View File

@ -1,21 +1,35 @@
import { useEffect, useRef } from "react";
import { useEffect, useRef, useState } from "react";
import classNames from "classnames";
import useThrottle from "../hooks/useThrottle";
import EditPopup from "./select-and-edit/EditPopup";
interface Props {
code: string;
device: "mobile" | "desktop";
doUpdate: (updateInstruction: string, selectedElement?: HTMLElement) => void;
}
function Preview({ code, device }: Props) {
function Preview({ code, device, doUpdate }: Props) {
const iframeRef = useRef<HTMLIFrameElement | null>(null);
// Don't update code more often than every 200ms.
const throttledCode = useThrottle(code, 200);
// Select and edit functionality
const [clickEvent, setClickEvent] = useState<MouseEvent | null>(null);
useEffect(() => {
if (iframeRef.current) {
iframeRef.current.srcdoc = throttledCode;
const iframe = iframeRef.current;
if (iframe) {
iframe.srcdoc = throttledCode;
// Set up click handler for select and edit functionality
iframe.addEventListener("load", function () {
iframe.contentWindow?.document.body.addEventListener(
"click",
setClickEvent
);
});
}
}, [throttledCode]);
@ -34,6 +48,7 @@ function Preview({ code, device }: Props) {
}
)}
></iframe>
<EditPopup event={clickEvent} iframeRef={iframeRef} doUpdate={doUpdate} />
</div>
);
}

View File

@ -49,7 +49,7 @@ function SettingsDialog({ settings, setSettings }: Props) {
<div className="flex items-center space-x-2">
<Label htmlFor="image-generation">
<div>DALL-E Placeholder Image Generation</div>
<div className="font-light mt-2">
<div className="font-light mt-2 text-xs">
More fun with it but if you want to save money, turn it off.
</div>
</Label>
@ -64,10 +64,11 @@ function SettingsDialog({ settings, setSettings }: Props) {
}
/>
</div>
<div className="flex flex-col space-y-4">
<div className="flex flex-col space-y-6">
<div>
<Label htmlFor="openai-api-key">
<div>OpenAI API key</div>
<div className="font-light mt-2 leading-relaxed">
<div className="font-light mt-1 mb-2 text-xs leading-relaxed">
Only stored in your browser. Never stored on servers. Overrides
your .env config.
</div>
@ -84,9 +85,10 @@ function SettingsDialog({ settings, setSettings }: Props) {
}))
}
/>
</div>
{!IS_RUNNING_ON_CLOUD && (
<>
<div>
<Label htmlFor="openai-api-key">
<div>OpenAI Base URL (optional)</div>
<div className="font-light mt-2 leading-relaxed">
@ -105,9 +107,31 @@ function SettingsDialog({ settings, setSettings }: Props) {
}))
}
/>
</>
</div>
)}
<div>
<Label htmlFor="anthropic-api-key">
<div>Anthropic API key</div>
<div className="font-light mt-1 text-xs leading-relaxed">
Only stored in your browser. Never stored on servers. Overrides
your .env config.
</div>
</Label>
<Input
id="anthropic-api-key"
placeholder="Anthropic API key"
value={settings.anthropicApiKey || ""}
onChange={(e) =>
setSettings((s) => ({
...s,
anthropicApiKey: e.target.value,
}))
}
/>
</div>
<Accordion type="single" collapsible className="w-full">
<AccordionItem value="item-1">
<AccordionTrigger>Screenshot by URL Config</AccordionTrigger>

View File

@ -69,7 +69,7 @@ export function UrlInputSection({ doCreate, screenshotOneApiKey }: Props) {
<Button
onClick={takeScreenshot}
disabled={isLoading}
className="bg-slate-400"
className="bg-slate-400 capture-btn"
>
{isLoading ? "Capturing..." : "Capture"}
</Button>

View File

@ -4,7 +4,7 @@ import RatingPicker from "./RatingPicker";
interface Eval {
input: string;
output: string;
outputs: string[];
}
function EvalsPage() {
@ -38,18 +38,22 @@ function EvalsPage() {
<div className="flex flex-col gap-y-4 mt-4 mx-auto justify-center">
{evals.map((e, index) => (
<div className="flex flex-col justify-center" key={index}>
<div className="flex gap-x-2 justify-center">
<h2 className="font-bold text-lg ml-4">{index}</h2>
<div className="flex gap-x-2 justify-center ml-4">
{/* Update w if N changes to a fixed number like w-[600px] */}
<div className="w-1/2 p-1 border">
<img src={e.input} />
<img src={e.input} alt={`Input for eval ${index}`} />
</div>
<div className="w-1/2 p-1 border">
{e.outputs.map((output, outputIndex) => (
<div className="w-1/2 p-1 border" key={outputIndex}>
{/* Put output into an iframe */}
<iframe
srcDoc={e.output}
srcDoc={output}
className="w-[1200px] h-[800px] transform scale-[0.60]"
style={{ transformOrigin: "top left" }}
></iframe>
</div>
))}
</div>
<div className="ml-8 mt-4 flex justify-center">
<RatingPicker

View File

@ -1,4 +1,3 @@
import { expect, test } from "vitest";
import { extractHistoryTree, renderHistory } from "./utils";
import type { History } from "./history_types";
@ -84,7 +83,8 @@ const basicBadHistory: History = [
},
];
test("should correctly extract the history tree", () => {
describe("History Utils", () => {
test("should correctly extract the history tree", () => {
expect(extractHistoryTree(basicLinearHistory, 2)).toEqual([
"<html>1. create</html>",
"use better icons",
@ -132,9 +132,9 @@ test("should correctly extract the history tree", () => {
// Bad tree
expect(() => extractHistoryTree(basicBadHistory, 1)).toThrow();
});
});
test("should correctly render the history tree", () => {
test("should correctly render the history tree", () => {
expect(renderHistory(basicLinearHistory, 2)).toEqual([
{
isActive: false,
@ -227,4 +227,5 @@ test("should correctly render the history tree", () => {
type: "Edit",
},
]);
});
});

View File

@ -0,0 +1,143 @@
// Popup overlay for "select and edit" mode: when the user clicks an element
// inside the preview iframe, this popup appears near the click position and
// lets them type an instruction that is sent to doUpdate together with the
// selected element.
import React, { useEffect, useRef, useState } from "react";
import { Textarea } from "../ui/textarea";
import { Button } from "../ui/button";
import { addHighlight, getAdjustedCoordinates, removeHighlight } from "./utils";
import { useAppStore } from "../../store/app-store";

interface EditPopupProps {
  // Click event forwarded from inside the preview iframe (null until a click happens)
  event: MouseEvent | null;
  // Ref to the preview iframe; its bounding rect is used to translate
  // iframe-local click coordinates into page coordinates
  iframeRef: React.RefObject<HTMLIFrameElement>;
  // Callback that kicks off a regeneration with the user's instruction and,
  // optionally, the clicked element
  doUpdate: (updateInstruction: string, selectedElement?: HTMLElement) => void;
}

const EditPopup: React.FC<EditPopupProps> = ({
  event,
  iframeRef,
  doUpdate,
}) => {
  // App state
  const { inSelectAndEditMode } = useAppStore();

  // Create a wrapper ref to store inSelectAndEditMode so the value is not stale
  // in a event listener
  const inSelectAndEditModeRef = useRef(inSelectAndEditMode);

  // Update the ref whenever the state changes
  useEffect(() => {
    inSelectAndEditModeRef.current = inSelectAndEditMode;
  }, [inSelectAndEditMode]);

  // Popup state
  const [popupVisible, setPopupVisible] = useState(false);
  const [popupPosition, setPopupPosition] = useState({ x: 0, y: 0 });

  // Edit state
  const [selectedElement, setSelectedElement] = useState<
    HTMLElement | undefined
  >(undefined);
  const [updateText, setUpdateText] = useState("");

  // Textarea ref for focusing
  const textareaRef = useRef<HTMLTextAreaElement | null>(null);

  // Submit the instruction. Strips the highlight styles from the selected
  // element before handing it to doUpdate so the temporary inline styles
  // don't leak into the element HTML sent to the model.
  function onUpdate(updateText: string) {
    // Perform the update
    doUpdate(
      updateText,
      selectedElement ? removeHighlight(selectedElement) : selectedElement
    );

    // Unselect the element
    setSelectedElement(undefined);

    // Hide the popup
    setPopupVisible(false);
  }

  // Remove highlight and reset state when not in select and edit mode
  useEffect(() => {
    if (!inSelectAndEditMode) {
      if (selectedElement) removeHighlight(selectedElement);
      setSelectedElement(undefined);
      setPopupVisible(false);
    }
  }, [inSelectAndEditMode, selectedElement]);

  // Handle the click event
  useEffect(() => {
    // Return if not in select and edit mode
    // (read through the ref so a stale closure can't see an old mode value)
    if (!inSelectAndEditModeRef.current || !event) {
      return;
    }

    // Prevent default to avoid issues like label clicks triggering textareas, etc.
    event.preventDefault();

    const targetElement = event.target as HTMLElement;

    // Return if no target element
    if (!targetElement) return;

    // Highlight and set the selected element
    setSelectedElement((prev) => {
      // Remove style from previous element
      if (prev) {
        removeHighlight(prev);
      }
      return addHighlight(targetElement);
    });

    // Calculate adjusted coordinates
    // (click coords are iframe-local; offset by the iframe's bounding rect)
    const adjustedCoordinates = getAdjustedCoordinates(
      event.clientX,
      event.clientY,
      iframeRef.current?.getBoundingClientRect()
    );

    // Show the popup at the click position
    setPopupVisible(true);
    setPopupPosition({ x: adjustedCoordinates.x, y: adjustedCoordinates.y });

    // Reset the update text
    setUpdateText("");

    // Focus the textarea
    textareaRef.current?.focus();
  }, [event, iframeRef]);

  // Focus the textarea when the popup is visible (we can't do this only when handling the click event
  // because the textarea is not rendered yet)
  // We need to also do it in the click event because popupVisible doesn't change values in that event
  useEffect(() => {
    if (popupVisible) {
      textareaRef.current?.focus();
    }
  }, [popupVisible]);

  // NOTE(review): returns undefined (not null) when hidden — confirm the
  // project's @types/react version accepts undefined as a component result.
  if (!popupVisible) return;

  return (
    <div
      className="absolute bg-white p-4 border border-gray-300 rounded shadow-lg w-60"
      style={{ top: popupPosition.y, left: popupPosition.x }}
    >
      <Textarea
        ref={textareaRef}
        value={updateText}
        onChange={(e) => setUpdateText(e.target.value)}
        placeholder="Tell the AI what to change about this element..."
        onKeyDown={(e) => {
          if (e.key === "Enter") {
            e.preventDefault();
            onUpdate(updateText);
          }
        }}
      />
      <div className="flex justify-end mt-2">
        <Button onClick={() => onUpdate(updateText)}>Update</Button>
      </div>
    </div>
  );
};

export default EditPopup;

View File

@ -0,0 +1,22 @@
import { GiClick } from "react-icons/gi";
import { useAppStore } from "../../store/app-store";
import { Button } from "../ui/button";
/**
 * Toolbar button that toggles the app-wide "select and edit" mode.
 * While the mode is active the button switches to the destructive variant
 * and offers to exit; otherwise it invites the user to enter the mode.
 */
function SelectAndEditModeToggleButton() {
  // Mode flag and its toggle live in the global zustand app store.
  const { inSelectAndEditMode, toggleInSelectAndEditMode } = useAppStore();

  const label = inSelectAndEditMode
    ? "Exit selection mode"
    : "Select and update";

  return (
    <Button
      variant={inSelectAndEditMode ? "destructive" : "default"}
      onClick={toggleInSelectAndEditMode}
      className="flex items-center gap-x-2 dark:text-white dark:bg-gray-700 regenerate-btn"
    >
      <GiClick className="text-lg" />
      <span>{label}</span>
    </Button>
  );
}
export default SelectAndEditModeToggleButton;

View File

@ -0,0 +1,22 @@
/**
 * Clears the inline highlight styles that addHighlight applied and
 * returns the same element for chaining.
 */
export function removeHighlight(element: HTMLElement) {
  const { style } = element;
  style.outline = "";
  style.backgroundColor = "";
  return element;
}
/**
 * Marks an element as the current selection: dashed blue outline plus a
 * light blue background. Returns the same element for chaining.
 */
export function addHighlight(element: HTMLElement) {
  const { style } = element;
  style.outline = "2px dashed #1846db";
  style.backgroundColor = "#bfcbf5";
  return element;
}
/**
 * Translates click coordinates by an (optional) bounding rect's origin —
 * e.g. converting iframe-local coordinates into page coordinates.
 * When no rect is supplied, the coordinates pass through unchanged.
 */
export function getAdjustedCoordinates(
  x: number,
  y: number,
  rect: DOMRect | undefined
) {
  return {
    x: x + (rect?.left ?? 0),
    y: y + (rect?.top ?? 0),
  };
}

View File

@ -1,5 +1,7 @@
// Keep in sync with backend (llm.py)
// Order here matches dropdown order
export enum CodeGenerationModel {
GPT_4O_2024_05_13 = "gpt-4o-2024-05-13",
GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09",
GPT_4_VISION = "gpt_4_vision",
CLAUDE_3_SONNET = "claude_3_sonnet",
@ -9,6 +11,7 @@ export enum CodeGenerationModel {
export const CODE_GENERATION_MODEL_DESCRIPTIONS: {
[key in CodeGenerationModel]: { name: string; inBeta: boolean };
} = {
"gpt-4o-2024-05-13": { name: "GPT-4o 🌟", inBeta: false },
"gpt-4-turbo-2024-04-09": { name: "GPT-4 Turbo (Apr 2024)", inBeta: false },
gpt_4_vision: { name: "GPT-4 Vision (Nov 2023)", inBeta: false },
claude_3_sonnet: { name: "Claude 3 Sonnet", inBeta: false },

View File

@ -0,0 +1,3 @@
// Jest setup file: load environment variables from .env.jest so the test
// runner can read them via process.env.
import { config as loadEnv } from "dotenv";

loadEnv({ path: ".env.jest" });

View File

@ -0,0 +1,15 @@
import { create } from "zustand";
// Store for app-wide state
interface AppStore {
inSelectAndEditMode: boolean;
toggleInSelectAndEditMode: () => void;
disableInSelectAndEditMode: () => void;
}
export const useAppStore = create<AppStore>((set) => ({
inSelectAndEditMode: false,
toggleInSelectAndEditMode: () =>
set((state) => ({ inSelectAndEditMode: !state.inSelectAndEditMode })),
disableInSelectAndEditMode: () => set({ inSelectAndEditMode: false }),
}));

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

View File

@ -0,0 +1,274 @@
import puppeteer, { Browser, Page, ElementHandle } from "puppeteer";
import { Stack } from "../lib/stacks";
import { CodeGenerationModel } from "../lib/models";

// Root folder for fixtures/results, supplied by the environment.
// NOTE(review): the env var is TEST_ROOT_PATH while the constant is named
// TESTS_ROOT_PATH — confirm the variable name matches how CI sets it.
const TESTS_ROOT_PATH = process.env.TEST_ROOT_PATH;

// Fixtures (input screenshots fed to the app)
const FIXTURES_PATH = `${TESTS_ROOT_PATH}/fixtures`;
const SIMPLE_SCREENSHOT = FIXTURES_PATH + "/simple_button.png";
const SCREENSHOT_WITH_IMAGES = `${FIXTURES_PATH}/simple_ui_with_image.png`;

// Results (screenshots captured during test runs are written here)
const RESULTS_DIR = `${TESTS_ROOT_PATH}/results`;
// End-to-end tests that drive the real frontend (expected to be running at
// localhost:5173) through Puppeteer, across every (model, stack) pair.
describe("e2e tests", () => {
  let browser: Browser;
  let page: Page;

  // DEBUG=true shrinks the matrix to one stack and one model for fast local runs.
  const DEBUG = false;
  const IS_HEADLESS = true;

  const stacks = Object.values(Stack).slice(0, DEBUG ? 1 : undefined);
  const models = Object.values(CodeGenerationModel).slice(
    0,
    DEBUG ? 1 : undefined
  );

  beforeAll(async () => {
    browser = await puppeteer.launch({ headless: IS_HEADLESS });
    page = await browser.newPage();
    // The frontend dev server must already be running for these tests.
    await page.goto("http://localhost:5173/");
    // Set screen size
    await page.setViewport({ width: 1080, height: 1024 });
    // TODO: Does this need to be moved?
    // const client = await page.createCDPSession();
    // Set download behavior path
    // await client.send("Page.setDownloadBehavior", {
    //   behavior: "allow",
    //   downloadPath: DOWNLOAD_PATH,
    // });
  });

  afterAll(async () => {
    await browser.close();
  });

  // Create tests: generate code from a screenshot and from a live URL
  // for every (model, stack) combination.
  models.forEach((model) => {
    stacks.forEach((stack) => {
      it(
        `Create for : ${model} & ${stack}`,
        async () => {
          const app = new App(
            page,
            stack,
            model,
            `create_screenshot_${model}_${stack}`
          );
          await app.init();
          // Generate from screenshot
          await app.uploadImage(SCREENSHOT_WITH_IMAGES);
        },
        60 * 1000
      );
      it(
        `Create from URL for : ${model} & ${stack}`,
        async () => {
          const app = new App(
            page,
            stack,
            model,
            `create_url_${model}_${stack}`
          );
          await app.init();
          // Generate by screenshotting a live URL
          await app.generateFromUrl("https://a.picoapps.xyz/design-fear");
        },
        60 * 1000
      );
    });
  });

  // Update tests - for every model (doesn't need to be repeated for each
  // stack - fixed to HTML Tailwind only)
  models.forEach((model) => {
    ["html_tailwind"].forEach((stack) => {
      it(
        `update: ${model}`,
        async () => {
          const app = new App(page, stack, model, `update_${model}_${stack}`);
          await app.init();
          // Generate from screenshot
          await app.uploadImage(SIMPLE_SCREENSHOT);
          // Regenerate works for v1
          await app.regenerate();
          // Make an update
          await app.edit("make the button background blue", "v2");
          // Make another update
          await app.edit("make the text italic", "v3");
          // Branch off v2 and make an update
          await app.clickVersion("v2");
          await app.edit("make the text yellow", "v4");
        },
        90 * 1000
      );
    });
  });

  // Start from code tests - for every model. Currently skipped: the flow
  // relies on App.importFromCode, which is still a work in progress.
  models.forEach((model) => {
    ["html_tailwind"].forEach((stack) => {
      it.skip(
        `Start from code: ${model}`,
        async () => {
          const app = new App(
            page,
            stack,
            model,
            `start_from_code_${model}_${stack}`
          );
          await app.init();
          await app.importFromCode();
          // Regenerate works for v1
          // await app.regenerate();
          // // Make an update
          // await app.edit("make the header blue", "v2");
          // // Make another update
          // await app.edit("make all text italic", "v3");
          // // Branch off v2 and make an update
          // await app.clickVersion("v2");
          // await app.edit("make all text red", "v4");
        },
        90 * 1000
      );
    });
  });
});
/**
 * Page-object wrapper around the screenshot-to-code frontend, driven via
 * Puppeteer. Each instance is configured for one (stack, model) pair and
 * prefixes all screenshots it saves with the given test id.
 */
class App {
  private screenshotPathPrefix: string;
  private page: Page;
  private stack: string;
  private model: string;

  constructor(page: Page, stack: string, model: string, testId: string) {
    this.page = page;
    this.stack = stack;
    this.model = model;
    this.screenshotPathPrefix = `${RESULTS_DIR}/${testId}`;
  }

  async init() {
    await this.setupLocalStorage();
  }

  // Seed the app's persisted settings in localStorage so the UI starts with
  // the desired stack/model without clicking through the settings dialog.
  async setupLocalStorage() {
    const setting = {
      openAiApiKey: null,
      openAiBaseURL: null,
      screenshotOneApiKey: process.env.TEST_SCREENSHOTONE_API_KEY,
      isImageGenerationEnabled: true,
      editorTheme: "cobalt",
      generatedCodeConfig: this.stack,
      codeGenerationModel: this.model,
      isTermOfServiceAccepted: false,
      accessCode: null,
    };
    await this.page.evaluate((setting) => {
      localStorage.setItem("setting", JSON.stringify(setting));
    }, setting);
    // Reload the page to apply the local storage
    await this.page.reload();
  }

  // Save a screenshot named after the test id and the current step.
  async _screenshot(step: string) {
    await this.page.screenshot({
      path: `${this.screenshotPathPrefix}_${step}.png`,
    });
  }

  // Block until the given version label (e.g. "v1") appears anywhere in the
  // page body, i.e. code generation for that version has finished.
  async _waitUntilVersionIsReady(version: string) {
    await this.page.waitForNetworkIdle();
    await this.page.waitForFunction(
      (version) => document.body.innerText.includes(version),
      {
        timeout: 30000,
      },
      version
    );
    // Wait for 3s so that the HTML and JS has time to render before screenshotting
    await new Promise((resolve) => setTimeout(resolve, 3000));
  }

  // Types a URL into the capture input and generates code from it.
  async generateFromUrl(url: string) {
    // Type in the URL
    await this.page.type('input[placeholder="Enter URL"]', url);
    await this._screenshot("typed_url");
    // Click the capture button and wait for the code to be generated
    await this.page.click("button.capture-btn");
    await this._waitUntilVersionIsReady("v1");
    await this._screenshot("url_result");
  }

  // Uploads a screenshot and generates the image
  async uploadImage(screenshotPath: string) {
    // Upload file
    const fileInput = (await this.page.$(
      ".file-input"
    )) as ElementHandle<HTMLInputElement>;
    if (!fileInput) {
      throw new Error("File input element not found");
    }
    await fileInput.uploadFile(screenshotPath);
    await this._screenshot("image_uploaded");
    // NOTE(review): no button is clicked here — presumably uploading the file
    // triggers generation automatically; confirm against the frontend.
    await this._waitUntilVersionIsReady("v1");
    await this._screenshot("image_results");
  }

  // Makes a text edit and waits for a new version
  async edit(edit: string, version: string) {
    // Type in the edit
    await this.page.type(
      'textarea[placeholder="Tell the AI what to change..."]',
      edit
    );
    await this._screenshot(`typed_${version}`);
    // Click the update button and wait for the code to be generated
    await this.page.click(".update-btn");
    await this._waitUntilVersionIsReady(version);
    await this._screenshot(`done_${version}`);
  }

  // Clicks every <div> whose text contains the version label, selecting that
  // version in the sidebar. (Broad selector; may click unrelated divs too.)
  async clickVersion(version: string) {
    await this.page.evaluate((version) => {
      document.querySelectorAll("div").forEach((div) => {
        if (div.innerText.includes(version)) {
          div.click();
        }
      });
    }, version);
  }

  // Re-runs generation for v1 and screenshots the result.
  async regenerate() {
    await this.page.click(".regenerate-btn");
    await this._waitUntilVersionIsReady("v1");
    await this._screenshot("regenerate_results");
  }

  // Work in progress
  async importFromCode() {
    await this.page.click(".import-from-code-btn");
    await this.page.type("textarea", "<html>hello world</html>");
    await this.page.select("#output-settings-js", "HTML + Tailwind");
    await this._screenshot("typed_code");
    await this.page.click(".import-btn");
    await this._waitUntilVersionIsReady("v1");
  }
}

View File

@ -16,6 +16,7 @@ export interface Settings {
codeGenerationModel: CodeGenerationModel;
// Only relevant for hosted version
isTermOfServiceAccepted: boolean;
anthropicApiKey: string | null; // Added property for anthropic API key
}
export enum AppState {

File diff suppressed because it is too large Load Diff