Compare commits

..

316 Commits

Author SHA1 Message Date
Abi Raja
a53afd350d update sdxl-lightning references to flux 2024-09-20 16:45:16 +02:00
Abi Raja
6899c7792e when all generations fail, print all the underlying exceptions for debugging 2024-09-20 13:56:20 +02:00
Abi Raja
9199fee21d keep generating even if one of the models fails 2024-09-17 15:56:35 +02:00
Abi Raja
8717298def switch to flux schnell for replicate image gen 2024-09-17 12:20:19 +02:00
Abi Raja
a4087e613f make variant 1 claude 2024-09-16 15:21:57 +02:00
Abi Raja
cf6b94d675 update favicon/branding 2024-09-14 17:44:57 +02:00
Abi Raja
589507846b remove model selection dropdown since that happens on the background now 2024-09-13 14:41:00 +02:00
Abi Raja
24995c302e
Merge pull request #394 from abi/multiple-generations
Support multiple variants for each generation
2024-09-06 11:43:44 -04:00
Abi Raja
e4b021c2a4 remove debugging statement 2024-09-06 17:41:19 +02:00
Abi Raja
b4a9bbd9da fix unit tests 2024-08-31 17:22:18 +02:00
Abi Raja
0db8a206f2 disable variants for video 2024-08-31 15:47:44 +02:00
Abi Raja
c1fc28624d remove unused code 2024-08-31 15:43:02 +02:00
Abi Raja
c55e5d7923 remove unused code 2024-08-30 16:32:20 +02:00
Abi Raja
67ce707c3c
Update README.md 2024-08-30 08:47:27 -04:00
Abi Raja
5d33dd6bf3 fix TODO 2024-08-30 14:07:30 +02:00
Abi Raja
67e7066221 fix some TODOs 2024-08-30 13:45:28 +02:00
Abi Raja
e67239745c fix type error 2024-08-30 13:32:59 +02:00
Abi Raja
ce6a0622d0 fix TODO 2024-08-30 13:19:24 +02:00
Abi Raja
6542289931 fix some TODOs 2024-08-29 18:43:40 +02:00
Abi Raja
0ff42d9083 make commits immutable after a new commit is added 2024-08-29 17:57:52 +02:00
Abi Raja
f09b1c3c7f remove comment 2024-08-22 16:56:12 -04:00
Abi Raja
bf3e4eecb2 throw exceptions so Sentry can capture them 2024-08-22 16:55:39 -04:00
Abi Raja
939d4eebf8 nit 2024-08-22 16:42:51 -04:00
Abi Raja
a243480d78 streamline createCommit helper 2024-08-22 16:38:39 -04:00
Abi Raja
30d5cd2b65 move commit code into one file 2024-08-22 16:27:42 -04:00
Abi Raja
637f75b93e clean up history rendering 2024-08-22 15:20:20 -04:00
Abi Raja
13f93e259d clean up HistoryDisplay a little bit 2024-08-22 13:59:25 -04:00
Abi Raja
c900201417 support import from code 2024-08-22 13:51:40 -04:00
Abi Raja
089f4302d2 cleaner resets with resetHead 2024-08-22 13:32:12 -04:00
Abi Raja
8e8f0b4b64 intermediate changes towards multiple generations 2024-08-22 13:26:42 -04:00
Abi Raja
5f6dd08411 reset inputMode when resetting state 2024-08-05 16:17:48 -04:00
Abi Raja
b158597d7e fix bug with prompt assembly for imported code with Claude which disallows multiple user messages in a row 2024-08-05 14:11:01 -04:00
Abi Raja
fb5480b036 fix type error 2024-07-31 16:05:16 -04:00
Abi Raja
c76c7c202a move parameter extraction to separate fn 2024-07-31 15:46:53 -04:00
Abi Raja
823bd2e249 hide execution console 2024-07-31 14:44:33 -04:00
Abi Raja
710f2987a4 merge 2024-07-31 14:40:52 -04:00
Abi Raja
5c3f915bce parallelize just image generation 2024-07-31 13:36:22 -04:00
Abi Raja
701d97ec74 add comments 2024-07-31 11:27:26 -04:00
Abi Raja
7b2e2963ad print for debugging 2024-07-31 11:25:49 -04:00
Abi Raja
637c1b4c1d fix TODO 2024-07-31 11:22:25 -04:00
Abi Raja
c61a2ac772 fix TODO 2024-07-31 11:18:35 -04:00
Abi Raja
bcb89a3c23 refactor 2024-07-31 11:15:56 -04:00
Abi Raja
3591588e2b abstract out prompt assembly into a separate function 2024-07-31 11:07:30 -04:00
Abi Raja
dd7a51dd34 refactor 2024-07-31 10:16:03 -04:00
Abi Raja
3fbc0f9458 refactor 2024-07-31 10:15:07 -04:00
Abi Raja
ff12790883 refactor 2024-07-31 10:14:23 -04:00
Abi Raja
0f731598dd refactor to get .get() 2024-07-31 10:10:13 -04:00
Abi Raja
64dd7d6279 refactor 2024-07-31 10:04:04 -04:00
Abi Raja
96658819f3 fix issue with loading variants 2024-07-31 09:53:21 -04:00
Abi Raja
24a123db36 refactors 2024-07-30 16:37:36 -04:00
Abi Raja
64926408b0 refactor 2024-07-30 16:29:06 -04:00
Abi Raja
0700de7767 standardize to using typed send_message 2024-07-30 16:27:04 -04:00
Abi Raja
46c480931a make execution console show logs from both variants 2024-07-30 16:23:53 -04:00
Abi Raja
f52ca306a5 fix bug with handling setCode 2024-07-30 16:09:02 -04:00
Abi Raja
aff9352dc0 set up multiple generations 2024-07-30 15:44:48 -04:00
Abi Raja
4b0adc5769 poll more frequently and print timing logs 2024-07-29 16:33:04 -04:00
Abi Raja
52099e0853 create an eval script for sdxl lightning (replicate) vs dalle and support using replicate for image generation when key is set 2024-07-29 16:23:59 -04:00
Abi Raja
d7ab620e0b deep copy messages to avoid modifying the original list in the Claude LLM call 2024-07-26 11:56:21 -04:00
Abi Raja
21f553a0d8 add more comments 2024-07-25 15:20:06 -04:00
Abi Raja
7af2e34415 add more comments 2024-07-25 15:15:33 -04:00
Abi Raja
59b031e395 add comments and rename some code 2024-07-25 15:03:43 -04:00
Abi Raja
88f4e81b96 update comment 2024-07-25 14:59:13 -04:00
Abi Raja
40c0f52075 abstract out takeScreenshot 2024-07-25 14:58:18 -04:00
Abi Raja
7eac0741e0 hide execution console 2024-07-25 14:51:19 -04:00
Abi Raja
9b8ec5b67c abstract into GenerationSettings 2024-07-25 14:50:37 -04:00
Abi Raja
c72484a72c move files into folders 2024-07-25 14:43:20 -04:00
Abi Raja
0d639b4920 move files around and abstract to DeprecationMessage 2024-07-25 14:40:48 -04:00
Abi Raja
1a3086cc9a move download code to separate file 2024-07-25 14:36:22 -04:00
Abi Raja
993ff88e2b abstract into more components 2024-07-25 14:35:04 -04:00
Abi Raja
deb2375146 isolate history component by using projectStore directly 2024-07-25 12:45:03 -04:00
Abi Raja
dcef298dba also use appHistory 2024-07-25 12:37:06 -04:00
Abi Raja
83f6f00b10 also use currentVersion 2024-07-25 12:33:31 -04:00
Abi Raja
98951e0382 move some state over to zustand store 2024-07-25 12:32:17 -04:00
Abi Raja
fd6a8f779d add comment 2024-07-19 16:39:21 -04:00
Abi Raja
ba0d51a34f remove react warning 2024-07-19 16:11:46 -04:00
Abi Raja
3f6fcdea25 remove unused UI 2024-07-19 16:01:30 -04:00
Abi Raja
54b59c85d6 add eval runner for text prompt 2024-07-19 09:25:11 -04:00
Abi Raja
9d11866143 improve evals code 2024-07-19 07:55:44 -04:00
Abi Raja
9f732c4f5d update max tokens for Claude Sonnet 3.5 to newly supported limit (8192) 2024-07-15 18:51:22 -04:00
Abi Raja
a2d6fda7fd update QA tests to use 4o and Claude Sonnet 2024-07-15 16:33:39 -04:00
Abi Raja
329b8e8e77 update front-end to indicate status of deprecated models 2024-07-11 16:34:48 -04:00
Abi Raja
9b99cad286 auto-upgrade usage of older models 2024-07-11 16:26:26 -04:00
Abi Raja
6c2bd1eb64 rename to KeyboardShortcutBadge 2024-07-11 14:20:50 -04:00
Abi Raja
f6da5eb0d4 add keyboard shortcut badge for update buttons 2024-07-11 14:18:31 -04:00
Abi Raja
9f034f7dcc improve dark mode of select and edit popup 2024-07-11 13:58:17 -04:00
Abi Raja
24a7462c9f support enter to update, shift + enter to create a new line and focus on update textarea after coding 2024-07-11 13:53:49 -04:00
Abi Raja
9ec3adad19 fix bug where an image with dimensions larger than what Claude can accept but under the file size limit Claude can accept was not resized 2024-07-03 10:06:04 -04:00
Abi Raja
d2359f644c for images sent to Claude, resize if dimension exceeds limit 2024-06-27 17:59:15 +08:00
Abi Raja
8e8ad1409f
Merge pull request #227 from fulit103/add-html-css-option
Add HTML/CSS system prompt and update stack descriptions
2024-06-27 16:25:46 +08:00
Abi Raja
606f535bc2 enable Select and Edit for HTML + CSS stack 2024-06-27 16:24:06 +08:00
Abi Raja
38a06517fb add unit tests for the prompt assembly 2024-06-27 16:08:41 +08:00
Abi Raja
638053a54c add HTML + CSS prompt for imported code 2024-06-27 16:02:06 +08:00
Abi Raja
ec1bada3e2 fix type errors introduced with previous changes 2024-06-27 16:01:28 +08:00
Abi Raja
51e06a4c4b fix order and update README 2024-06-27 15:03:42 +08:00
Abi Raja
d4e182cfe0 update name of stack 2024-06-27 14:58:14 +08:00
Abi Raja
0200274e61 Merge branch 'main' into pr/227 2024-06-27 14:44:24 +08:00
Abi Raja
901ce332a5 update default model to Claude 3.5 Sonnet 2024-06-26 16:52:07 +08:00
Abi Raja
edfd16ef1d extract only html content 2024-06-26 16:36:33 +08:00
Abi Raja
89e442423c
Update README.md 2024-06-26 02:37:54 -04:00
Abi Raja
9dfb0dc5b8 do not allow empty updates 2024-06-26 13:39:22 +08:00
Abi Raja
4288cf2088 fix imported code prompt for Claude which doesn't allow multiple 'user' role messages in a row 2024-06-26 13:21:12 +08:00
Abi Raja
6be83b4a2d process images to be under 5mb for Claude 2024-06-25 20:57:41 +08:00
Abi Raja
5843e75760 enable select and edit for Claude 3.5 2024-06-25 06:11:01 +08:00
Abi Raja
6fe7098201
Update README.md 2024-06-24 04:16:29 -04:00
Abi Raja
9a4fbd4591
Merge pull request #358 from moas/patch-1
Fix Python version in dockerfile to avoid TypeError
2024-06-23 05:48:44 -04:00
Abi Raja
f75294ca64 update better model banner to include sonnet 3.5 2024-06-22 19:41:56 +08:00
Abi Raja
49a24d8b0c support Claude Sonnet 3.5 2024-06-22 19:39:49 +08:00
Oscar Landry L
9e2eb0c58c
Update Dockerfile
fix error:
TypeError: ForwardRef._evaluate() missing 1 required keyword-only argument: 'recursive_guard'
2024-06-11 14:58:18 +02:00
Abi Raja
392b9849a2
Merge pull request #355 from abi/select-and-edit
Add "Select and Edit" functionality for easier edits
2024-06-05 20:59:57 -04:00
Abi Raja
f7c9209d18 remove unused code 2024-06-05 20:49:31 -04:00
Abi Raja
1c13897f45 remove dummy state 2024-06-05 20:46:52 -04:00
Abi Raja
402a279eaf revert debugging statements 2024-06-05 20:41:35 -04:00
Abi Raja
465dc64439
Merge pull request #317 from MarcusLeoTKM/nodeversion
Changed node version from 20.9 to 22
2024-06-05 15:47:44 -04:00
Abi Raja
228870f8fc
Merge pull request #353 from naman1608/fix-docker-pupeteer-skip
[FIX] modify frontend dockerfile to skip puppeteer download
2024-06-05 15:26:33 -04:00
Abi Raja
561ac0b088
Merge pull request #348 from naman1608/type-fixes
full typed in image generation file
2024-06-05 15:21:41 -04:00
Abi Raja
7cd9b294c3 fix another type ignore 2024-06-05 15:18:54 -04:00
Abi Raja
8de0e95c54 fix up types 2024-06-05 15:12:47 -04:00
Naman Dhingra
a73b1f6dc3 modify frontend dockerfile to skip puppeteer download 2024-06-05 00:31:21 +05:30
Abi Raja
9fe32ba6e8 move inSelectAndEditMode to inside EditPopup 2024-05-31 21:39:12 -04:00
Abi Raja
e1265b2b75 make enter submit the change 2024-05-31 18:45:45 -04:00
Abi Raja
5f3868f3c9 fix bug with focusing textarea 2024-05-31 18:36:11 -04:00
Abi Raja
423c74bdc9 focus the textarea when the popup is visible 2024-05-31 18:33:42 -04:00
Abi Raja
3fbb8f074a reset should disable select and edit mode 2024-05-31 18:27:47 -04:00
Abi Raja
a7c905a301 update icon and text 2024-05-31 18:22:30 -04:00
Abi Raja
5a62fce52d
Merge pull request #341 from naman1608/handle-none-types
handle none type in llm.py
2024-05-31 15:04:32 -04:00
Abi Raja
b9fb11b6fd Merge branch 'main' into pr/341 2024-05-31 14:44:52 -04:00
Abi Raja
328dc7931d
Update README.md 2024-05-31 14:44:11 -04:00
Abi Raja
2204896e31
Update Troubleshooting.md 2024-05-31 14:40:25 -04:00
Abi Raja
22e45cc566
Merge pull request #329 from abi/add-anthropic-key
Add field for Anthropic API key in settings
2024-05-31 14:35:47 -04:00
Abi Raja
f6f8b2f707 fix bug 2024-05-31 14:24:01 -04:00
Abi Raja
0959441834 Merge branch 'main' into add-anthropic-key 2024-05-31 14:07:50 -04:00
Abi Raja
c64968b8f0 read anthropic api key from front-end 2024-05-31 14:06:08 -04:00
Abi Raja
8e579e425e clean up settings dialog look 2024-05-31 13:51:43 -04:00
Abi Raja
9cfcf928d0 isolate functionality
use zustand to share state rather than prop drilling

isolate more functionality away into a separate component
2024-05-30 10:43:12 -04:00
Abi Raja
9e2cbdce94 Merge branch 'main' into select-and-edit 2024-05-30 09:38:24 -04:00
Abi Raja
9d1fbc9b23 add zustand and create a dummy app store that isn't used yet 2024-05-30 09:38:15 -04:00
Abi Raja
43ae003c83 clean up code even further 2024-05-29 17:28:09 -04:00
Abi Raja
2709312943 move more of the code into EditPopup 2024-05-29 16:53:36 -04:00
Abi Raja
b0e7ae35af clean up more of the code 2024-05-29 16:36:20 -04:00
Abi Raja
f18724104c add for debugging 2024-05-29 15:32:19 -04:00
Abi Raja
82132d57aa improve look of popup 2024-05-29 14:54:50 -04:00
Abi Raja
608ba5cc6f improve x and y position 2024-05-29 14:48:01 -04:00
Abi Raja
97cdb093a5 basic UI for selecting and editing 2024-05-29 14:40:46 -04:00
Abi Raja
939539611f
Update README.md 2024-05-29 11:03:40 -04:00
Naman
f0ee686660 null check for chunk.choices 2024-05-29 13:15:23 +05:30
Naman
c5695975c7 handle none type in llm.py 2024-05-29 13:15:23 +05:30
Naman
d31ebcaa27 types fix in image generation file 2024-05-29 07:11:03 +05:30
Abi Raja
23e631765e
Create FUNDING.yml 2024-05-23 12:19:28 -04:00
Abi Raja
bef9aa831a
Update README.md 2024-05-23 12:00:59 -04:00
Abi Raja
bf38200d2a
Merge pull request #337 from abi/qa-testing
Add e2e tests
2024-05-21 10:47:04 -04:00
Abi Raja
9882447586 add comments 2024-05-21 10:38:33 -04:00
Abi Raja
55a0a6c444 minor fixes 2024-05-20 16:22:41 -04:00
Abi Raja
f01403d480 Merge branch 'main' into qa-testing 2024-05-20 16:11:55 -04:00
Abi Raja
df38041e77 attempt to test import from code 2024-05-20 16:08:33 -04:00
Abi Raja
726ecafd35 fix up update tests 2024-05-20 15:51:19 -04:00
Abi Raja
5c598daa3c add example for jest env 2024-05-20 15:46:30 -04:00
Abi Raja
ce82fc87c4 read fixtures path from env var 2024-05-20 15:44:22 -04:00
Abi Raja
a3104fab1a make env vars work 2024-05-20 15:24:16 -04:00
Abi Raja
9a234264b6 clean up test code 2024-05-20 14:33:49 -04:00
Abi Raja
dbd1ea07ff remove console.log 2024-05-20 13:10:34 -04:00
Abi Raja
992344ac8d add testing for edits 2024-05-17 17:06:51 -04:00
Abi Raja
610400bc49 also iterate through stacks 2024-05-17 16:52:01 -04:00
Abi Raja
7396160833 clean up the code 2024-05-17 16:49:17 -04:00
Abi Raja
72d412fa52 set up a basic puppeteer test 2024-05-17 16:22:45 -04:00
Abi Raja
1f9c5b2c80 set initial value 2024-05-17 14:47:54 -04:00
Abi Raja
ac8198e5ba Add field for Anthropic API key in settings 2024-05-17 12:30:26 -04:00
Abi Raja
1f61d02da6
Update issue templates 2024-05-16 14:05:50 -04:00
Abi Raja
01022abc4a
Update issue templates 2024-05-16 14:05:18 -04:00
Abi Raja
9e3614549c add GPT4o message 2024-05-14 11:48:07 -04:00
Abi Raja
22101ca73d
Update README.md 2024-05-13 16:24:12 -04:00
Abi Raja
0cbc5dbef0
Update README.md 2024-05-13 15:59:47 -04:00
Abi Raja
3c82b6571d
Update README.md 2024-05-13 15:59:18 -04:00
Abi Raja
ffb67d4ce9
Update README.md 2024-05-13 15:58:47 -04:00
Abi Raja
43214bbbf8 add another unit test for new model 2024-05-13 15:26:12 -04:00
Abi Raja
8e6a9c48f8 support GPT-4o 2024-05-13 15:24:47 -04:00
Leo
06a022a8c0 changed node version from 20.9 to 22 2024-04-30 16:32:38 -07:00
Abi Raja
a5fe0960d8 support best of n evals 2024-04-24 14:54:03 -04:00
Abi Raja
f9c4dd9c7c add react + gpt-4 turbo warning 2024-04-18 12:49:24 -04:00
Abi Raja
ba5c0feb22 Merge branch 'main' of https://github.com/abi/screenshot-to-code 2024-04-15 16:46:56 -04:00
Abi Raja
d103c02eba make turbo the default and move to top of list 2024-04-15 16:46:53 -04:00
Abi Raja
a96f8d3249
Update README.md 2024-04-15 16:19:39 -04:00
Abi Raja
911c008fcb Merge branch 'main' of https://github.com/abi/screenshot-to-code 2024-04-15 14:14:26 -04:00
Abi Raja
e3a4cfa7ab add class names for UI testing 2024-04-12 19:57:01 -04:00
Abi Raja
1c864b4dd9
Merge pull request #304 from milseg/patch-1
Fix: step by step API key generation
2024-04-12 10:22:05 -04:00
Milton
2c4450db89
Fix: step by step API key generation
Fix the step-by-step instructions by adding a reference to the specific page on which OpenAI API keys are generated
2024-04-12 00:47:51 -03:00
Abi Raja
bb642b320e improve evaluation docs and the way the model is passed into the evaluation script 2024-04-11 10:52:25 -04:00
Abi Raja
6587b626c5 clean up model strings and add support for GPT-4 Turbo (Apr 2024) 2024-04-11 09:55:55 -04:00
Abi Raja
f65fab073e
Update README.md 2024-04-10 16:51:36 -04:00
Abi Raja
9e1bcae545 remove access code/token functionality 2024-04-10 14:51:15 -04:00
Abi Raja
483d1e2209 handle unknown server error by showing an error message to the user 2024-04-10 13:37:44 -04:00
Abi Raja
29cea327cc improve error UX (particular when no OpenAI API key is found) 2024-04-10 13:20:42 -04:00
Abi Raja
4ef26a6ae8
Update README.md 2024-04-05 14:38:05 -04:00
Abi Raja
64cdc74186 add a regenerate button to retry an iteration 2024-04-04 15:50:33 -04:00
Abi Raja
58a929731f update wording to make it more obvious 2024-04-04 15:38:25 -04:00
Abi Raja
5b6d1c4421
Update README.md 2024-04-04 12:18:41 -04:00
Abi Raja
11cebfad6d
Update Troubleshooting.md 2024-03-30 18:41:15 -04:00
Abi Raja
7b17e4bace
Delete blog/video-to-app.md 2024-03-25 15:13:16 -04:00
Abi Raja
08c4132949 add a link to tips 2024-03-25 14:39:22 -04:00
Abi Raja
9465b6780b Merge branch 'main' of https://github.com/abi/screenshot-to-code 2024-03-25 14:09:17 -04:00
Abi Raja
dc52ff4cce store intermediate artifacts for video for easier debugging 2024-03-25 14:09:13 -04:00
Abi Raja
fc9b2e0530
Update README.md 2024-03-25 11:44:54 -04:00
Abi Raja
04cb502be9
Update README.md 2024-03-22 13:51:23 -04:00
Abi Raja
48d2ae9cfd
Update README.md 2024-03-22 13:43:33 -04:00
Abi Raja
7024057c03 show user a preview of the video and allow them to re-record 2024-03-22 11:15:33 -04:00
Abi Raja
87a44cfb77 show user a preview of the video and allow them to re-record 2024-03-22 11:14:44 -04:00
Abi Raja
6a0201a5c0 fix up mock data 2024-03-22 11:14:15 -04:00
Abi Raja
6069c2a118
Update README.md 2024-03-20 15:54:42 -04:00
Abi Raja
7f44228a95 fix bug where open AI key is required to use it with Anthropic 2024-03-19 13:57:57 -04:00
Abi Raja
62272e3a68 update error message for video 2024-03-19 13:51:30 -04:00
Abi Raja
b57f34d73a close anthropic client after it's done streaming 2024-03-19 12:09:25 -04:00
Abi Raja
212aa228ab fix bug with using enum as string 2024-03-19 10:30:58 -04:00
Abi Raja
81c4fbe28d identify exact llm being used during generation 2024-03-18 17:44:05 -04:00
Abi Raja
4e30b207c1 stop the screen sharing stream after its recording is done 2024-03-15 13:41:35 -04:00
Abi Raja
1a42f6a91b add haiku claude constant 2024-03-15 13:35:15 -04:00
Abi Raja
cdbb78ea55 add browser tab indicator for coding and update favicon images 2024-03-14 16:53:09 -04:00
Abi Raja
87cf6f4cc2 Merge branch 'main' of https://github.com/abi/screenshot-to-code 2024-03-14 15:59:59 -04:00
Abi Raja
a58c95c8c7 add more mock data 2024-03-14 15:59:57 -04:00
Abi Raja
c08cf0ae57 support screen recording 2024-03-14 15:57:44 -04:00
Abi Raja
214163b0e0
Update README.md 2024-03-08 17:01:42 -05:00
Abi Raja
451f6c399b
Update video-to-app.md 2024-03-08 17:01:25 -05:00
Abi Raja
b69edb7948 add information about Video to app 2024-03-08 14:13:34 -05:00
Abi Raja
f09b4c7808 improve preview for videos by showing the streaming response as it comes in 2024-03-08 12:37:37 -05:00
Abi Raja
cacf78a4bb show preview of videos (only works for .MP4) 2024-03-08 11:21:30 -05:00
Abi Raja
caa63013f5 extract html to show a preview for video mode and switch to .srcdoc with throttling for the preview 2024-03-08 11:16:49 -05:00
Abi Raja
a0f5af0fdc improve mock functionality 2024-03-08 05:02:23 -05:00
Abi Raja
fa58f2ce8f add note when input mode is video 2024-03-08 05:00:48 -05:00
Abi Raja
a907ef37c5 Merge branch 'main' of https://github.com/abi/screenshot-to-code 2024-03-08 04:49:50 -05:00
Abi Raja
3c97a401f8 highlight video feature 2024-03-08 04:49:47 -05:00
Abi Raja
6d283ea9ef
Rename video-to-app to video-to-app.md 2024-03-08 04:47:57 -05:00
Abi Raja
df0f30d403
Create video-to-app 2024-03-08 04:47:36 -05:00
Abi Raja
28d33a4d7e do multiple passes for video claude 2024-03-07 17:31:39 -05:00
Abi Raja
e828feb1ba
Merge pull request #257 from abi/video
Add support for video uploads (with Claude Opus)
2024-03-07 15:07:20 -05:00
Abi Raja
4937a92f42 support video uploads through the interface 2024-03-07 14:59:26 -05:00
Abi Raja
c2f230a8c9 initial version of script 2024-03-06 20:47:28 -05:00
Abi Raja
d8b75c43d2
Update README.md 2024-03-06 07:49:20 -05:00
Abi Raja
cd7cd841ed
Update evaluating-claude.md 2024-03-05 21:52:24 -05:00
Abi Raja
6029a9bec5
Update evaluating-claude.md 2024-03-05 21:48:55 -05:00
Abi Raja
0245f2c359
Update evaluating-claude.md 2024-03-05 21:35:38 -05:00
Abi Raja
f2cb1a4dc3 initial version of blogpost 2024-03-05 20:42:50 -05:00
Abi Raja
123e8ffce2 Add Claude prompts file (not used yet - we use the same prompts for both models) 2024-03-05 20:22:09 -05:00
Abi Raja
647bc2b236 Merge branch 'main' of https://github.com/abi/screenshot-to-code 2024-03-05 14:51:15 -05:00
Abi Raja
4f4d2e36f6 add evaluation description 2024-03-05 14:51:11 -05:00
Abi Raja
4ed4ec1cfe
Update README.md 2024-03-05 14:15:09 -05:00
Abi Raja
c69edeb4b6 send correct media type to Claude 2024-03-05 14:02:48 -05:00
Abi Raja
d281006586 Merge branch 'main' of https://github.com/abi/screenshot-to-code 2024-03-05 13:50:57 -05:00
Abi Raja
3a23d9eae3 add pyright as part of pre-commit (but disable it for now due to failing type check) 2024-03-05 13:50:55 -05:00
Abi Raja
a7e51f6f1a update error message from anthropic 2024-03-05 13:39:49 -05:00
Abi Raja
9e59050961
Update README.md 2024-03-05 13:38:51 -05:00
Abi Raja
4b908d8aae Merge branch 'main' of https://github.com/abi/screenshot-to-code 2024-03-05 13:31:11 -05:00
Abi Raja
0b1bdd4515 Delete sweep.yaml 2024-03-05 13:31:08 -05:00
Abi Raja
788c93f8d7
Update README.md 2024-03-05 12:32:24 -05:00
Abi Raja
ae92121fe9
Merge pull request #210 from Hakeemmidan/patch-1
Update Troubleshooting.md
2024-03-05 12:28:24 -05:00
Abi Raja
7a266adaa7 Merge branch 'main' of https://github.com/abi/screenshot-to-code 2024-03-05 12:25:37 -05:00
Abi Raja
7b62147a58 support Claude 3 Sonnet 2024-03-05 12:24:30 -05:00
Abi Raja
1bc26b616f
Update README.md 2024-03-05 09:44:23 -05:00
Abi Raja
f0ff9b21ba fix vs code settings for python import path 2024-03-04 16:21:04 -05:00
Abi Raja
a731e462ba Merge branch 'main' of https://github.com/abi/screenshot-to-code 2024-03-04 14:58:06 -05:00
Abi Raja
bd407e51f9 code clean up 2024-03-04 14:58:01 -05:00
Abi Raja
072b286b6d
Update README.md 2024-02-28 15:44:40 -05:00
Abi Raja
5e3a174203 test again with fixed test now 2024-02-23 15:33:23 -05:00
Abi Raja
d58d66d7ff test committing a failing test 2024-02-23 15:26:09 -05:00
Abi Raja
27165c0286 fix end of file newlines 2024-02-23 15:08:55 -05:00
Abi Raja
5e2c2b25c3 add pre-commit to run tests before each commit 2024-02-23 15:08:34 -05:00
Abi Raja
c0e084aa86 update evals 2024-02-21 09:30:42 -05:00
Julian Toro
7c1fb7a54d Add HTML/CSS system prompt and update stack descriptions 2024-02-02 10:19:52 -05:00
Abi Raja
34c1f9a959 update openAI key missing - error dialog message 2024-01-10 17:16:00 -08:00
Abi Raja
6d942f3cd7 refactors 2024-01-09 10:01:05 -08:00
Abi Raja
955d1a6a58 fix missing key 2024-01-09 09:58:03 -08:00
Abi Raja
8a431c0d02 add an imported code prompt 2024-01-09 08:29:16 -08:00
Abi Raja
7bc368d9bf update remaining variable names for GeneratedCodeConfig 2024-01-09 08:12:33 -08:00
Abi Raja
3723c81a04 move stack info to a separate file 2024-01-09 06:43:32 -08:00
Abi Raja
a8b562e392 simplify Stack dropdown generation significantly 2024-01-09 06:35:17 -08:00
Abi Raja
b8bce72d23 organize evals code into the evals dir 2024-01-08 17:38:34 -08:00
Abi Raja
aff0ad0b91 Update README.md 2024-01-08 17:06:21 -08:00
Abi Raja
7073879e6e include type checker 2024-01-08 17:04:59 -08:00
Abi Raja
1aeb8c4e14 Type the backend properly to avoid code duplication and ensure type errors when a stack configuration is not properly added 2024-01-08 15:30:53 -08:00
Abi Raja
15dc74a328 improve type checking for stack on backend 2024-01-08 14:55:41 -08:00
Abi Raja
adda6852f3 update gitignore 2024-01-08 14:35:26 -08:00
Abi Raja
0080a5e2c4 better organization of prompts directory 2024-01-08 14:25:00 -08:00
Abi Raja
5912957514
Update README.md 2024-01-08 16:52:38 -05:00
Abi Raja
fa7ffdfaeb add a vue tailwind generation option 2024-01-08 13:50:33 -08:00
Abi Raja
2ab95eb2c0 upgrade uvicorn dependency and add a backend start python script 2024-01-08 09:35:02 -08:00
Hakeem Almidan
8197eec851
Update Troubleshooting.md 2024-01-07 17:09:30 +04:00
Abi Raja
e32877f3a4 add some comments 2023-12-22 12:18:19 -05:00
Abi Raja
9b59b2d595 disable pasting which is causing a flurry of requests 2023-12-21 10:56:47 -05:00
Abi Raja
0c43b5e7cf
Update README.md 2023-12-20 16:46:02 -05:00
Abi Raja
9b728d034b add experimental support for SVG 2023-12-14 10:18:41 -05:00
Abi Raja
f676151edf fix bug with preview not updating immediately 2023-12-14 09:20:06 -05:00
Abi Raja
f1a9859384 fix bug with history not being updated if image generation fails 2023-12-14 08:29:04 -05:00
Abi Raja
9494f4c1cc fix reset function to reset all state correctly 2023-12-14 08:27:16 -05:00
Abi Raja
d23cec9bc0 add a front-end for scoring eval results 2023-12-13 16:38:26 -05:00
Abi Raja
896ac66ac5 add a simple eval script to batch run inferences 2023-12-13 12:50:34 -05:00
Abi Raja
a615f25fd7 improve history UX 2023-12-13 12:20:44 -05:00
Abi Raja
b88320177e Pre-process history for history display so we can write unit tests for it 2023-12-11 21:47:07 -05:00
Abi Raja
9f064c57a7 after code generation cancellation, leave the app in a good state (revert to last version or reset app if no last version) 2023-12-11 18:56:20 -05:00
Abi Raja
89c716f759 update to include new settings 2023-12-11 18:22:36 -05:00
Abi Raja
7edea28eab Merge branch 'main' into pr/170 2023-12-11 18:22:22 -05:00
Abi Raja
28db0a6f4c
Update README.md 2023-12-11 14:36:29 -05:00
Abi Raja
6385c500ec
Create design-docs.md 2023-12-10 17:53:39 -05:00
Abi Raja
bc64da750d
Merge pull request #177 from abi/import-from-code
Allow starting a new project from existing code
2023-12-10 15:58:03 -05:00
Abi Raja
dd65a6e1e4 add tests for the new prompts 2023-12-10 15:49:21 -05:00
Abi Raja
356dd428b8 re-org files and improve tests 2023-12-10 10:50:02 -05:00
Abi Raja
dcb95431ea update prompts for all stack combos of imported code 2023-12-10 10:34:03 -05:00
Abi Raja
e8e3d4cb6f make user set the stack when importing code 2023-12-10 10:22:05 -05:00
Abi Raja
52fee9e49b initial implementation of importing from code 2023-12-09 21:00:18 -05:00
Abi Raja
435402bc85 split main.py into appropriate routes files 2023-12-09 15:46:42 -05:00
Abi Raja
6a28ee2d3c strictly type python backend 2023-12-09 15:34:16 -05:00
Abi Raja
68a8d2788d improve tests and catch history tree generation errors better 2023-12-08 16:48:34 -05:00
Alex Lloyd
c0e68db40f remove WIP GeneratedCodeConfig 2023-12-07 18:25:05 -06:00
Alex Lloyd
4abee41e15 Remove redundant comment 2023-12-07 18:18:39 -06:00
Alex Lloyd
edfafa2c48 Fix parameter type for generateCode 2023-12-07 18:16:31 -06:00
Abi Raja
3bd2321b83 nit: comment fix 2023-12-07 13:34:23 -05:00
Abi Raja
8675d28e96 fix typescript and show badge of type of edit 2023-12-07 13:25:31 -05:00
Abi Raja
b3b478d6d1 add hover card to show full prompt 2023-12-07 13:21:10 -05:00
Abi Raja
dc65f0943a
Update README.md 2023-12-07 11:50:53 -05:00
Abi Raja
889935d619
Merge pull request #169 from abi/history
Show version history and enable branching off history
2023-12-07 11:43:14 -05:00
130 changed files with 10795 additions and 1870 deletions

1
.github/FUNDING.yml vendored Normal file
View File

@ -0,0 +1 @@
github: [abi]

21
.github/ISSUE_TEMPLATE/bug_report.md vendored Normal file
View File

@ -0,0 +1,21 @@
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: ''
assignees: ''
---
**Describe the bug**
A clear and concise description of what the bug is.
**To Reproduce**
Steps to reproduce the behavior:
1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error
**Screenshots of backend AND frontend terminal logs**
If applicable, add screenshots to help explain your problem.

10
.github/ISSUE_TEMPLATE/custom.md vendored Normal file
View File

@ -0,0 +1,10 @@
---
name: Custom issue template
about: Describe this issue template's purpose here.
title: ''
labels: ''
assignees: ''
---

View File

@ -0,0 +1,20 @@
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: ''
assignees: ''
---
**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
**Describe the solution you'd like**
A clear and concise description of what you want to happen.
**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.
**Additional context**
Add any other context or screenshots about the feature request here.

5
.vscode/settings.json vendored Normal file
View File

@ -0,0 +1,5 @@
{
"python.analysis.typeCheckingMode": "strict",
"python.analysis.extraPaths": ["./backend"],
"python.autoComplete.extraPaths": ["./backend"]
}

19
Evaluation.md Normal file
View File

@ -0,0 +1,19 @@
## Evaluating models and prompts
The evaluation dataset consists of 16 screenshots. A Python script for running screenshot-to-code on the dataset and a UI for rating outputs are included. With this setup, we can compare and evaluate various models and prompts.
### Running evals
- Input screenshots should be located at `backend/evals_data/inputs` and the outputs will be written to `backend/evals_data/outputs`. If you want to change these locations, modify `EVALS_DIR` in `backend/evals/config.py`. You can download the input screenshot dataset here: TODO.
- Set a stack and model (`STACK` var, `MODEL` var) in `backend/run_evals.py`
- Run `OPENAI_API_KEY=sk-... python run_evals.py` - this runs screenshot-to-code on the input dataset in parallel, but it will still take a few minutes to complete.
- Once the script is done, you can find the outputs in `backend/evals_data/outputs`.
### Rating evals
In order to view and rate the outputs, visit your front-end at `/evals`.
- Rate each output on a scale of 1-4
- You can also print the page as PDF to share your results with others.
Generally, I run three tests for each model/prompt + stack combo and take the average score out of those tests to evaluate.

View File

@ -1,30 +1,50 @@
# screenshot-to-code # screenshot-to-code
This simple app converts a screenshot to code (HTML/Tailwind CSS, or React or Vue or Bootstrap). It uses GPT-4 Vision to generate the code and DALL-E 3 to generate similar-looking images. You can now also enter a URL to clone a live website! A simple tool to convert screenshots, mockups and Figma designs into clean, functional code using AI. **Now supporting Claude Sonnet 3.5 and GPT-4O!**
https://github.com/abi/screenshot-to-code/assets/23818/6cebadae-2fe3-4986-ac6a-8fb9db030045 https://github.com/abi/screenshot-to-code/assets/23818/6cebadae-2fe3-4986-ac6a-8fb9db030045
Supported stacks:
- HTML + Tailwind
- HTML + CSS
- React + Tailwind
- Vue + Tailwind
- Bootstrap
- Ionic + Tailwind
- SVG
Supported AI models:
- Claude Sonnet 3.5 - Best model!
- GPT-4O - also recommended!
- GPT-4 Turbo (Apr 2024)
- GPT-4 Vision (Nov 2023)
- Claude 3 Sonnet
- DALL-E 3 for image generation
See the [Examples](#-examples) section below for more demos. See the [Examples](#-examples) section below for more demos.
## 🚀 Try It Out! We also just added experimental support for taking a video/screen recording of a website in action and turning that into a functional prototype.
🆕 [Try it here](https://screenshottocode.com) (bring your own OpenAI key - **your key must have access to GPT-4 Vision. See [FAQ](#%EF%B8%8F-faqs) section below for details**). Or see [Getting Started](#-getting-started) below for local install instructions. ![google in app quick 3](https://github.com/abi/screenshot-to-code/assets/23818/8758ffa4-9483-4b9b-bb66-abd6d1594c33)
## 🌟 Recent Updates [Learn more about video here](https://github.com/abi/screenshot-to-code/wiki/Screen-Recording-to-Code).
- Nov 30 - Dark mode, output code in Ionic (thanks [@dialmedu](https://github.com/dialmedu)), set OpenAI base URL [Follow me on Twitter for updates](https://twitter.com/_abi_).
- Nov 28 - 🔥 🔥 🔥 Customize your stack: React or Bootstrap or TailwindCSS
- Nov 23 - Send in a screenshot of the current replicated version (sometimes improves quality of subsequent generations) ## 🚀 Hosted Version
- Nov 21 - Edit code in the code editor and preview changes live thanks to [@clean99](https://github.com/clean99)
- Nov 20 - Paste in a URL to screenshot and clone (requires [ScreenshotOne free API key](https://screenshotone.com?via=screenshot-to-code)) [Try it live on the hosted version (paid)](https://screenshottocode.com).
- Nov 19 - Support for dark/light code editor theme - thanks [@kachbit](https://github.com/kachbit)
- Nov 16 - Added a setting to disable DALL-E image generation if you don't need that
- Nov 16 - View code directly within the app
- Nov 15 - You can now instruct the AI to update the code as you wish. It is helpful if the AI messed up some styles or missed a section.
## 🛠 Getting Started ## 🛠 Getting Started
The app has a React/Vite frontend and a FastAPI backend. You will need an OpenAI API key with access to the GPT-4 Vision API. The app has a React/Vite frontend and a FastAPI backend.
Keys needed:
- [OpenAI API key with access to GPT-4](https://github.com/abi/screenshot-to-code/blob/main/Troubleshooting.md)
- Anthropic key (optional) - only if you want to use Claude Sonnet, or for experimental video support.
Run the backend (I use Poetry for package management - `pip install poetry` if you don't have it): Run the backend (I use Poetry for package management - `pip install poetry` if you don't have it):
@ -36,6 +56,8 @@ poetry shell
poetry run uvicorn main:app --reload --port 7001 poetry run uvicorn main:app --reload --port 7001
``` ```
If you want to use Anthropic, add `ANTHROPIC_API_KEY` to `backend/.env`. You can also set up the keys using the settings dialog on the front-end (click the gear icon after loading the frontend).
Run the frontend: Run the frontend:
```bash ```bash
@ -54,10 +76,6 @@ For debugging purposes, if you don't want to waste GPT4-Vision credits, you can
MOCK=true poetry run uvicorn main:app --reload --port 7001 MOCK=true poetry run uvicorn main:app --reload --port 7001
``` ```
## Configuration
* You can configure the OpenAI base URL if you need to use a proxy: Set OPENAI_BASE_URL in the `backend/.env` or directly in the UI in the settings dialog
## Docker ## Docker
If you have Docker installed on your system, in the root directory, run: If you have Docker installed on your system, in the root directory, run:
@ -73,6 +91,9 @@ The app will be up and running at http://localhost:5173. Note that you can't dev
- **I'm running into an error when setting up the backend. How can I fix it?** [Try this](https://github.com/abi/screenshot-to-code/issues/3#issuecomment-1814777959). If that still doesn't work, open an issue. - **I'm running into an error when setting up the backend. How can I fix it?** [Try this](https://github.com/abi/screenshot-to-code/issues/3#issuecomment-1814777959). If that still doesn't work, open an issue.
- **How do I get an OpenAI API key?** See https://github.com/abi/screenshot-to-code/blob/main/Troubleshooting.md - **How do I get an OpenAI API key?** See https://github.com/abi/screenshot-to-code/blob/main/Troubleshooting.md
- **How can I configure an OpenAI proxy?** - If you're not able to access the OpenAI API directly (due to e.g. country restrictions), you can try a VPN or you can configure the OpenAI base URL to use a proxy: Set OPENAI_BASE_URL in the `backend/.env` or directly in the UI in the settings dialog. Make sure the URL has "v1" in the path so it should look like this: `https://xxx.xxxxx.xxx/v1`
- **How can I update the backend host that my front-end connects to?** - Configure VITE_HTTP_BACKEND_URL and VITE_WS_BACKEND_URL in front/.env.local. For example, set VITE_HTTP_BACKEND_URL=http://124.10.20.1:7001
- **Seeing UTF-8 errors when running the backend?** - On Windows, open the .env file with Notepad++, then go to Encoding and select UTF-8.
- **How can I provide feedback?** For feedback, feature requests and bug reports, open an issue or ping me on [Twitter](https://twitter.com/_abi_). - **How can I provide feedback?** For feedback, feature requests and bug reports, open an issue or ping me on [Twitter](https://twitter.com/_abi_).
## 📚 Examples ## 📚 Examples
@ -93,6 +114,4 @@ https://github.com/abi/screenshot-to-code/assets/23818/3fec0f77-44e8-4fb3-a769-a
## 🌍 Hosted Version ## 🌍 Hosted Version
🆕 [Try it here](https://screenshottocode.com) (bring your own OpenAI key - **your key must have access to GPT-4 Vision. See [FAQ](#%EF%B8%8F-faqs) section for details**). Or see [Getting Started](#-getting-started) for local install instructions. 🆕 [Try it here (paid)](https://screenshottocode.com). Or see [Getting Started](#-getting-started) for local install instructions to use with your own API keys.
[!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://www.buymeacoffee.com/abiraja)

View File

@ -1,17 +1,22 @@
### Getting an OpenAI API key with GPT4-Vision model access ### Getting an OpenAI API key with GPT-4 model access
You don't need a ChatGPT Pro account. Screenshot to code uses API keys from your OpenAI developer account. In order to get access to the GPT4 Vision model, log into your OpenAI account and then, follow these instructions: You don't need a ChatGPT Pro account. Screenshot to code uses API keys from your OpenAI developer account. In order to get access to the GPT4 Vision model, log into your OpenAI account and then, follow these instructions:
1. Open [OpenAI Dashboard](https://platform.openai.com/) 1. Open [OpenAI Dashboard](https://platform.openai.com/)
1. Go to Settings > Billing 1. Go to Settings > Billing
1. Click at the Add payment details 1. Click at the Add payment details
<img width="1030" alt="285636868-c80deb92-ab47-45cd-988f-deee67fbd44d" src="https://github.com/abi/screenshot-to-code/assets/23818/4e0f4b77-9578-4f9a-803c-c12b1502f3d7"> <img width="900" alt="285636868-c80deb92-ab47-45cd-988f-deee67fbd44d" src="https://github.com/abi/screenshot-to-code/assets/23818/4e0f4b77-9578-4f9a-803c-c12b1502f3d7">
4. You have to buy some credits. The minimum is $5. 4. You have to buy some credits. The minimum is $5.
5. Go to Settings > Limits and check at the bottom of the page, your current tier has to be "Tier 1" to have GPT4 access 5. Go to Settings > Limits and check at the bottom of the page, your current tier has to be "Tier 1" to have GPT4 access
<img width="785" alt="285636973-da38bd4d-8a78-4904-8027-ca67d729b933" src="https://github.com/abi/screenshot-to-code/assets/23818/8d07cd84-0cf9-4f88-bc00-80eba492eadf"> <img width="900" alt="285636973-da38bd4d-8a78-4904-8027-ca67d729b933" src="https://github.com/abi/screenshot-to-code/assets/23818/8d07cd84-0cf9-4f88-bc00-80eba492eadf">
6. Go to Screenshot to code and paste it in the Settings dialog under OpenAI key (gear icon). Your key is only stored in your browser. Never stored on our servers.
Some users have also reported that it can take upto 30 minutes after your credit purchase for the GPT4 vision model to be activated. 6. Navigate to OpenAI [api keys](https://platform.openai.com/api-keys) page and create and copy a new secret key.
7. Go to Screenshot to code and paste it in the Settings dialog under OpenAI key (gear icon). Your key is only stored in your browser. Never stored on our servers.
If you've followed these steps, and it still doesn't work, feel free to open a Github issue. ## Still not working?
- Some users have also reported that it can take up to 30 minutes after your credit purchase for the GPT4 vision model to be activated.
- You need to add credits to your account AND set it to renew when credits run out in order to be upgraded to Tier 1. Make sure your "Settings > Limits" page shows that you are at Tier 1.
If you've followed these steps, and it still doesn't work, feel free to open a GitHub issue. We only provide support for the open source version since we don't have debugging logs on the hosted version. If you're looking to use the hosted version, we recommend getting a paid subscription on screenshottocode.com.

8
backend/.gitignore vendored
View File

@ -150,3 +150,11 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear # and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder. # option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/ #.idea/
# Temporary eval output
evals_data
# Temporary video evals (Remove before merge)
video_evals

View File

@ -0,0 +1,25 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.2.0
hooks:
- id: end-of-file-fixer
- id: check-yaml
- id: check-added-large-files
# - repo: local
# hooks:
# - id: poetry-pytest
# name: Run pytest with Poetry
# entry: poetry run --directory backend pytest
# language: system
# pass_filenames: false
# always_run: true
# files: ^backend/
# # - id: poetry-pyright
# # name: Run pyright with Poetry
# # entry: poetry run --directory backend pyright
# # language: system
# # pass_filenames: false
# # always_run: true
# # files: ^backend/

View File

@ -1,4 +1,4 @@
FROM python:3.12-slim-bullseye FROM python:3.12.3-slim-bullseye
ENV POETRY_VERSION 1.4.1 ENV POETRY_VERSION 1.4.1

View File

@ -1,3 +1,7 @@
Run tests # Run the type checker
pytest test_prompts.py poetry run pyright
# Run tests
poetry run pytest

View File

@ -1,21 +0,0 @@
import json
import os
import httpx
async def validate_access_token(access_code: str):
    """Ask the Pico backend whether `access_code` is valid.

    POSTs a JSON body containing the access code and the value of the
    PICO_BACKEND_SECRET environment variable, and returns the decoded JSON
    response as-is.
    """
    endpoint = (
        "https://backend.buildpicoapps.com/screenshot_to_code/validate_access_token"
    )
    payload = {
        "access_code": access_code,
        "secret": os.environ.get("PICO_BACKEND_SECRET"),
    }
    async with httpx.AsyncClient() as http:
        reply = await http.post(
            endpoint,
            content=json.dumps(payload),
            headers={"Content-Type": "application/json"},
        )
        return reply.json()

View File

View File

@ -0,0 +1,57 @@
import unittest
from codegen.utils import extract_html_content
class TestUtils(unittest.TestCase):
    """Unit tests pinning the behaviour of `extract_html_content`."""

    def test_extract_html_content_with_html_tags(self):
        # A complete document comes back unchanged.
        document = "<html><body><p>Hello, World!</p></body></html>"
        self.assertEqual(
            extract_html_content(document),
            "<html><body><p>Hello, World!</p></body></html>",
        )

    def test_extract_html_content_without_html_tags(self):
        # Without any <html> element the input is passed through.
        plain = "No HTML content here."
        self.assertEqual(extract_html_content(plain), "No HTML content here.")

    def test_extract_html_content_with_partial_html_tags(self):
        # A missing closing tag counts as "no match", so the input is passed through.
        unterminated = "<html><body><p>Hello, World!</p></body>"
        self.assertEqual(
            extract_html_content(unterminated),
            "<html><body><p>Hello, World!</p></body>",
        )

    def test_extract_html_content_with_multiple_html_tags(self):
        # Only the first complete document is returned.
        two_documents = "<html><body><p>First</p></body></html> Some text <html><body><p>Second</p></body></html>"
        self.assertEqual(
            extract_html_content(two_documents),
            "<html><body><p>First</p></body></html>",
        )

    ## The following are tests based on actual LLM outputs

    def test_extract_html_content_some_explanation_before(self):
        # Chatty preamble before the markup is dropped.
        llm_output = """Got it! You want the song list to be displayed horizontally. I'll update the code to ensure that the song list is displayed in a horizontal layout.
Here's the updated code:
<html lang="en"><head></head><body class="bg-black text-white"></body></html>"""
        wanted = '<html lang="en"><head></head><body class="bg-black text-white"></body></html>'
        self.assertEqual(extract_html_content(llm_output), wanted)

    def test_markdown_tags(self):
        # A markdown fence without an <html> element is passed through.
        fenced = "```html<head></head>```"
        self.assertEqual(extract_html_content(fenced), "```html<head></head>```")

    def test_doctype_text(self):
        # The doctype preceding <html> is not part of the extracted content.
        with_doctype = '<!DOCTYPE html><html lang="en"><head></head><body></body></html>'
        wanted = '<html lang="en"><head></head><body></body></html>'
        self.assertEqual(extract_html_content(with_doctype), wanted)
if __name__ == "__main__":
unittest.main()

14
backend/codegen/utils.py Normal file
View File

@ -0,0 +1,14 @@
import re
def extract_html_content(text: str) -> str:
    """Return the first complete ``<html>...</html>`` element found in `text`.

    The opening tag may carry attributes and the tags are matched
    case-insensitively (HTML tag names are not case-sensitive). Anything
    before the opening tag (explanations, a doctype) and after the closing tag
    is dropped.

    If no complete element is found, a diagnostic is printed and `text` is
    returned unchanged.
    """
    # The opening tag must be exactly "html" followed by ">" or by whitespace
    # and attributes, so a longer tag name such as "<htmlfoo>" is not mistaken
    # for it. DOTALL lets the document span multiple lines.
    match = re.search(
        r"(<html(?:\s[^>]*)?>.*?</html>)", text, re.DOTALL | re.IGNORECASE
    )
    if match:
        return match.group(1)

    # Otherwise, we just send the previous HTML over
    print("[HTML Extraction] No <html> tags found in the generated content: " + text)
    return text

24
backend/config.py Normal file
View File

@ -0,0 +1,24 @@
# Backend configuration, read from environment variables once at import time.
import os


def env_flag(name: str) -> bool:
    """Interpret the environment variable `name` as an on/off flag.

    Unset, empty, and the usual "off" spellings ("0", "false", "no", "off",
    case-insensitive) are False. Any other value is True, so existing setups
    such as ``MOCK=true`` or ``MOCK=1`` keep working, while ``MOCK=false`` no
    longer switches the flag on just because the string is non-empty.
    """
    raw = os.environ.get(name)
    if raw is None:
        return False
    return raw.strip().lower() not in ("", "0", "false", "no", "off")


NUM_VARIANTS = 2

# LLM-related
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", None)
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY", None)
OPENAI_BASE_URL = os.environ.get("OPENAI_BASE_URL", None)

# Image generation (optional)
REPLICATE_API_KEY = os.environ.get("REPLICATE_API_KEY", None)

# Debugging-related

# Useful for debugging purposes when you don't want to waste GPT4-Vision credits
# Setting MOCK will stream a mock response instead of calling the OpenAI API
SHOULD_MOCK_AI_RESPONSE = env_flag("MOCK")
IS_DEBUG_ENABLED = env_flag("IS_DEBUG_ENABLED")
DEBUG_DIR = os.environ.get("DEBUG_DIR", "")

# Set to True when running in production (on the hosted version)
# Used as a feature flag to enable or disable certain features
# NOTE(review): this is still the raw environment string (or False), so any
# non-empty value, including "false", is truthy. Left as-is because the
# consumers are not visible here; confirm before switching to env_flag.
IS_PROD = os.environ.get("IS_PROD", False)

7
backend/custom_types.py Normal file
View File

@ -0,0 +1,7 @@
from typing import Literal
# Closed set of accepted input-mode strings: "image" or "video".
InputMode = Literal[
"image",
"video",
]

View File

@ -0,0 +1,30 @@
import os
import logging
import uuid
from config import DEBUG_DIR, IS_DEBUG_ENABLED
class DebugFileWriter:
    """Writes debugging artifacts into a fresh, uniquely named directory.

    When IS_DEBUG_ENABLED is false, or the directory cannot be created, the
    writer is inert: `debug_artifacts_path` is None and `write_to_file` does
    nothing.
    """

    def __init__(self):
        # None means "no usable debug directory".
        self.debug_artifacts_path = None
        if not IS_DEBUG_ENABLED:
            return

        try:
            artifacts_path = os.path.expanduser(f"{DEBUG_DIR}/{str(uuid.uuid4())}")
            os.makedirs(artifacts_path, exist_ok=True)
            # Only publish the path once the directory really exists.
            self.debug_artifacts_path = artifacts_path
            print(f"Debugging artifacts will be stored in: {self.debug_artifacts_path}")
        except Exception:
            # Debug output is best-effort; never let it break a generation.
            logging.error("Failed to create debug directory")

    def write_to_file(self, filename: str, content: str) -> None:
        """Write `content` to `filename` inside the debug directory.

        Failures are logged and swallowed on purpose.
        """
        if self.debug_artifacts_path is None:
            return
        try:
            # Explicit UTF-8: the content may contain characters that the
            # platform default encoding cannot represent.
            with open(
                os.path.join(self.debug_artifacts_path, filename),
                "w",
                encoding="utf-8",
            ) as file:
                file.write(content)
        except Exception as e:
            logging.error(f"Failed to write to file: {e}")

    def extract_html_content(self, text: str) -> str:
        """Return what lies between the last literal "<html>" and the last
        "</html>" in `text`, with "</html>" appended (the opening tag itself
        is not included)."""
        return str(text.split("<html>")[-1].rsplit("</html>", 1)[0] + "</html>")

View File

View File

1
backend/evals/config.py Normal file
View File

@ -0,0 +1 @@
EVALS_DIR = "./evals_data"

42
backend/evals/core.py Normal file
View File

@ -0,0 +1,42 @@
from config import ANTHROPIC_API_KEY, OPENAI_API_KEY
from llm import Llm, stream_claude_response, stream_openai_response
from prompts import assemble_prompt
from prompts.types import Stack
from openai.types.chat import ChatCompletionMessageParam
async def generate_code_for_image(image_url: str, stack: Stack, model: Llm) -> str:
    """Assemble the prompt for `image_url` and `stack`, then return the
    completion that `model` produces for it."""
    return await generate_code_core(assemble_prompt(image_url, stack), model)
async def generate_code_core(
    prompt_messages: list[ChatCompletionMessageParam], model: Llm
) -> str:
    """Run `prompt_messages` through `model` and return the full completion.

    Claude models are sent through the Anthropic client using
    ANTHROPIC_API_KEY; every other model goes through the OpenAI client using
    OPENAI_API_KEY.

    Raises:
        Exception: if the API key required for the chosen provider is not set.
    """

    async def process_chunk(_: str):
        # Streamed chunks are ignored; only the final completion is returned.
        pass

    # Route on the model identifier so that every Claude model (including Opus
    # and Haiku) reaches Anthropic instead of falling through to OpenAI.
    if model.value.startswith("claude"):
        if not ANTHROPIC_API_KEY:
            raise Exception("Anthropic API key not found")

        completion = await stream_claude_response(
            prompt_messages,
            api_key=ANTHROPIC_API_KEY,
            callback=process_chunk,
            model=model,
        )
    else:
        if not OPENAI_API_KEY:
            raise Exception("OpenAI API key not found")

        completion = await stream_openai_response(
            prompt_messages,
            api_key=OPENAI_API_KEY,
            base_url=None,
            callback=process_chunk,
            model=model,
        )

    return completion

7
backend/evals/utils.py Normal file
View File

@ -0,0 +1,7 @@
import base64
async def image_to_data_url(filepath: str) -> str:
    """Read the image at `filepath` and return it as a base64 data URL.

    The media type is guessed from the file extension. When the extension is
    missing, unknown, or not an image type, it falls back to ``image/png``.
    """
    import mimetypes  # local import: only this function needs it

    with open(filepath, "rb") as image_file:
        encoded_string = base64.b64encode(image_file.read()).decode()

    # Label the data with the file's real type instead of always claiming PNG.
    media_type, _ = mimetypes.guess_type(filepath)
    if media_type is None or not media_type.startswith("image/"):
        media_type = "image/png"
    return f"data:{media_type};base64,{encoded_string}"

View File

View File

@ -0,0 +1,23 @@
from datetime import datetime
import json
import os
from openai.types.chat import ChatCompletionMessageParam
def write_logs(prompt_messages: list[ChatCompletionMessageParam], completion: str):
    """Dump one prompt/completion pair to a timestamped JSON file.

    The file is written to ``<LOGS_PATH>/run_logs/messages_<timestamp>.json``,
    where LOGS_PATH comes from the environment and defaults to the current
    working directory. The JSON object has the keys "prompt" and "completion".
    """
    # Get the logs path from environment, default to the current working directory
    logs_path = os.environ.get("LOGS_PATH", os.getcwd())

    # Create run_logs directory if it doesn't exist within the specified logs path.
    # exist_ok avoids failing if another call creates it between check and create.
    logs_directory = os.path.join(logs_path, "run_logs")
    os.makedirs(logs_directory, exist_ok=True)

    print("Writing to logs directory:", logs_directory)

    # Format only the timestamp. Passing the whole path through strftime would
    # rewrite any "%" sequence that happens to be part of the directory name.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = os.path.join(logs_directory, f"messages_{timestamp}.json")

    # Write the messages dict into a new file for each run
    with open(filename, "w") as f:
        f.write(json.dumps({"prompt": prompt_messages, "completion": completion}))

View File

View File

@ -1,17 +1,33 @@
import asyncio import asyncio
import os
import re import re
from typing import Dict, List, Literal, Union
from openai import AsyncOpenAI from openai import AsyncOpenAI
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from image_generation.replicate import call_replicate
async def process_tasks(prompts, api_key, base_url):
tasks = [generate_image(prompt, api_key, base_url) for prompt in prompts] async def process_tasks(
prompts: List[str],
api_key: str,
base_url: str | None,
model: Literal["dalle3", "flux"],
):
import time
start_time = time.time()
if model == "dalle3":
tasks = [generate_image_dalle(prompt, api_key, base_url) for prompt in prompts]
else:
tasks = [generate_image_replicate(prompt, api_key) for prompt in prompts]
results = await asyncio.gather(*tasks, return_exceptions=True) results = await asyncio.gather(*tasks, return_exceptions=True)
end_time = time.time()
generation_time = end_time - start_time
print(f"Image generation time: {generation_time:.2f} seconds")
processed_results = [] processed_results: List[Union[str, None]] = []
for result in results: for result in results:
if isinstance(result, Exception): if isinstance(result, BaseException):
print(f"An exception occurred: {result}") print(f"An exception occurred: {result}")
processed_results.append(None) processed_results.append(None)
else: else:
@ -20,22 +36,38 @@ async def process_tasks(prompts, api_key, base_url):
return processed_results return processed_results
async def generate_image(prompt, api_key, base_url): async def generate_image_dalle(
prompt: str, api_key: str, base_url: str | None
) -> Union[str, None]:
client = AsyncOpenAI(api_key=api_key, base_url=base_url) client = AsyncOpenAI(api_key=api_key, base_url=base_url)
image_params = { res = await client.images.generate(
"model": "dall-e-3", model="dall-e-3",
"quality": "standard", quality="standard",
"style": "natural", style="natural",
"n": 1, n=1,
"size": "1024x1024", size="1024x1024",
"prompt": prompt, prompt=prompt,
} )
res = await client.images.generate(**image_params)
await client.close() await client.close()
return res.data[0].url return res.data[0].url
def extract_dimensions(url): async def generate_image_replicate(prompt: str, api_key: str) -> str:
# We use Flux Schnell
return await call_replicate(
{
"prompt": prompt,
"num_outputs": 1,
"aspect_ratio": "1:1",
"output_format": "png",
"output_quality": 100,
},
api_key,
)
def extract_dimensions(url: str):
# Regular expression to match numbers in the format '300x200' # Regular expression to match numbers in the format '300x200'
matches = re.findall(r"(\d+)x(\d+)", url) matches = re.findall(r"(\d+)x(\d+)", url)
@ -48,11 +80,11 @@ def extract_dimensions(url):
return (100, 100) return (100, 100)
def create_alt_url_mapping(code): def create_alt_url_mapping(code: str) -> Dict[str, str]:
soup = BeautifulSoup(code, "html.parser") soup = BeautifulSoup(code, "html.parser")
images = soup.find_all("img") images = soup.find_all("img")
mapping = {} mapping: Dict[str, str] = {}
for image in images: for image in images:
if not image["src"].startswith("https://placehold.co"): if not image["src"].startswith("https://placehold.co"):
@ -61,13 +93,19 @@ def create_alt_url_mapping(code):
return mapping return mapping
async def generate_images(code, api_key, base_url, image_cache): async def generate_images(
code: str,
api_key: str,
base_url: Union[str, None],
image_cache: Dict[str, str],
model: Literal["dalle3", "flux"] = "dalle3",
) -> str:
# Find all images # Find all images
soup = BeautifulSoup(code, "html.parser") soup = BeautifulSoup(code, "html.parser")
images = soup.find_all("img") images = soup.find_all("img")
# Extract alt texts as image prompts # Extract alt texts as image prompts
alts = [] alts: List[str | None] = []
for img in images: for img in images:
# Only include URL if the image starts with https://placehold.co # Only include URL if the image starts with https://placehold.co
# and it's not already in the image_cache # and it's not already in the image_cache
@ -78,17 +116,17 @@ async def generate_images(code, api_key, base_url, image_cache):
alts.append(img.get("alt", None)) alts.append(img.get("alt", None))
# Exclude images with no alt text # Exclude images with no alt text
alts = [alt for alt in alts if alt is not None] filtered_alts: List[str] = [alt for alt in alts if alt is not None]
# Remove duplicates # Remove duplicates
prompts = list(set(alts)) prompts = list(set(filtered_alts))
# Return early if there are no images to replace # Return early if there are no images to replace
if len(prompts) == 0: if len(prompts) == 0:
return code return code
# Generate images # Generate images
results = await process_tasks(prompts, api_key, base_url) results = await process_tasks(prompts, api_key, base_url, model)
# Create a dict mapping alt text to image URL # Create a dict mapping alt text to image URL
mapped_image_urls = dict(zip(prompts, results)) mapped_image_urls = dict(zip(prompts, results))

View File

@ -0,0 +1,65 @@
import asyncio
import httpx
async def call_replicate(input: dict[str, str | int], api_token: str) -> str:
    """Create a flux-schnell prediction on Replicate and wait for its result.

    Args:
        input: Model input, sent as the ``input`` field of the request body.
        api_token: Replicate API token, sent as a Bearer token.

    Returns:
        The first element of the finished prediction's ``output`` list.

    Raises:
        ValueError: on HTTP or request errors, a missing prediction id, or a
            prediction that ends in an error, failed, or canceled state.
        TimeoutError: if the prediction has not finished after all polls.
    """
    headers = {
        "Authorization": f"Bearer {api_token}",
        "Content-Type": "application/json",
    }
    data = {"input": input}

    async with httpx.AsyncClient() as client:
        try:
            response = await client.post(
                "https://api.replicate.com/v1/models/black-forest-labs/flux-schnell/predictions",
                headers=headers,
                json=data,
            )
            response.raise_for_status()
            response_json = response.json()

            # Extract the id from the response
            prediction_id = response_json.get("id")
            if not prediction_id:
                raise ValueError("Prediction ID not found in initial response.")

            status_check_url = (
                f"https://api.replicate.com/v1/predictions/{prediction_id}"
            )

            # Poll every 0.1 seconds until the prediction reaches a terminal
            # status (up to 100 polls, i.e. about 10s of waiting between requests)
            max_polls = 100
            for _ in range(max_polls):
                await asyncio.sleep(0.1)

                status_response = await client.get(status_check_url, headers=headers)
                status_response.raise_for_status()
                status_response_json = status_response.json()
                status = status_response_json.get("status")

                if status == "succeeded":
                    return status_response_json["output"][0]
                elif status == "error":
                    raise ValueError(
                        f"Inference errored out: {status_response_json.get('error', 'Unknown error')}"
                    )
                elif status == "failed":
                    raise ValueError("Inference failed")
                elif status == "canceled":
                    # Terminal state: polling further would only run into the timeout.
                    raise ValueError("Inference was canceled")

            # If we've reached here, it means we've exceeded the max number of polls
            raise TimeoutError("Inference timed out")
        except (ValueError, TimeoutError):
            # Raised above with a precise message; re-raise untouched so the
            # handlers below do not re-wrap or replace it.
            raise
        except httpx.HTTPStatusError as e:
            raise ValueError(f"HTTP error occurred: {e}") from e
        except httpx.RequestError as e:
            raise ValueError(f"An error occurred while requesting: {e}") from e
        except asyncio.TimeoutError as e:
            raise TimeoutError("Request timed out") from e
        except Exception as e:
            raise ValueError(f"An unexpected error occurred: {e}") from e

View File

View File

@ -0,0 +1,80 @@
import base64
import io
import time
from PIL import Image
# Maximum allowed length of the base64-encoded image data (5 * 1024 * 1024).
CLAUDE_IMAGE_MAX_SIZE = 5 * 1024 * 1024
# Images with a width or height at or above this are downscaled to it.
CLAUDE_MAX_IMAGE_DIMENSION = 7990
# Process image so it meets Claude requirements
def process_image(image_data_url: str) -> tuple[str, str]:
    """Shrink a base64 image data URL until it fits the Claude image limits.

    Args:
        image_data_url: Data URL of the form ``data:<media type>;base64,<data>``.

    Returns:
        ``(media_type, base64_data)``. If the image is already within both the
        dimension and the size limit, the original media type and data are
        returned untouched. Otherwise the image is downscaled if needed,
        re-encoded as JPEG, and returned with media type ``image/jpeg``.
    """
    # Extract bytes and media type from base64 data URL
    media_type = image_data_url.split(";")[0].split(":")[1]
    base64_data = image_data_url.split(",")[1]
    image_bytes = base64.b64decode(base64_data)

    img = Image.open(io.BytesIO(image_bytes))

    # Check if image is under max dimensions and size
    is_under_dimension_limit = (
        img.width < CLAUDE_MAX_IMAGE_DIMENSION
        and img.height < CLAUDE_MAX_IMAGE_DIMENSION
    )
    is_under_size_limit = len(base64_data) <= CLAUDE_IMAGE_MAX_SIZE

    # If image is under both limits, no processing needed
    if is_under_dimension_limit and is_under_size_limit:
        print("[CLAUDE IMAGE PROCESSING] no processing needed")
        return (media_type, base64_data)

    # Time image processing
    start_time = time.time()

    # Resize if either dimension reaches CLAUDE_MAX_IMAGE_DIMENSION
    # (Claude disallows >= 8000px)
    if not is_under_dimension_limit:
        # Calculate the new dimensions while maintaining aspect ratio.
        # max(1, ...) keeps the short side from rounding down to 0 on extreme
        # aspect ratios.
        if img.width > img.height:
            new_width = CLAUDE_MAX_IMAGE_DIMENSION
            new_height = max(
                1, int((CLAUDE_MAX_IMAGE_DIMENSION / img.width) * img.height)
            )
        else:
            new_height = CLAUDE_MAX_IMAGE_DIMENSION
            new_width = max(
                1, int((CLAUDE_MAX_IMAGE_DIMENSION / img.height) * img.width)
            )

        # Resize with a real resampling filter. Image.DEFAULT_STRATEGY is a
        # compression-strategy constant, not a resampling filter.
        img = img.resize((new_width, new_height), Image.LANCZOS)
        print(
            f"[CLAUDE IMAGE PROCESSING] image resized: width = {new_width}, height = {new_height}"
        )

    # Convert and compress as JPEG
    # We always compress as JPEG (95% at the least) even when we resize and the original image
    # is under the size limit.
    min_quality = 15  # lowest quality that is ever tried
    quality = 95
    img = img.convert("RGB")  # Ensure image is in RGB mode for JPEG conversion
    output = io.BytesIO()
    img.save(output, format="JPEG", quality=quality)
    encoded = base64.b64encode(output.getvalue())

    # Reduce quality in steps of 5 until the image is under max size. The
    # quality is lowered before re-encoding so no level is encoded twice.
    while len(encoded) > CLAUDE_IMAGE_MAX_SIZE and quality > min_quality:
        quality -= 5
        output = io.BytesIO()
        img.save(output, format="JPEG", quality=quality)
        encoded = base64.b64encode(output.getvalue())

    # Log so we know it was modified
    old_size = len(base64_data)
    new_size = len(encoded)
    print(
        f"[CLAUDE IMAGE PROCESSING] image size updated: old size = {old_size} bytes, new size = {new_size} bytes"
    )

    end_time = time.time()
    processing_time = end_time - start_time
    print(f"[CLAUDE IMAGE PROCESSING] processing time: {processing_time:.2f} seconds")

    return ("image/jpeg", encoded.decode("utf-8"))

View File

@ -1,35 +1,233 @@
import os import copy
from typing import Awaitable, Callable from enum import Enum
from typing import Any, Awaitable, Callable, List, cast
from anthropic import AsyncAnthropic
from openai import AsyncOpenAI from openai import AsyncOpenAI
from openai.types.chat import ChatCompletionMessageParam, ChatCompletionChunk
from config import IS_DEBUG_ENABLED
from debug.DebugFileWriter import DebugFileWriter
from image_processing.utils import process_image
MODEL_GPT_4_VISION = "gpt-4-vision-preview" from utils import pprint_prompt
# Actual model versions that are passed to the LLMs and stored in our logs
class Llm(Enum):
    # Each member's value is the model identifier string that is sent as the
    # `model` parameter of the API request (see the `model.value` uses below).

    # GPT models
    GPT_4_VISION = "gpt-4-vision-preview"
    GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09"
    GPT_4O_2024_05_13 = "gpt-4o-2024-05-13"
    # Claude models
    CLAUDE_3_SONNET = "claude-3-sonnet-20240229"
    CLAUDE_3_OPUS = "claude-3-opus-20240229"
    CLAUDE_3_HAIKU = "claude-3-haiku-20240307"
    CLAUDE_3_5_SONNET_2024_06_20 = "claude-3-5-sonnet-20240620"
# Will throw errors if you send a garbage string
def convert_frontend_str_to_llm(frontend_str: str) -> Llm:
    """Map a model string received from the frontend to its `Llm` member.

    Two short aliases are recognised explicitly; any other string must equal
    an `Llm` value, otherwise the enum lookup raises ValueError.
    """
    short_aliases = {
        "gpt_4_vision": Llm.GPT_4_VISION,
        "claude_3_sonnet": Llm.CLAUDE_3_SONNET,
    }
    aliased = short_aliases.get(frontend_str)
    if aliased is not None:
        return aliased
    return Llm(frontend_str)
async def stream_openai_response( async def stream_openai_response(
messages, messages: List[ChatCompletionMessageParam],
api_key: str, api_key: str,
base_url: str | None, base_url: str | None,
callback: Callable[[str], Awaitable[None]], callback: Callable[[str], Awaitable[None]],
): model: Llm,
) -> str:
client = AsyncOpenAI(api_key=api_key, base_url=base_url) client = AsyncOpenAI(api_key=api_key, base_url=base_url)
model = MODEL_GPT_4_VISION
# Base parameters # Base parameters
params = {"model": model, "messages": messages, "stream": True, "timeout": 600} params = {
"model": model.value,
"messages": messages,
"stream": True,
"timeout": 600,
"temperature": 0.0,
}
# Add 'max_tokens' only if the model is a GPT4 vision model # Add 'max_tokens' only if the model is a GPT4 vision or Turbo model
if model == MODEL_GPT_4_VISION: if (
model == Llm.GPT_4_VISION
or model == Llm.GPT_4_TURBO_2024_04_09
or model == Llm.GPT_4O_2024_05_13
):
params["max_tokens"] = 4096 params["max_tokens"] = 4096
params["temperature"] = 0
completion = await client.chat.completions.create(**params) stream = await client.chat.completions.create(**params) # type: ignore
full_response = "" full_response = ""
async for chunk in completion: async for chunk in stream: # type: ignore
content = chunk.choices[0].delta.content or "" assert isinstance(chunk, ChatCompletionChunk)
full_response += content if (
await callback(content) chunk.choices
and len(chunk.choices) > 0
and chunk.choices[0].delta
and chunk.choices[0].delta.content
):
content = chunk.choices[0].delta.content or ""
full_response += content
await callback(content)
await client.close() await client.close()
return full_response return full_response
# TODO: Have a separate function that translates OpenAI messages to Claude messages
async def stream_claude_response(
    messages: List[ChatCompletionMessageParam],
    api_key: str,
    callback: Callable[[str], Awaitable[None]],
    model: Llm,
) -> str:
    """Stream a Claude completion for OpenAI-style `messages`.

    The first message supplies the system prompt; the remaining messages are
    translated to Claude's format (``image_url`` parts become base64 ``image``
    parts, processed by `process_image`). The caller's `messages` are not
    modified.

    Args:
        messages: OpenAI-format chat messages, system message first.
        api_key: Anthropic API key.
        callback: Awaited with every streamed text chunk.
        model: Claude model to use.

    Returns:
        The text of the first content block of the final message.
    """
    # Base parameters
    # Only Claude 3.5 Sonnet accepts 8192 output tokens (via the beta header
    # below); the Claude 3 models are limited to 4096.
    is_claude_3_5_sonnet = model == Llm.CLAUDE_3_5_SONNET_2024_06_20
    max_tokens = 8192 if is_claude_3_5_sonnet else 4096
    extra_headers = (
        {"anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"}
        if is_claude_3_5_sonnet
        else None
    )
    temperature = 0.0

    # Translate OpenAI messages to Claude messages

    # Deep copy messages to avoid modifying the original list
    cloned_messages = copy.deepcopy(messages)

    system_prompt = cast(str, cloned_messages[0].get("content"))
    claude_messages = [dict(message) for message in cloned_messages[1:]]
    for message in claude_messages:
        if not isinstance(message["content"], list):
            continue

        for content in message["content"]:  # type: ignore
            if content["type"] == "image_url":
                content["type"] = "image"

                # Extract base64 data and media type from data URL
                # Example base64 data URL: data:image/png;base64,iVBOR...
                image_data_url = cast(str, content["image_url"]["url"])

                # Process image and split media type and data
                # so it works with Claude (under 5mb in base64 encoding)
                (media_type, base64_data) = process_image(image_data_url)

                # Remove OpenAI parameter
                del content["image_url"]

                content["source"] = {
                    "type": "base64",
                    "media_type": media_type,
                    "data": base64_data,
                }

    # Create the client only once the request is fully prepared, and always
    # close it, even if streaming or the callback raises.
    client = AsyncAnthropic(api_key=api_key)
    try:
        # Stream Claude response
        async with client.messages.stream(
            model=model.value,
            max_tokens=max_tokens,
            temperature=temperature,
            system=system_prompt,
            messages=claude_messages,  # type: ignore
            extra_headers=extra_headers,
        ) as stream:
            async for text in stream.text_stream:
                await callback(text)

            # Return final message
            response = await stream.get_final_message()
    finally:
        # Close the Anthropic client
        await client.close()

    return response.content[0].text
async def stream_claude_response_native(
    system_prompt: str,
    messages: list[Any],
    api_key: str,
    callback: Callable[[str], Awaitable[None]],
    include_thinking: bool = False,
    model: Llm = Llm.CLAUDE_3_OPUS,
) -> str:
    """Stream a two-pass Claude completion for Claude-format `messages`.

    After each pass the assistant's answer and a follow-up "improve this"
    user message are appended to the conversation, and the model is called
    again (two passes in total). Every streamed chunk of every pass is
    forwarded to `callback`. The caller's `messages` list is not modified.

    Args:
        system_prompt: System prompt for every pass.
        messages: Claude-format messages for the first pass.
        api_key: Anthropic API key.
        callback: Awaited with every streamed text chunk.
        include_thinking: If True, each pass is prefilled with "<thinking>".
        model: Claude model to use.

    Returns:
        The text of the first content block of the last pass's final message.

    Raises:
        Exception: if no pass produced a response.
    """
    client = AsyncAnthropic(api_key=api_key)

    # Base model parameters
    max_tokens = 4096
    temperature = 0.0

    # Multi-pass flow
    current_pass_num = 1
    max_passes = 2

    prefix = "<thinking>"
    response = None

    # For debugging
    full_stream = ""
    debug_file_writer = DebugFileWriter()

    # Work on a copy so the follow-up turns appended below do not leak into
    # the caller's list.
    messages = list(messages)

    try:
        while current_pass_num <= max_passes:
            current_pass_num += 1

            # Set up message depending on whether we have a <thinking> prefix
            messages_to_send = (
                messages + [{"role": "assistant", "content": prefix}]
                if include_thinking
                else messages
            )

            pprint_prompt(messages_to_send)

            async with client.messages.stream(
                model=model.value,
                max_tokens=max_tokens,
                temperature=temperature,
                system=system_prompt,
                messages=messages_to_send,  # type: ignore
            ) as stream:
                async for text in stream.text_stream:
                    print(text, end="", flush=True)
                    full_stream += text
                    await callback(text)

                response = await stream.get_final_message()

            response_text = response.content[0].text

            # Write each pass's code to .html file and thinking to .txt file
            # (current_pass_num was already incremented, hence the "- 1")
            if IS_DEBUG_ENABLED:
                debug_file_writer.write_to_file(
                    f"pass_{current_pass_num - 1}.html",
                    debug_file_writer.extract_html_content(response_text),
                )
                debug_file_writer.write_to_file(
                    f"thinking_pass_{current_pass_num - 1}.txt",
                    response_text.split("</thinking>")[0],
                )

            # Set up messages array for next pass. The prefix is only part of
            # the assistant turn when it was actually sent as a prefill.
            assistant_text = (
                str(prefix) + response_text if include_thinking else response_text
            )
            messages += [
                {"role": "assistant", "content": assistant_text},
                {
                    "role": "user",
                    "content": "You've done a good job with a first draft. Improve this further based on the original instructions so that the app is fully functional and looks like the original video of the app we're trying to replicate.",
                },
            ]

            print(
                f"Token usage: Input Tokens: {response.usage.input_tokens}, Output Tokens: {response.usage.output_tokens}"
            )
    finally:
        # Close the Anthropic client, even if a pass or the callback raised
        await client.close()

    if IS_DEBUG_ENABLED:
        debug_file_writer.write_to_file("full_stream.txt", full_stream)

    if not response:
        raise Exception("No HTML response found in AI response")
    else:
        return response.content[0].text

View File

@ -1,25 +1,12 @@
# Load environment variables first # Load environment variables first
from dotenv import load_dotenv from dotenv import load_dotenv
load_dotenv() load_dotenv()
import json from fastapi import FastAPI
import os
import traceback
from datetime import datetime
from fastapi import FastAPI, WebSocket
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse from routes import screenshot, generate_code, home, evals
import openai
from llm import stream_openai_response
from mock import mock_completion
from utils import pprint_prompt
from image_generation import create_alt_url_mapping, generate_images
from prompts import assemble_prompt
from routes import screenshot
from access_token import validate_access_token
app = FastAPI(openapi_url=None, docs_url=None, redoc_url=None) app = FastAPI(openapi_url=None, docs_url=None, redoc_url=None)
@ -32,231 +19,8 @@ app.add_middleware(
allow_headers=["*"], allow_headers=["*"],
) )
# Add routes
# Useful for debugging purposes when you don't want to waste GPT4-Vision credits app.include_router(generate_code.router)
# Setting to True will stream a mock response instead of calling the OpenAI API.
# Only an explicit affirmative value ("1", "true", "yes", "on"; case-insensitive)
# enables mocking. The previous bool(os.environ.get(...)) form treated every
# non-empty string as truthy, so MOCK=False or MOCK=0 switched mocking ON.
SHOULD_MOCK_AI_RESPONSE = os.environ.get("MOCK", "").strip().lower() in (
    "1",
    "true",
    "yes",
    "on",
)

# Set to True when running in production (on the hosted version)
# Used as a feature flag to enable or disable certain features
# NOTE: this keeps the raw environment string (or False when the variable is
# unset), so any non-empty value, even "False", counts as enabled.
IS_PROD = os.environ.get("IS_PROD", False)
app.include_router(screenshot.router) app.include_router(screenshot.router)
app.include_router(home.router)
app.include_router(evals.router)
@app.get("/")
async def get_status():
    """Serve a small HTML page confirming that the backend is reachable.

    The page tells the user to open the front-end URL instead of this one.
    """
    status_markup = "<h3>Your backend is running correctly. Please open the front-end URL (default is http://localhost:5173) to use screenshot-to-code.</h3>"
    return HTMLResponse(content=status_markup)
def write_logs(prompt_messages, completion):
    """Write one prompt/completion pair to a timestamped JSON file.

    The file is created at ``<LOGS_PATH>/run_logs/messages_<YYYYmmdd_HHMMSS>.json``.
    ``LOGS_PATH`` is read from the environment and defaults to the current
    working directory.

    Args:
        prompt_messages: The messages sent to the model (JSON-serializable).
        completion: The generated output (JSON-serializable).

    Raises:
        OSError: If the directory or file cannot be created.
        TypeError: If either argument is not JSON-serializable.
    """
    # Get the logs path from environment, default to the current working directory
    logs_path = os.environ.get("LOGS_PATH", os.getcwd())

    # exist_ok=True replaces the exists()/makedirs() pair, which could fail when
    # two requests tried to create the directory at the same moment.
    logs_directory = os.path.join(logs_path, "run_logs")
    os.makedirs(logs_directory, exist_ok=True)

    print("Writing to logs directory:", logs_directory)

    # Only the timestamp goes through strftime. Formatting the whole path made
    # any "%" sequence inside LOGS_PATH expand as a date directive, which
    # produced a path that does not exist.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = os.path.join(logs_directory, f"messages_{timestamp}.json")

    # Write the messages dict into a new file for each run
    with open(filename, "w") as f:
        f.write(json.dumps({"prompt": prompt_messages, "completion": completion}))
@app.websocket("/generate-code")
async def stream_code(websocket: WebSocket):
# WebSocket handler for /generate-code.
# Flow: accept the socket, read a single JSON params message, resolve the
# OpenAI credentials, assemble the prompt, stream the completion back as
# "chunk" messages, optionally generate images, then send the final HTML as a
# "setCode" message and close the socket.
# Every message sent to the client is a JSON object with "type" and "value".
await websocket.accept()
print("Incoming websocket connection...")
# Helper: report an error to the client and then close the socket.
async def throw_error(
message: str,
):
await websocket.send_json({"type": "error", "value": message})
await websocket.close()
params = await websocket.receive_json()
print("Received params")
# Read the code config settings from the request. Fall back to default if not provided.
generated_code_config = ""
if "generatedCodeConfig" in params and params["generatedCodeConfig"]:
generated_code_config = params["generatedCodeConfig"]
print(f"Generating {generated_code_config} code")
# Get the OpenAI API key from the request. Fall back to environment variable if not provided.
# If neither is provided, we throw an error.
openai_api_key = None
if "accessCode" in params and params["accessCode"]:
print("Access code - using platform API key")
res = await validate_access_token(params["accessCode"])
if res["success"]:
openai_api_key = os.environ.get("PLATFORM_OPENAI_API_KEY")
else:
await websocket.send_json(
{
"type": "error",
"value": res["failure_reason"],
}
)
# NOTE(review): this path returns without calling websocket.close(), unlike
# throw_error above - confirm whether that is intended.
return
else:
# NOTE(review): direct indexing raises KeyError when the client omits
# "openAiApiKey"; the other optional params are guarded with an `in` check.
if params["openAiApiKey"]:
openai_api_key = params["openAiApiKey"]
print("Using OpenAI API key from client-side settings dialog")
else:
openai_api_key = os.environ.get("OPENAI_API_KEY")
if openai_api_key:
print("Using OpenAI API key from environment variable")
if not openai_api_key:
print("OpenAI API key not found")
await websocket.send_json(
{
"type": "error",
"value": "No OpenAI API key found. Please add your API key in the settings dialog or add it to backend/.env file.",
}
)
# NOTE(review): as above, the socket is not closed before returning.
return
# Get the OpenAI Base URL from the request. Fall back to environment variable if not provided.
openai_base_url = None
# Disable user-specified OpenAI Base URL in prod
# This reads the IS_PROD environment variable directly rather than the
# module-level IS_PROD flag.
if not os.environ.get("IS_PROD"):
if "openAiBaseURL" in params and params["openAiBaseURL"]:
openai_base_url = params["openAiBaseURL"]
print("Using OpenAI Base URL from client-side settings dialog")
else:
openai_base_url = os.environ.get("OPENAI_BASE_URL")
if openai_base_url:
print("Using OpenAI Base URL from environment variable")
if not openai_base_url:
print("Using official OpenAI URL")
# Get the image generation flag from the request. Fall back to True if not provided.
should_generate_images = (
params["isImageGenerationEnabled"]
if "isImageGenerationEnabled" in params
else True
)
print("generating code...")
await websocket.send_json({"type": "status", "value": "Generating code..."})
# Callback handed to the completion streamers: forwards each piece of
# generated text to the client as a "chunk" message.
async def process_chunk(content):
await websocket.send_json({"type": "chunk", "value": content})
# Assemble the prompt
try:
if params.get("resultImage") and params["resultImage"]:
prompt_messages = assemble_prompt(
params["image"], generated_code_config, params["resultImage"]
)
else:
prompt_messages = assemble_prompt(params["image"], generated_code_config)
# NOTE(review): the bare except catches everything and the underlying error is
# neither logged nor re-raised, so the cause of a failure is lost here.
except:
await websocket.send_json(
{
"type": "error",
"value": "Error assembling prompt. Contact support at support@picoapps.xyz",
}
)
await websocket.close()
return
# Image cache for updates so that we don't have to regenerate images
image_cache = {}
if params["generationType"] == "update":
# Transform into message format
# TODO: Move this to frontend
# History entries alternate roles: even indices are appended as "assistant"
# messages and odd indices as "user" messages.
for index, text in enumerate(params["history"]):
prompt_messages += [
{"role": "assistant" if index % 2 == 0 else "user", "content": text}
]
# presumably the second-to-last history entry is the most recently generated
# code - verify against the client; a history with fewer than two entries
# raises IndexError here.
image_cache = create_alt_url_mapping(params["history"][-2])
if SHOULD_MOCK_AI_RESPONSE:
completion = await mock_completion(process_chunk)
else:
try:
completion = await stream_openai_response(
prompt_messages,
api_key=openai_api_key,
base_url=openai_base_url,
callback=lambda x: process_chunk(x),
)
# Only these three OpenAI error types are translated into client-facing
# messages; any other exception propagates out of the handler.
except openai.AuthenticationError as e:
print("[GENERATE_CODE] Authentication failed", e)
error_message = (
"Incorrect OpenAI key. Please make sure your OpenAI API key is correct, or create a new OpenAI API key on your OpenAI dashboard."
+ (
" Alternatively, you can purchase code generation credits directly on this website."
if IS_PROD
else ""
)
)
return await throw_error(error_message)
except openai.NotFoundError as e:
print("[GENERATE_CODE] Model not found", e)
error_message = (
e.message
+ ". Please make sure you have followed the instructions correctly to obtain an OpenAI key with GPT vision access: https://github.com/abi/screenshot-to-code/blob/main/Troubleshooting.md"
+ (
" Alternatively, you can purchase code generation credits directly on this website."
if IS_PROD
else ""
)
)
return await throw_error(error_message)
except openai.RateLimitError as e:
print("[GENERATE_CODE] Rate limit exceeded", e)
error_message = (
"OpenAI error - 'You exceeded your current quota, please check your plan and billing details.'"
+ (
" Alternatively, you can purchase code generation credits directly on this website."
if IS_PROD
else ""
)
)
return await throw_error(error_message)
# Write the messages dict into a log so that we can debug later
write_logs(prompt_messages, completion)
# The try block below covers image generation and the sends that follow it,
# so any exception raised in it is reported as an image generation failure.
try:
if should_generate_images:
await websocket.send_json(
{"type": "status", "value": "Generating images..."}
)
updated_html = await generate_images(
completion,
api_key=openai_api_key,
base_url=openai_base_url,
image_cache=image_cache,
)
else:
updated_html = completion
await websocket.send_json({"type": "setCode", "value": updated_html})
await websocket.send_json(
{"type": "status", "value": "Code generation complete."}
)
except Exception as e:
traceback.print_exc()
print("Image generation failed", e)
# NOTE(review): if generate_images raises, no "setCode" message is sent; the
# client only receives the status text below.
await websocket.send_json(
{"type": "status", "value": "Image generation failed but code is complete."}
)
await websocket.close()

View File

@ -1,207 +0,0 @@
import asyncio
async def mock_completion(process_chunk):
    """Replay a canned HTML page through ``process_chunk`` to imitate streaming.

    The page is delivered in slices of 10 characters with a 10 ms pause after
    each slice. The full page is returned once every slice has been sent.
    """
    chunk_size = 10
    mock_html = NO_IMAGES_NYTIMES_MOCK_CODE

    offset = 0
    while offset < len(mock_html):
        await process_chunk(mock_html[offset : offset + chunk_size])
        await asyncio.sleep(0.01)
        offset += chunk_size

    return mock_html
APPLE_MOCK_CODE = """<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Product Showcase</title>
<script src="https://cdn.tailwindcss.com"></script>
<link href="https://fonts.googleapis.com/css2?family=Roboto:wght@400;500;700&display=swap" rel="stylesheet">
<style>
body {
font-family: 'Roboto', sans-serif;
}
</style>
</head>
<body class="bg-black text-white">
<nav class="py-6">
<div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 flex justify-between items-center">
<div class="flex items-center">
<img src="https://placehold.co/24x24" alt="Company Logo" class="mr-8">
<a href="#" class="text-white text-sm font-medium mr-4">Store</a>
<a href="#" class="text-white text-sm font-medium mr-4">Mac</a>
<a href="#" class="text-white text-sm font-medium mr-4">iPad</a>
<a href="#" class="text-white text-sm font-medium mr-4">iPhone</a>
<a href="#" class="text-white text-sm font-medium mr-4">Watch</a>
<a href="#" class="text-white text-sm font-medium mr-4">Vision</a>
<a href="#" class="text-white text-sm font-medium mr-4">AirPods</a>
<a href="#" class="text-white text-sm font-medium mr-4">TV & Home</a>
<a href="#" class="text-white text-sm font-medium mr-4">Entertainment</a>
<a href="#" class="text-white text-sm font-medium mr-4">Accessories</a>
<a href="#" class="text-white text-sm font-medium">Support</a>
</div>
<div class="flex items-center">
<a href="#" class="text-white text-sm font-medium mr-4"><i class="fas fa-search"></i></a>
<a href="#" class="text-white text-sm font-medium"><i class="fas fa-shopping-bag"></i></a>
</div>
</div>
</nav>
<main class="mt-8">
<div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
<div class="text-center">
<img src="https://placehold.co/100x100" alt="Brand Logo" class="mx-auto mb-4">
<h1 class="text-5xl font-bold mb-4">WATCH SERIES 9</h1>
<p class="text-2xl font-medium mb-8">Smarter. Brighter. Mightier.</p>
<div class="flex justify-center space-x-4">
<a href="#" class="text-blue-600 text-sm font-medium">Learn more ></a>
<a href="#" class="text-blue-600 text-sm font-medium">Buy ></a>
</div>
</div>
<div class="flex justify-center mt-12">
<img src="https://placehold.co/500x300" alt="Product image of a smartwatch with a pink band and a circular interface displaying various health metrics." class="mr-8">
<img src="https://placehold.co/500x300" alt="Product image of a smartwatch with a blue band and a square interface showing a classic analog clock face." class="ml-8">
</div>
</div>
</main>
</body>
</html>"""
NYTIMES_MOCK_CODE = """
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>The New York Times - News</title>
<script src="https://cdn.tailwindcss.com"></script>
<link href="https://fonts.googleapis.com/css2?family=Libre+Franklin:wght@300;400;700&display=swap" rel="stylesheet">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css">
<style>
body {
font-family: 'Libre Franklin', sans-serif;
}
</style>
</head>
<body class="bg-gray-100">
<div class="container mx-auto px-4">
<header class="border-b border-gray-300 py-4">
<div class="flex justify-between items-center">
<div class="flex items-center space-x-4">
<button class="text-gray-700"><i class="fas fa-bars"></i></button>
<button class="text-gray-700"><i class="fas fa-search"></i></button>
<div class="text-xs uppercase tracking-widest">Tuesday, November 14, 2023<br>Today's Paper</div>
</div>
<div>
<img src="https://placehold.co/200x50?text=The+New+York+Times+Logo" alt="The New York Times Logo" class="h-8">
</div>
<div class="flex items-center space-x-4">
<button class="bg-black text-white px-4 py-1 text-xs uppercase tracking-widest">Give the times</button>
<div class="text-xs">Account</div>
</div>
</div>
<nav class="flex justify-between items-center py-4">
<div class="flex space-x-4">
<a href="#" class="text-xs uppercase tracking-widest text-gray-700">U.S.</a>
<!-- Add other navigation links as needed -->
</div>
<div class="flex space-x-4">
<a href="#" class="text-xs uppercase tracking-widest text-gray-700">Cooking</a>
<!-- Add other navigation links as needed -->
</div>
</nav>
</header>
<main>
<section class="py-6">
<div class="grid grid-cols-3 gap-4">
<div class="col-span-2">
<article class="mb-4">
<h2 class="text-xl font-bold mb-2">Israeli Military Raids Gazas Largest Hospital</h2>
<p class="text-gray-700 mb-2">Israeli troops have entered the Al-Shifa Hospital complex, where conditions have grown dire and Israel says Hamas fighters are embedded.</p>
<a href="#" class="text-blue-600 text-sm">See more updates <i class="fas fa-external-link-alt"></i></a>
</article>
<!-- Repeat for each news item -->
</div>
<div class="col-span-1">
<article class="mb-4">
<img src="https://placehold.co/300x200?text=News+Image" alt="Flares and plumes of smoke over the northern Gaza skyline on Tuesday." class="mb-2">
<h2 class="text-xl font-bold mb-2">From Elvis to Elopements, the Evolution of the Las Vegas Wedding</h2>
<p class="text-gray-700 mb-2">The glittering city that attracts thousands of couples seeking unconventional nuptials has grown beyond the drive-through wedding.</p>
<a href="#" class="text-blue-600 text-sm">8 MIN READ</a>
</article>
<!-- Repeat for each news item -->
</div>
</div>
</section>
</main>
</div>
</body>
</html>
"""
NO_IMAGES_NYTIMES_MOCK_CODE = """
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>The New York Times - News</title>
<script src="https://cdn.tailwindcss.com"></script>
<link href="https://fonts.googleapis.com/css2?family=Libre+Franklin:wght@300;400;700&display=swap" rel="stylesheet">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css">
<style>
body {
font-family: 'Libre Franklin', sans-serif;
}
</style>
</head>
<body class="bg-gray-100">
<div class="container mx-auto px-4">
<header class="border-b border-gray-300 py-4">
<div class="flex justify-between items-center">
<div class="flex items-center space-x-4">
<button class="text-gray-700"><i class="fas fa-bars"></i></button>
<button class="text-gray-700"><i class="fas fa-search"></i></button>
<div class="text-xs uppercase tracking-widest">Tuesday, November 14, 2023<br>Today's Paper</div>
</div>
<div class="flex items-center space-x-4">
<button class="bg-black text-white px-4 py-1 text-xs uppercase tracking-widest">Give the times</button>
<div class="text-xs">Account</div>
</div>
</div>
<nav class="flex justify-between items-center py-4">
<div class="flex space-x-4">
<a href="#" class="text-xs uppercase tracking-widest text-gray-700">U.S.</a>
<!-- Add other navigation links as needed -->
</div>
<div class="flex space-x-4">
<a href="#" class="text-xs uppercase tracking-widest text-gray-700">Cooking</a>
<!-- Add other navigation links as needed -->
</div>
</nav>
</header>
<main>
<section class="py-6">
<div class="grid grid-cols-3 gap-4">
<div class="col-span-2">
<article class="mb-4">
<h2 class="text-xl font-bold mb-2">Israeli Military Raids Gazas Largest Hospital</h2>
<p class="text-gray-700 mb-2">Israeli troops have entered the Al-Shifa Hospital complex, where conditions have grown dire and Israel says Hamas fighters are embedded.</p>
<a href="#" class="text-blue-600 text-sm">See more updates <i class="fas fa-external-link-alt"></i></a>
</article>
<!-- Repeat for each news item -->
</div>
<div class="col-span-1">
<article class="mb-4">
<h2 class="text-xl font-bold mb-2">From Elvis to Elopements, the Evolution of the Las Vegas Wedding</h2>
<p class="text-gray-700 mb-2">The glittering city that attracts thousands of couples seeking unconventional nuptials has grown beyond the drive-through wedding.</p>
<a href="#" class="text-blue-600 text-sm">8 MIN READ</a>
</article>
<!-- Repeat for each news item -->
</div>
</div>
</section>
</main>
</div>
</body>
</html>
"""

1583
backend/mock_llm.py Normal file

File diff suppressed because it is too large Load Diff

1536
backend/poetry.lock generated

File diff suppressed because it is too large Load Diff

134
backend/prompts/__init__.py Normal file
View File

@ -0,0 +1,134 @@
from typing import Union
from openai.types.chat import ChatCompletionMessageParam, ChatCompletionContentPartParam
from custom_types import InputMode
from image_generation.core import create_alt_url_mapping
from prompts.imported_code_prompts import IMPORTED_CODE_SYSTEM_PROMPTS
from prompts.screenshot_system_prompts import SYSTEM_PROMPTS
from prompts.types import Stack
from video.utils import assemble_claude_prompt_video
# Text part of the user message that assemble_prompt sends alongside the
# screenshot for every stack other than "svg".
USER_PROMPT = """
Generate code for a web page that looks exactly like this.
"""
# Text part of the user message that assemble_prompt uses when the stack is "svg".
SVG_USER_PROMPT = """
Generate code for a SVG that looks exactly like this.
"""
async def create_prompt(
    params: dict[str, str], stack: Stack, input_mode: InputMode
) -> tuple[list[ChatCompletionMessageParam], dict[str, str]]:
    """Build the message list for a generation request.

    Args:
        params: Request payload. Keys read here: "isImportedFromCode",
            "history", "image", "resultImage" and "generationType".
        stack: Target stack; selects the system prompt.
        input_mode: When equal to "video", the assembled messages are replaced
            by the video prompt built from ``params["image"]``.

    Returns:
        A ``(prompt_messages, image_cache)`` tuple. ``image_cache`` is only
        populated for screenshot-based "update" generations and is an empty
        dict otherwise.
    """
    image_cache: dict[str, str] = {}

    if not params.get("isImportedFromCode"):
        # Screenshot-based flow: start from the image prompt. The result image
        # is only forwarded when the request carries a truthy one.
        result_image = params.get("resultImage") or None
        prompt_messages = assemble_prompt(params["image"], stack, result_image)

        if params["generationType"] == "update":
            # Transform the history tree into message format
            # TODO: Move this to frontend
            # Even positions are previous model output, odd ones user requests.
            for position, entry in enumerate(params["history"]):
                if position % 2 != 0:
                    turn: ChatCompletionMessageParam = {
                        "role": "user",
                        "content": entry,
                    }
                else:
                    turn: ChatCompletionMessageParam = {
                        "role": "assistant",
                        "content": entry,
                    }
                prompt_messages.append(turn)

            image_cache = create_alt_url_mapping(params["history"][-2])
    else:
        # Imported-code flow: the first history entry is the imported code and
        # goes into the system message; the rest alternate user/assistant.
        imported_code = params["history"][0]
        prompt_messages = assemble_imported_code_prompt(imported_code, stack)

        for position, entry in enumerate(params["history"][1:]):
            if position % 2 != 0:
                turn: ChatCompletionMessageParam = {
                    "role": "assistant",
                    "content": entry,
                }
            else:
                turn: ChatCompletionMessageParam = {
                    "role": "user",
                    "content": entry,
                }
            prompt_messages.append(turn)

    # Video input discards whatever was assembled above and uses the video prompt.
    if input_mode == "video":
        prompt_messages = await assemble_claude_prompt_video(params["image"])

    return prompt_messages, image_cache
def assemble_imported_code_prompt(
    code: str, stack: Stack
) -> list[ChatCompletionMessageParam]:
    """Build the single system message used when a session starts from imported code.

    The message content is the stack's imported-code system prompt followed by
    a newline, a space, an introductory sentence and the code itself.

    Args:
        code: The imported source code.
        stack: Target stack; "svg" switches the introductory sentence.

    Returns:
        A one-element list holding the system message.
    """
    stack_instructions = IMPORTED_CODE_SYSTEM_PROMPTS[stack]

    if stack == "svg":
        introduction = "Here is the code of the SVG: "
    else:
        introduction = "Here is the code of the app: "

    system_message: ChatCompletionMessageParam = {
        "role": "system",
        "content": stack_instructions + "\n " + (introduction + code),
    }
    return [system_message]
# TODO: Use result_image_data_url
def assemble_prompt(
    image_data_url: str,
    stack: Stack,
    result_image_data_url: Union[str, None] = None,
) -> list[ChatCompletionMessageParam]:
    """Build the system and user messages for a screenshot-based generation.

    Args:
        image_data_url: URL of the reference screenshot.
        stack: Target stack; selects the system prompt and, for "svg", the
            SVG-specific user text.
        result_image_data_url: Optional URL of a second image. When truthy it
            is placed between the reference image and the text part.

    Returns:
        A two-element list: the system message, then the user message whose
        content is the list of image and text parts.
    """
    system_content = SYSTEM_PROMPTS[stack]
    instruction_text = SVG_USER_PROMPT if stack == "svg" else USER_PROMPT

    # Parts are appended in their final order: reference image, optional
    # result image, then the text instruction.
    user_content: list[ChatCompletionContentPartParam] = []
    user_content.append(
        {
            "type": "image_url",
            "image_url": {"url": image_data_url, "detail": "high"},
        }
    )
    if result_image_data_url:
        user_content.append(
            {
                "type": "image_url",
                "image_url": {"url": result_image_data_url, "detail": "high"},
            }
        )
    user_content.append(
        {
            "type": "text",
            "text": instruction_text,
        }
    )

    system_message: ChatCompletionMessageParam = {
        "role": "system",
        "content": system_content,
    }
    user_message: ChatCompletionMessageParam = {
        "role": "user",
        "content": user_content,
    }
    return [system_message, user_message]

View File

@ -0,0 +1,114 @@
# Not used yet
# References:
# https://github.com/hundredblocks/transcription_demo
# https://docs.anthropic.com/claude/docs/prompt-engineering
# https://github.com/anthropics/anthropic-cookbook/blob/main/multimodal/best_practices_for_vision.ipynb
# System prompt for recreating an app from video screenshots with HTML, jQuery
# and Tailwind. It asks for reasoning inside <thinking></thinking> tags followed
# by the code inside <html></html> tags.
# NOTE(review): the text contains misspellings ("funtional", "fuctionality",
# "depicated") and the garbled phrase "about you how would you build it". They
# are left untouched here because the string is runtime text.
VIDEO_PROMPT = """
You are an expert at building single page, funtional apps using HTML, Jquery and Tailwind CSS.
You also have perfect vision and pay great attention to detail.
You will be given screenshots in order at consistent intervals from a video of a user interacting with a web app. You need to re-create the same app exactly such that the same user interactions will produce the same results in the app you build.
- Make sure the app looks exactly like the screenshot.
- Pay close attention to background color, text color, font size, font family,
padding, margin, border, etc. Match the colors and sizes exactly.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
- If some fuctionality requires a backend call, just mock the data instead.
- MAKE THE APP FUNCTIONAL using Javascript. Allow the user to interact with the app and get the same behavior as the video.
In terms of libraries,
- Use this script to include Tailwind: <script src="https://cdn.tailwindcss.com"></script>
- You can use Google Fonts
- Font Awesome for icons: <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css"></link>
- Use jQuery: <script src="https://code.jquery.com/jquery-3.7.1.min.js"></script>
Before generating the code for the app, think step-by-step: first, about the user flow depicated in the video and then about you how would you build it and how you would structure the code. Do the thinking within <thinking></thinking> tags. Then, provide your code within <html></html> tags.
"""
# Alpine.js variant of the video prompt: same instructions as VIDEO_PROMPT, but
# it names Alpine.js instead of jQuery and includes the Alpine.js script tag.
# NOTE(review): it carries the same misspellings ("funtional", "fuctionality",
# "depicated"). They are left untouched here because the string is runtime text.
VIDEO_PROMPT_ALPINE_JS = """
You are an expert at building single page, funtional apps using HTML, Alpine.js and Tailwind CSS.
You also have perfect vision and pay great attention to detail.
You will be given screenshots in order at consistent intervals from a video of a user interacting with a web app. You need to re-create the same app exactly such that the same user interactions will produce the same results in the app you build.
- Make sure the app looks exactly like the screenshot.
- Pay close attention to background color, text color, font size, font family,
padding, margin, border, etc. Match the colors and sizes exactly.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
- If some fuctionality requires a backend call, just mock the data instead.
- MAKE THE APP FUNCTIONAL using Javascript. Allow the user to interact with the app and get the same behavior as the video.
In terms of libraries,
- Use this script to include Tailwind: <script src="https://cdn.tailwindcss.com"></script>
- You can use Google Fonts
- Font Awesome for icons: <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css"></link>
- Use Alpine.js: <script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script>
Before generating the code for the app, think step-by-step: first, about the user flow depicated in the video and then about you how would you build it and how you would structure the code. Do the thinking within <thinking></thinking> tags. Then, provide your code within <html></html> tags.
"""
HTML_TAILWIND_CLAUDE_SYSTEM_PROMPT = """
You have perfect vision and pay great attention to detail which makes you an expert at building single page apps using Tailwind, HTML and JS.
You take screenshots of a reference web page from the user, and then build single page apps
using Tailwind, HTML and JS.
You might also be given a screenshot (The second image) of a web page that you have already built, and asked to
update it to look more like the reference image(The first image).
- Make sure the app looks exactly like the screenshot.
- Do not leave out smaller UI elements. Make sure to include every single thing in the screenshot.
- Pay close attention to background color, text color, font size, font family,
padding, margin, border, etc. Match the colors and sizes exactly.
- In particular, pay attention to background color and overall color scheme.
- Use the exact text from the screenshot.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Make sure to always get the layout right (if things are arranged in a row in the screenshot, they should be in a row in the app as well)
- Repeat elements as needed to match the screenshot. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
In terms of libraries,
- Use this script to include Tailwind: <script src="https://cdn.tailwindcss.com"></script>
- You can use Google Fonts
- Font Awesome for icons: <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css"></link>
Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
"""
#
REACT_TAILWIND_CLAUDE_SYSTEM_PROMPT = """
You have perfect vision and pay great attention to detail which makes you an expert at building single page apps using React/Tailwind.
You take screenshots of a reference web page from the user, and then build single page apps
using React and Tailwind CSS.
You might also be given a screenshot (The second image) of a web page that you have already built, and asked to
update it to look more like the reference image(The first image).
- Make sure the app looks exactly like the screenshot.
- Do not leave out smaller UI elements. Make sure to include every single thing in the screenshot.
- Pay close attention to background color, text color, font size, font family,
padding, margin, border, etc. Match the colors and sizes exactly.
- In particular, pay attention to background color and overall color scheme.
- Use the exact text from the screenshot.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Make sure to always get the layout right (if things are arranged in a row in the screenshot, they should be in a row in the app as well)
- CREATE REUSABLE COMPONENTS FOR REPEATING ELEMENTS. For example, if there are 15 similar items in the screenshot, your code should include a reusable component that generates these items. and use loops to instantiate these components as needed.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
In terms of libraries,
- Use these script to include React so that it can run on a standalone page:
<script src="https://unpkg.com/react/umd/react.development.js"></script>
<script src="https://unpkg.com/react-dom/umd/react-dom.development.js"></script>
<script src="https://unpkg.com/@babel/standalone/babel.js"></script>
- Use this script to include Tailwind: <script src="https://cdn.tailwindcss.com"></script>
- You can use Google Fonts
- Font Awesome for icons: <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css"></link>
Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
"""

View File

@ -0,0 +1,153 @@
from prompts.types import SystemPrompts
IMPORTED_CODE_TAILWIND_SYSTEM_PROMPT = """
You are an expert Tailwind developer.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
In terms of libraries,
- Use this script to include Tailwind: <script src="https://cdn.tailwindcss.com"></script>
- You can use Google Fonts
- Font Awesome for icons: <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css"></link>
Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
"""
IMPORTED_CODE_HTML_CSS_SYSTEM_PROMPT = """
You are an expert CSS developer.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
In terms of libraries,
- You can use Google Fonts
- Font Awesome for icons: <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css"></link>
Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
"""
IMPORTED_CODE_REACT_TAILWIND_SYSTEM_PROMPT = """
You are an expert React/Tailwind developer
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
In terms of libraries,
- Use these script to include React so that it can run on a standalone page:
<script src="https://unpkg.com/react/umd/react.development.js"></script>
<script src="https://unpkg.com/react-dom/umd/react-dom.development.js"></script>
<script src="https://unpkg.com/@babel/standalone/babel.js"></script>
- Use this script to include Tailwind: <script src="https://cdn.tailwindcss.com"></script>
- You can use Google Fonts
- Font Awesome for icons: <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css"></link>
Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
"""
IMPORTED_CODE_BOOTSTRAP_SYSTEM_PROMPT = """
You are an expert Bootstrap developer.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
In terms of libraries,
- Use this script to include Bootstrap: <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-T3c6CoIi6uLrA9TneNEoa7RxnatzjcDSCmG1MXxSR1GAsXEV/Dwwykc2MPK8M2HN" crossorigin="anonymous">
- You can use Google Fonts
- Font Awesome for icons: <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css"></link>
Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
"""
# System prompt for the Ionic + Tailwind stack, imported-code variant.
# It pins the CDN tags for Ionic core, Tailwind and ionicons, and asks for the
# complete page inside <html></html> with no markdown fences.
# Registered under the `ionic_tailwind` key of IMPORTED_CODE_SYSTEM_PROMPTS.
IMPORTED_CODE_IONIC_TAILWIND_SYSTEM_PROMPT = """
You are an expert Ionic/Tailwind developer.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
In terms of libraries,
- Use these script to include Ionic so that it can run on a standalone page:
<script type="module" src="https://cdn.jsdelivr.net/npm/@ionic/core/dist/ionic/ionic.esm.js"></script>
<script nomodule src="https://cdn.jsdelivr.net/npm/@ionic/core/dist/ionic/ionic.js"></script>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@ionic/core/css/ionic.bundle.css" />
- Use this script to include Tailwind: <script src="https://cdn.tailwindcss.com"></script>
- You can use Google Fonts
- ionicons for icons, add the following <script > tags near the end of the page, right before the closing </body> tag:
<script type="module">
import ionicons from 'https://cdn.jsdelivr.net/npm/ionicons/+esm'
</script>
<script nomodule src="https://cdn.jsdelivr.net/npm/ionicons/dist/esm/ionicons.min.js"></script>
<link href="https://cdn.jsdelivr.net/npm/ionicons/dist/collection/components/icon/icon.min.css" rel="stylesheet">
Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
"""
# System prompt for the Vue + Tailwind stack, imported-code variant.
# It embeds a minimal Vue global-build example and the CDN tags for Vue,
# Tailwind and Font Awesome.
# NOTE(review): unlike the sibling prompts, this literal closes on its last text
# line, so the string has no trailing newline. Confirm that is intentional before
# normalizing it; the prompt text appears to be compared verbatim by tests.
IMPORTED_CODE_VUE_TAILWIND_SYSTEM_PROMPT = """
You are an expert Vue/Tailwind developer.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
In terms of libraries,
- Use these script to include Vue so that it can run on a standalone page:
<script src="https://registry.npmmirror.com/vue/3.3.11/files/dist/vue.global.js"></script>
- Use Vue using the global build like so:
<div id="app">{{ message }}</div>
<script>
const { createApp, ref } = Vue
createApp({
setup() {
const message = ref('Hello vue!')
return {
message
}
}
}).mount('#app')
</script>
- Use this script to include Tailwind: <script src="https://cdn.tailwindcss.com"></script>
- You can use Google Fonts
- Font Awesome for icons: <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css"></link>
Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
The return result must only include the code."""
# System prompt for the SVG stack, imported-code variant; the reply must be
# wrapped in <svg></svg> rather than <html></html>.
# NOTE(review): the "Repeat elements" rule still says "to match the screenshot",
# wording the other IMPORTED_CODE_* prompts visible here omit. Possibly a
# copy-paste leftover from the screenshot prompt; confirm before changing.
IMPORTED_CODE_SVG_SYSTEM_PROMPT = """
You are an expert at building SVGs.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed to match the screenshot. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
- You can use Google Fonts
Return only the full code in <svg></svg> tags.
Do not include markdown "```" or "```svg" at the start or end.
"""
# Lookup of the imported-code system prompts, one entry per stack key
# (html_tailwind, html_css, react_tailwind, bootstrap, ionic_tailwind,
# vue_tailwind, svg).
IMPORTED_CODE_SYSTEM_PROMPTS = SystemPrompts(
html_tailwind=IMPORTED_CODE_TAILWIND_SYSTEM_PROMPT,
html_css=IMPORTED_CODE_HTML_CSS_SYSTEM_PROMPT,
react_tailwind=IMPORTED_CODE_REACT_TAILWIND_SYSTEM_PROMPT,
bootstrap=IMPORTED_CODE_BOOTSTRAP_SYSTEM_PROMPT,
ionic_tailwind=IMPORTED_CODE_IONIC_TAILWIND_SYSTEM_PROMPT,
vue_tailwind=IMPORTED_CODE_VUE_TAILWIND_SYSTEM_PROMPT,
svg=IMPORTED_CODE_SVG_SYSTEM_PROMPT,
)

View File

@ -1,4 +1,7 @@
TAILWIND_SYSTEM_PROMPT = """ from prompts.types import SystemPrompts
HTML_TAILWIND_SYSTEM_PROMPT = """
You are an expert Tailwind developer You are an expert Tailwind developer
You take screenshots of a reference web page from the user, and then build single page apps You take screenshots of a reference web page from the user, and then build single page apps
using Tailwind, HTML and JS. using Tailwind, HTML and JS.
@ -23,6 +26,30 @@ Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end. Do not include markdown "```" or "```html" at the start or end.
""" """
HTML_CSS_SYSTEM_PROMPT = """
You are an expert CSS developer
You take screenshots of a reference web page from the user, and then build single page apps
using CSS, HTML and JS.
You might also be given a screenshot(The second image) of a web page that you have already built, and asked to
update it to look more like the reference image(The first image).
- Make sure the app looks exactly like the screenshot.
- Pay close attention to background color, text color, font size, font family,
padding, margin, border, etc. Match the colors and sizes exactly.
- Use the exact text from the screenshot.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed to match the screenshot. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
In terms of libraries,
- You can use Google Fonts
- Font Awesome for icons: <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css"></link>
Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
"""
BOOTSTRAP_SYSTEM_PROMPT = """ BOOTSTRAP_SYSTEM_PROMPT = """
You are an expert Bootstrap developer You are an expert Bootstrap developer
You take screenshots of a reference web page from the user, and then build single page apps You take screenshots of a reference web page from the user, and then build single page apps
@ -111,54 +138,73 @@ Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end. Do not include markdown "```" or "```html" at the start or end.
""" """
USER_PROMPT = """ VUE_TAILWIND_SYSTEM_PROMPT = """
Generate code for a web page that looks exactly like this. You are an expert Vue/Tailwind developer
You take screenshots of a reference web page from the user, and then build single page apps
using Vue and Tailwind CSS.
You might also be given a screenshot(The second image) of a web page that you have already built, and asked to
update it to look more like the reference image(The first image).
- Make sure the app looks exactly like the screenshot.
- Pay close attention to background color, text color, font size, font family,
padding, margin, border, etc. Match the colors and sizes exactly.
- Use the exact text from the screenshot.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed to match the screenshot. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
- Use Vue using the global build like so:
<div id="app">{{ message }}</div>
<script>
const { createApp, ref } = Vue
createApp({
setup() {
const message = ref('Hello vue!')
return {
message
}
}
}).mount('#app')
</script>
In terms of libraries,
- Use these script to include Vue so that it can run on a standalone page:
<script src="https://registry.npmmirror.com/vue/3.3.11/files/dist/vue.global.js"></script>
- Use this script to include Tailwind: <script src="https://cdn.tailwindcss.com"></script>
- You can use Google Fonts
- Font Awesome for icons: <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css"></link>
Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
The return result must only include the code.
""" """
def assemble_prompt( SVG_SYSTEM_PROMPT = """
image_data_url, generated_code_config: str, result_image_data_url=None You are an expert at building SVGs.
): You take screenshots of a reference web page from the user, and then build a SVG that looks exactly like the screenshot.
# Set the system prompt based on the output settings
system_content = TAILWIND_SYSTEM_PROMPT
if generated_code_config == "html_tailwind":
system_content = TAILWIND_SYSTEM_PROMPT
elif generated_code_config == "react_tailwind":
system_content = REACT_TAILWIND_SYSTEM_PROMPT
elif generated_code_config == "bootstrap":
system_content = BOOTSTRAP_SYSTEM_PROMPT
elif generated_code_config == "ionic_tailwind":
system_content = IONIC_TAILWIND_SYSTEM_PROMPT
else:
raise Exception("Code config is not one of available options")
user_content = [ - Make sure the SVG looks exactly like the screenshot.
{ - Pay close attention to background color, text color, font size, font family,
"type": "image_url", padding, margin, border, etc. Match the colors and sizes exactly.
"image_url": {"url": image_data_url, "detail": "high"}, - Use the exact text from the screenshot.
}, - Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
{ - Repeat elements as needed to match the screenshot. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
"type": "text", - For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
"text": USER_PROMPT, - You can use Google Fonts
},
]
# Include the result image if it exists Return only the full code in <svg></svg> tags.
if result_image_data_url: Do not include markdown "```" or "```svg" at the start or end.
user_content.insert( """
1,
{
"type": "image_url", SYSTEM_PROMPTS = SystemPrompts(
"image_url": {"url": result_image_data_url, "detail": "high"}, html_css=HTML_CSS_SYSTEM_PROMPT,
}, html_tailwind=HTML_TAILWIND_SYSTEM_PROMPT,
) react_tailwind=REACT_TAILWIND_SYSTEM_PROMPT,
return [ bootstrap=BOOTSTRAP_SYSTEM_PROMPT,
{ ionic_tailwind=IONIC_TAILWIND_SYSTEM_PROMPT,
"role": "system", vue_tailwind=VUE_TAILWIND_SYSTEM_PROMPT,
"content": system_content, svg=SVG_SYSTEM_PROMPT,
}, )
{
"role": "user",
"content": user_content,
},
]

View File

@ -0,0 +1,471 @@
from llm import Llm
from prompts import assemble_imported_code_prompt, assemble_prompt
# Golden copy of the screenshot system prompt for the `html_tailwind` stack.
# test_prompts compares assemble_prompt's system message against this exact text,
# so any edit here must be mirrored in the production prompt (and vice versa).
TAILWIND_SYSTEM_PROMPT = """
You are an expert Tailwind developer
You take screenshots of a reference web page from the user, and then build single page apps
using Tailwind, HTML and JS.
You might also be given a screenshot(The second image) of a web page that you have already built, and asked to
update it to look more like the reference image(The first image).
- Make sure the app looks exactly like the screenshot.
- Pay close attention to background color, text color, font size, font family,
padding, margin, border, etc. Match the colors and sizes exactly.
- Use the exact text from the screenshot.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed to match the screenshot. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
In terms of libraries,
- Use this script to include Tailwind: <script src="https://cdn.tailwindcss.com"></script>
- You can use Google Fonts
- Font Awesome for icons: <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css"></link>
Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
"""
# Golden copy of the screenshot system prompt for the `html_css` stack
# (plain CSS/HTML/JS, no CSS framework tag); compared verbatim in test_prompts.
HTML_CSS_SYSTEM_PROMPT = """
You are an expert CSS developer
You take screenshots of a reference web page from the user, and then build single page apps
using CSS, HTML and JS.
You might also be given a screenshot(The second image) of a web page that you have already built, and asked to
update it to look more like the reference image(The first image).
- Make sure the app looks exactly like the screenshot.
- Pay close attention to background color, text color, font size, font family,
padding, margin, border, etc. Match the colors and sizes exactly.
- Use the exact text from the screenshot.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed to match the screenshot. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
In terms of libraries,
- You can use Google Fonts
- Font Awesome for icons: <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css"></link>
Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
"""
# Golden copy of the screenshot system prompt for the `bootstrap` stack,
# including the pinned Bootstrap 5.3.2 CDN link; compared verbatim in test_prompts.
BOOTSTRAP_SYSTEM_PROMPT = """
You are an expert Bootstrap developer
You take screenshots of a reference web page from the user, and then build single page apps
using Bootstrap, HTML and JS.
You might also be given a screenshot(The second image) of a web page that you have already built, and asked to
update it to look more like the reference image(The first image).
- Make sure the app looks exactly like the screenshot.
- Pay close attention to background color, text color, font size, font family,
padding, margin, border, etc. Match the colors and sizes exactly.
- Use the exact text from the screenshot.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed to match the screenshot. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
In terms of libraries,
- Use this script to include Bootstrap: <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-T3c6CoIi6uLrA9TneNEoa7RxnatzjcDSCmG1MXxSR1GAsXEV/Dwwykc2MPK8M2HN" crossorigin="anonymous">
- You can use Google Fonts
- Font Awesome for icons: <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css"></link>
Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
"""
# Golden copy of the screenshot system prompt for the `react_tailwind` stack.
# Lists the React, ReactDOM and Babel standalone script tags the prompt asks for;
# compared verbatim in test_prompts.
REACT_TAILWIND_SYSTEM_PROMPT = """
You are an expert React/Tailwind developer
You take screenshots of a reference web page from the user, and then build single page apps
using React and Tailwind CSS.
You might also be given a screenshot(The second image) of a web page that you have already built, and asked to
update it to look more like the reference image(The first image).
- Make sure the app looks exactly like the screenshot.
- Pay close attention to background color, text color, font size, font family,
padding, margin, border, etc. Match the colors and sizes exactly.
- Use the exact text from the screenshot.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed to match the screenshot. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
In terms of libraries,
- Use these script to include React so that it can run on a standalone page:
<script src="https://unpkg.com/react/umd/react.development.js"></script>
<script src="https://unpkg.com/react-dom/umd/react-dom.development.js"></script>
<script src="https://unpkg.com/@babel/standalone/babel.js"></script>
- Use this script to include Tailwind: <script src="https://cdn.tailwindcss.com"></script>
- You can use Google Fonts
- Font Awesome for icons: <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css"></link>
Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
"""
# Golden copy of the screenshot system prompt for the `ionic_tailwind` stack.
# Uses ionicons instead of Font Awesome for icons; compared verbatim in test_prompts.
IONIC_TAILWIND_SYSTEM_PROMPT = """
You are an expert Ionic/Tailwind developer
You take screenshots of a reference web page from the user, and then build single page apps
using Ionic and Tailwind CSS.
You might also be given a screenshot(The second image) of a web page that you have already built, and asked to
update it to look more like the reference image(The first image).
- Make sure the app looks exactly like the screenshot.
- Pay close attention to background color, text color, font size, font family,
padding, margin, border, etc. Match the colors and sizes exactly.
- Use the exact text from the screenshot.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed to match the screenshot. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
In terms of libraries,
- Use these script to include Ionic so that it can run on a standalone page:
<script type="module" src="https://cdn.jsdelivr.net/npm/@ionic/core/dist/ionic/ionic.esm.js"></script>
<script nomodule src="https://cdn.jsdelivr.net/npm/@ionic/core/dist/ionic/ionic.js"></script>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@ionic/core/css/ionic.bundle.css" />
- Use this script to include Tailwind: <script src="https://cdn.tailwindcss.com"></script>
- You can use Google Fonts
- ionicons for icons, add the following <script > tags near the end of the page, right before the closing </body> tag:
<script type="module">
import ionicons from 'https://cdn.jsdelivr.net/npm/ionicons/+esm'
</script>
<script nomodule src="https://cdn.jsdelivr.net/npm/ionicons/dist/esm/ionicons.min.js"></script>
<link href="https://cdn.jsdelivr.net/npm/ionicons/dist/collection/components/icon/icon.min.css" rel="stylesheet">
Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
"""
# Golden copy of the screenshot system prompt for the `vue_tailwind` stack.
# Carries the Vue global-build usage example and an extra closing instruction
# ("The return result must only include the code."); compared verbatim in test_prompts.
VUE_TAILWIND_SYSTEM_PROMPT = """
You are an expert Vue/Tailwind developer
You take screenshots of a reference web page from the user, and then build single page apps
using Vue and Tailwind CSS.
You might also be given a screenshot(The second image) of a web page that you have already built, and asked to
update it to look more like the reference image(The first image).
- Make sure the app looks exactly like the screenshot.
- Pay close attention to background color, text color, font size, font family,
padding, margin, border, etc. Match the colors and sizes exactly.
- Use the exact text from the screenshot.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed to match the screenshot. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
- Use Vue using the global build like so:
<div id="app">{{ message }}</div>
<script>
const { createApp, ref } = Vue
createApp({
setup() {
const message = ref('Hello vue!')
return {
message
}
}
}).mount('#app')
</script>
In terms of libraries,
- Use these script to include Vue so that it can run on a standalone page:
<script src="https://registry.npmmirror.com/vue/3.3.11/files/dist/vue.global.js"></script>
- Use this script to include Tailwind: <script src="https://cdn.tailwindcss.com"></script>
- You can use Google Fonts
- Font Awesome for icons: <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css"></link>
Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
The return result must only include the code.
"""
# Golden copy of the screenshot system prompt for the `svg` stack; output is
# requested inside <svg></svg>. test_prompts pairs it with SVG_USER_PROMPT.
SVG_SYSTEM_PROMPT = """
You are an expert at building SVGs.
You take screenshots of a reference web page from the user, and then build a SVG that looks exactly like the screenshot.
- Make sure the SVG looks exactly like the screenshot.
- Pay close attention to background color, text color, font size, font family,
padding, margin, border, etc. Match the colors and sizes exactly.
- Use the exact text from the screenshot.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed to match the screenshot. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
- You can use Google Fonts
Return only the full code in <svg></svg> tags.
Do not include markdown "```" or "```svg" at the start or end.
"""
# Golden copy of the imported-code system prompt for the `html_tailwind` stack.
# test_imported_code_prompts expects the system message to be this text followed
# by "\n Here is the code of the app: " and the imported code.
IMPORTED_CODE_TAILWIND_SYSTEM_PROMPT = """
You are an expert Tailwind developer.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
In terms of libraries,
- Use this script to include Tailwind: <script src="https://cdn.tailwindcss.com"></script>
- You can use Google Fonts
- Font Awesome for icons: <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css"></link>
Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
"""
# Golden copy of the imported-code system prompt for plain HTML/CSS.
# Presumably checked by test_imported_code_prompts for the `html_css` stack;
# that part of the test is outside this view.
IMPORTED_CODE_HTML_CSS_SYSTEM_PROMPT = """
You are an expert CSS developer.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
In terms of libraries,
- You can use Google Fonts
- Font Awesome for icons: <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css"></link>
Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
"""
# Golden copy of the imported-code system prompt for React + Tailwind.
# Presumably checked by test_imported_code_prompts for the `react_tailwind`
# stack; that part of the test is outside this view.
IMPORTED_CODE_REACT_TAILWIND_SYSTEM_PROMPT = """
You are an expert React/Tailwind developer
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
In terms of libraries,
- Use these script to include React so that it can run on a standalone page:
<script src="https://unpkg.com/react/umd/react.development.js"></script>
<script src="https://unpkg.com/react-dom/umd/react-dom.development.js"></script>
<script src="https://unpkg.com/@babel/standalone/babel.js"></script>
- Use this script to include Tailwind: <script src="https://cdn.tailwindcss.com"></script>
- You can use Google Fonts
- Font Awesome for icons: <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css"></link>
Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
"""
# Golden copy of the imported-code system prompt for Bootstrap, with the same
# pinned Bootstrap 5.3.2 CDN link as BOOTSTRAP_SYSTEM_PROMPT.
# Presumably checked by test_imported_code_prompts; that part is outside this view.
IMPORTED_CODE_BOOTSTRAP_SYSTEM_PROMPT = """
You are an expert Bootstrap developer.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
In terms of libraries,
- Use this script to include Bootstrap: <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-T3c6CoIi6uLrA9TneNEoa7RxnatzjcDSCmG1MXxSR1GAsXEV/Dwwykc2MPK8M2HN" crossorigin="anonymous">
- You can use Google Fonts
- Font Awesome for icons: <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css"></link>
Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
"""
# Golden copy of the imported-code system prompt for Ionic + Tailwind
# (ionicons for icons rather than Font Awesome).
# Presumably checked by test_imported_code_prompts; that part is outside this view.
IMPORTED_CODE_IONIC_TAILWIND_SYSTEM_PROMPT = """
You are an expert Ionic/Tailwind developer.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
In terms of libraries,
- Use these script to include Ionic so that it can run on a standalone page:
<script type="module" src="https://cdn.jsdelivr.net/npm/@ionic/core/dist/ionic/ionic.esm.js"></script>
<script nomodule src="https://cdn.jsdelivr.net/npm/@ionic/core/dist/ionic/ionic.js"></script>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@ionic/core/css/ionic.bundle.css" />
- Use this script to include Tailwind: <script src="https://cdn.tailwindcss.com"></script>
- You can use Google Fonts
- ionicons for icons, add the following <script > tags near the end of the page, right before the closing </body> tag:
<script type="module">
import ionicons from 'https://cdn.jsdelivr.net/npm/ionicons/+esm'
</script>
<script nomodule src="https://cdn.jsdelivr.net/npm/ionicons/dist/esm/ionicons.min.js"></script>
<link href="https://cdn.jsdelivr.net/npm/ionicons/dist/collection/components/icon/icon.min.css" rel="stylesheet">
Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
"""
# Golden copy of the imported-code system prompt for Vue + Tailwind.
# NOTE(review): the name lacks the `_SYSTEM_` infix used by every sibling
# constant (IMPORTED_CODE_*_SYSTEM_PROMPT); rename only together with all of its
# references, which are not visible from here.
# NOTE(review): the literal closes on its last text line, so there is no trailing
# newline. Keep it that way unless the production prompt changes as well.
IMPORTED_CODE_VUE_TAILWIND_PROMPT = """
You are an expert Vue/Tailwind developer.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
In terms of libraries,
- Use these script to include Vue so that it can run on a standalone page:
<script src="https://registry.npmmirror.com/vue/3.3.11/files/dist/vue.global.js"></script>
- Use Vue using the global build like so:
<div id="app">{{ message }}</div>
<script>
const { createApp, ref } = Vue
createApp({
setup() {
const message = ref('Hello vue!')
return {
message
}
}
}).mount('#app')
</script>
- Use this script to include Tailwind: <script src="https://cdn.tailwindcss.com"></script>
- You can use Google Fonts
- Font Awesome for icons: <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css"></link>
Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
The return result must only include the code."""
# Golden copy of the imported-code system prompt for SVG output
# (reply wrapped in <svg></svg>).
# Presumably checked by test_imported_code_prompts; that part is outside this view.
IMPORTED_CODE_SVG_SYSTEM_PROMPT = """
You are an expert at building SVGs.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed to match the screenshot. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
- You can use Google Fonts
Return only the full code in <svg></svg> tags.
Do not include markdown "```" or "```svg" at the start or end.
"""
# Expected text of the user message for every stack except `svg`;
# test_prompts looks for it at index 2 of the user message content.
USER_PROMPT = """
Generate code for a web page that looks exactly like this.
"""
# Expected text of the user message for the `svg` stack, which asks for an SVG
# instead of a web page.
SVG_USER_PROMPT = """
Generate code for a SVG that looks exactly like this.
"""
def test_prompts():
    """Check the system and user prompts produced for every supported stack.

    For each stack, ``assemble_prompt`` is called with an image data URL and a
    result image data URL. The first message must carry the stack's system
    prompt, and the third content part of the second message must carry the
    matching user prompt text.
    """
    # (stack, expected system prompt, expected user prompt)
    expectations = [
        ("html_tailwind", TAILWIND_SYSTEM_PROMPT, USER_PROMPT),
        ("html_css", HTML_CSS_SYSTEM_PROMPT, USER_PROMPT),
        ("react_tailwind", REACT_TAILWIND_SYSTEM_PROMPT, USER_PROMPT),
        ("bootstrap", BOOTSTRAP_SYSTEM_PROMPT, USER_PROMPT),
        ("ionic_tailwind", IONIC_TAILWIND_SYSTEM_PROMPT, USER_PROMPT),
        ("vue_tailwind", VUE_TAILWIND_SYSTEM_PROMPT, USER_PROMPT),
        # SVG is the only stack with its own user prompt.
        ("svg", SVG_SYSTEM_PROMPT, SVG_USER_PROMPT),
    ]

    for stack, system_prompt, user_prompt in expectations:
        messages = assemble_prompt(
            "image_data_url", stack, "result_image_data_url"  # type: ignore
        )
        assert messages[0].get("content") == system_prompt
        assert messages[1]["content"][2]["text"] == user_prompt  # type: ignore
def test_imported_code_prompts():
    """Check the single system message built when importing existing code.

    For every stack, ``assemble_imported_code_prompt`` must return exactly one
    system message whose content is the stack's imported-code system prompt,
    followed by an introductory sentence and the code itself.
    """
    code = "Sample code"

    app_intro = "\n Here is the code of the app: "
    svg_intro = "\n Here is the code of the SVG: "

    # (stack, expected system prompt, sentence introducing the code)
    cases = [
        ("html_tailwind", IMPORTED_CODE_TAILWIND_SYSTEM_PROMPT, app_intro),
        ("html_css", IMPORTED_CODE_HTML_CSS_SYSTEM_PROMPT, app_intro),
        ("react_tailwind", IMPORTED_CODE_REACT_TAILWIND_SYSTEM_PROMPT, app_intro),
        ("bootstrap", IMPORTED_CODE_BOOTSTRAP_SYSTEM_PROMPT, app_intro),
        ("ionic_tailwind", IMPORTED_CODE_IONIC_TAILWIND_SYSTEM_PROMPT, app_intro),
        ("vue_tailwind", IMPORTED_CODE_VUE_TAILWIND_PROMPT, app_intro),
        ("svg", IMPORTED_CODE_SVG_SYSTEM_PROMPT, svg_intro),
    ]

    for stack, system_prompt, intro in cases:
        actual = assemble_imported_code_prompt(code, stack)  # type: ignore
        expected = [{"role": "system", "content": system_prompt + intro + code}]
        assert actual == expected

22
backend/prompts/types.py Normal file
View File

@ -0,0 +1,22 @@
from typing import Literal, TypedDict
# Typed dictionary holding one string per supported stack. The keys are the
# same identifiers that make up the Stack literal declared alongside it.
# NOTE(review): presumably each value is that stack's system prompt text, as
# the class name suggests - confirm against the prompt modules.
class SystemPrompts(TypedDict):
html_css: str
html_tailwind: str
react_tailwind: str
bootstrap: str
ionic_tailwind: str
vue_tailwind: str
svg: str
# The closed set of stack identifiers accepted as a code-generation target.
# Callers can validate arbitrary strings against it with typing.get_args(Stack).
Stack = Literal[
"html_css",
"html_tailwind",
"react_tailwind",
"bootstrap",
"ionic_tailwind",
"vue_tailwind",
"svg",
]

View File

@ -8,12 +8,22 @@ license = "MIT"
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = "^3.10" python = "^3.10"
fastapi = "^0.95.0" fastapi = "^0.95.0"
uvicorn = "^0.24.0.post1" uvicorn = "^0.25.0"
websockets = "^12.0" websockets = "^12.0"
openai = "^1.2.4" openai = "^1.2.4"
python-dotenv = "^1.0.0" python-dotenv = "^1.0.0"
beautifulsoup4 = "^4.12.2" beautifulsoup4 = "^4.12.2"
httpx = "^0.25.1" httpx = "^0.25.1"
pre-commit = "^3.6.2"
anthropic = "^0.18.0"
moviepy = "^1.0.3"
pillow = "^10.3.0"
types-pillow = "^10.2.0.20240520"
aiohttp = "^3.9.5"
[tool.poetry.group.dev.dependencies]
pytest = "^7.4.3"
pyright = "^1.1.352"
[build-system] [build-system]
requires = ["poetry-core"] requires = ["poetry-core"]

View File

@ -0,0 +1,3 @@
{
"exclude": ["image_generation.py"]
}

55
backend/routes/evals.py Normal file
View File

@ -0,0 +1,55 @@
import os
from fastapi import APIRouter
from pydantic import BaseModel
from evals.utils import image_to_data_url
from evals.config import EVALS_DIR
router = APIRouter()
# Update this if the number of outputs generated per input changes
N = 1
# One eval entry returned by the /evals route.
class Eval(BaseModel):
# The input screenshot, encoded as a data URL.
input: str
# Contents of the output files generated for this input (or a placeholder
# HTML document for each output file that is missing).
outputs: list[str]
@router.get("/evals")
async def get_evals():
    """Return every eval input image together with its generated outputs.

    Scans ``EVALS_DIR/inputs`` for ``.png`` files. For each one it loads the
    ``N`` HTML outputs named ``<input stem>_<i>.html`` from
    ``EVALS_DIR/outputs``. A missing output file is replaced by a small
    placeholder HTML document, so every ``Eval`` carries exactly ``N`` outputs.

    Returns:
        A list of ``Eval`` objects, ordered by input file name.
    """
    input_dir = EVALS_DIR + "/inputs"
    output_dir = EVALS_DIR + "/outputs"

    evals: list[Eval] = []
    # os.listdir returns entries in arbitrary order; sort so the listing is stable.
    for file in sorted(os.listdir(input_dir)):
        if not file.endswith(".png"):
            continue

        input_file_path = os.path.join(input_dir, file)
        input_file = await image_to_data_url(input_file_path)

        # Strip only the trailing extension. str.replace would rewrite every
        # ".png" occurring in the name, which no longer matches output files
        # that were named from the splitext stem of the input.
        stem = os.path.splitext(file)[0]

        # N outputs are expected for each input.
        output_files_data: list[str] = []
        for i in range(N):
            output_file_path = os.path.join(output_dir, f"{stem}_{i}.html")
            if os.path.exists(output_file_path):
                with open(output_file_path, "r") as f:
                    output_files_data.append(f.read())
            else:
                # Keep the positions aligned even when an output is missing.
                output_files_data.append(
                    "<html><h1>Output file not found.</h1></html>"
                )

        evals.append(
            Eval(
                input=input_file,
                outputs=output_files_data,
            )
        )

    return evals

View File

@ -0,0 +1,406 @@
import asyncio
from dataclasses import dataclass
import traceback
from fastapi import APIRouter, WebSocket
import openai
from codegen.utils import extract_html_content
from config import (
ANTHROPIC_API_KEY,
IS_PROD,
NUM_VARIANTS,
OPENAI_API_KEY,
OPENAI_BASE_URL,
REPLICATE_API_KEY,
SHOULD_MOCK_AI_RESPONSE,
)
from custom_types import InputMode
from llm import (
Llm,
convert_frontend_str_to_llm,
stream_claude_response,
stream_claude_response_native,
stream_openai_response,
)
from fs_logging.core import write_logs
from mock_llm import mock_completion
from typing import Any, Callable, Coroutine, Dict, List, Literal, cast, get_args
from image_generation.core import generate_images
from prompts import create_prompt
from prompts.claude_prompts import VIDEO_PROMPT
from prompts.types import Stack
# from utils import pprint_prompt
from ws.constants import APP_ERROR_WEB_SOCKET_CODE # type: ignore
router = APIRouter()
def auto_upgrade_model(code_generation_model: Llm) -> Llm:
    """Map a deprecated model to its current replacement.

    GPT-4 Vision and GPT-4 Turbo (2024-04-09) are upgraded to GPT-4o
    (2024-05-13); Claude 3 Sonnet is upgraded to Claude 3.5 Sonnet
    (2024-06-20). Any other model is returned unchanged. A message is printed
    whenever an upgrade happens.
    """
    if code_generation_model == Llm.CLAUDE_3_SONNET:
        print(
            f"Initial deprecated model: {code_generation_model}. Auto-updating code generation model to CLAUDE-3.5-SONNET-2024-06-20"
        )
        return Llm.CLAUDE_3_5_SONNET_2024_06_20

    deprecated_openai_models = {Llm.GPT_4_VISION, Llm.GPT_4_TURBO_2024_04_09}
    if code_generation_model not in deprecated_openai_models:
        # Nothing to upgrade.
        return code_generation_model

    print(
        f"Initial deprecated model: {code_generation_model}. Auto-updating code generation model to GPT-4O-2024-05-13"
    )
    return Llm.GPT_4O_2024_05_13
async def perform_image_generation(
    completion: str,
    should_generate_images: bool,
    openai_api_key: str | None,
    openai_base_url: str | None,
    image_cache: dict[str, str],
):
    """Run image generation over the generated code, when enabled.

    Args:
        completion: The generated code to process.
        should_generate_images: When false, ``completion`` is returned as is.
        openai_api_key: Key used for DALL-E 3 when no Replicate key is configured.
        openai_base_url: Base URL forwarded to ``generate_images``.
        image_cache: Cache forwarded to ``generate_images``.

    Returns:
        The result of ``generate_images``, or the untouched ``completion`` when
        image generation is disabled or no usable API key is available.
    """
    if not should_generate_images:
        return completion

    # A configured Replicate key takes priority over the OpenAI key.
    if REPLICATE_API_KEY:
        image_generation_model = "flux"
        api_key = REPLICATE_API_KEY
    elif openai_api_key:
        image_generation_model = "dalle3"
        api_key = openai_api_key
    else:
        print("No OpenAI API key and Replicate key found. Skipping image generation.")
        return completion

    print("Generating images with model: ", image_generation_model)

    return await generate_images(
        completion,
        api_key=api_key,
        base_url=openai_base_url,
        image_cache=image_cache,
        model=image_generation_model,
    )
# Validated settings for a single code-generation request, as returned by
# extract_params.
@dataclass
class ExtractedParams:
# Target stack, already validated against the Stack literal.
stack: Stack
# Input mode, already validated against the InputMode literal.
input_mode: InputMode
# Model requested by the client (or the default when none was sent).
code_generation_model: Llm
# Whether image generation should run on the generated code.
should_generate_images: bool
# API keys taken from the client settings or the environment; None when neither is set.
openai_api_key: str | None
anthropic_api_key: str | None
# Custom OpenAI base URL; left as None when IS_PROD is set.
openai_base_url: str | None
async def extract_params(
    params: Dict[str, str], throw_error: Callable[[str], Coroutine[Any, Any, None]]
) -> ExtractedParams:
    """Validate the raw request parameters and normalize them.

    Args:
        params: The JSON object received from the client.
        throw_error: Coroutine function used to report a validation failure to
            the client before the matching exception is raised.

    Returns:
        An ``ExtractedParams`` with the validated stack, input mode, model,
        API keys, base URL and image-generation flag.

    Raises:
        ValueError: If the stack, the input mode or the model is not recognised.
    """
    # Read the code config settings (stack) from the request.
    generated_code_config = params.get("generatedCodeConfig", "")
    if generated_code_config not in get_args(Stack):
        await throw_error(f"Invalid generated code config: {generated_code_config}")
        raise ValueError(f"Invalid generated code config: {generated_code_config}")
    validated_stack = cast(Stack, generated_code_config)

    # Validate the input mode
    input_mode = params.get("inputMode")
    if input_mode not in get_args(InputMode):
        await throw_error(f"Invalid input mode: {input_mode}")
        raise ValueError(f"Invalid input mode: {input_mode}")
    validated_input_mode = cast(InputMode, input_mode)

    # Read the model from the request. Fall back to default if not provided.
    code_generation_model_str = params.get(
        "codeGenerationModel", Llm.GPT_4O_2024_05_13.value
    )
    try:
        code_generation_model = convert_frontend_str_to_llm(code_generation_model_str)
    except ValueError:
        await throw_error(f"Invalid model: {code_generation_model_str}")
        raise ValueError(f"Invalid model: {code_generation_model_str}")

    openai_api_key = get_from_settings_dialog_or_env(
        params, "openAiApiKey", OPENAI_API_KEY
    )

    # If neither is provided, we throw an error later only if Claude is used.
    anthropic_api_key = get_from_settings_dialog_or_env(
        params, "anthropicApiKey", ANTHROPIC_API_KEY
    )

    # Base URL for OpenAI API
    openai_base_url: str | None = None
    # Disable user-specified OpenAI Base URL in prod
    if not IS_PROD:
        openai_base_url = get_from_settings_dialog_or_env(
            params, "openAiBaseURL", OPENAI_BASE_URL
        )
    if not openai_base_url:
        print("Using official OpenAI URL")

    # Get the image generation flag from the request. Fall back to True if not provided.
    # The payload is JSON, so the flag may be a real boolean or a string.
    # bool("false") is True, so strings are parsed explicitly; every other
    # value keeps plain truthiness.
    raw_image_flag = params.get("isImageGenerationEnabled", True)
    if isinstance(raw_image_flag, str):
        should_generate_images = raw_image_flag.strip().lower() not in {
            "",
            "false",
            "0",
            "no",
            "off",
        }
    else:
        should_generate_images = bool(raw_image_flag)

    return ExtractedParams(
        stack=validated_stack,
        input_mode=validated_input_mode,
        code_generation_model=code_generation_model,
        should_generate_images=should_generate_images,
        openai_api_key=openai_api_key,
        anthropic_api_key=anthropic_api_key,
        openai_base_url=openai_base_url,
    )
def get_from_settings_dialog_or_env(
params: dict[str, str], key: str, env_var: str | None
) -> str | None:
value = params.get(key)
if value:
print(f"Using {key} from client-side settings dialog")
return value
if env_var:
print(f"Using {key} from environment variable")
return env_var
return None
# WebSocket endpoint that generates code and streams it back to the client.
# Flow: accept the socket, read one JSON message with the request parameters,
# validate them, build the prompt, run the model(s), post-process the
# completions, run image generation, then send the final code for each variant
# and close the socket.
@router.websocket("/generate-code")
async def stream_code(websocket: WebSocket):
await websocket.accept()
print("Incoming websocket connection...")
## Communication protocol setup
# Report a fatal error: log it, send it to the client as an "error" message,
# then close the socket with the application error code.
async def throw_error(
message: str,
):
print(message)
await websocket.send_json({"type": "error", "value": message})
await websocket.close(APP_ERROR_WEB_SOCKET_CODE)
# Send one protocol message to the client, tagged with the variant it belongs to.
async def send_message(
type: Literal["chunk", "status", "setCode", "error"],
value: str,
variantIndex: int,
):
# Print for debugging on the backend
if type == "error":
print(f"Error (variant {variantIndex}): {value}")
elif type == "status":
print(f"Status (variant {variantIndex}): {value}")
await websocket.send_json(
{"type": type, "value": value, "variantIndex": variantIndex}
)
## Parameter extraction and validation
# TODO: Are the values always strings?
# The first JSON message received on the socket carries the request parameters.
params: dict[str, str] = await websocket.receive_json()
print("Received params")
# extract_params reports invalid input through throw_error and then raises.
extracted_params = await extract_params(params, throw_error)
stack = extracted_params.stack
input_mode = extracted_params.input_mode
code_generation_model = extracted_params.code_generation_model
openai_api_key = extracted_params.openai_api_key
openai_base_url = extracted_params.openai_base_url
anthropic_api_key = extracted_params.anthropic_api_key
should_generate_images = extracted_params.should_generate_images
# Auto-upgrade usage of older models
code_generation_model = auto_upgrade_model(code_generation_model)
print(
f"Generating {stack} code in {input_mode} mode using {code_generation_model}..."
)
# Send an initial status for each of the NUM_VARIANTS variants.
for i in range(NUM_VARIANTS):
await send_message("status", "Generating code...", i)
### Prompt creation
# Image cache for updates so that we don't have to regenerate images
image_cache: Dict[str, str] = {}
try:
prompt_messages, image_cache = await create_prompt(params, stack, input_mode)
except:
# Tell the client, then re-raise so the original exception is not lost.
await throw_error(
"Error assembling prompt. Contact support at support@picoapps.xyz"
)
raise
# pprint_prompt(prompt_messages) # type: ignore
### Code generation
# Forward one streamed chunk of model output to the client for the given variant.
async def process_chunk(content: str, variantIndex: int):
await send_message("chunk", content, variantIndex)
if SHOULD_MOCK_AI_RESPONSE:
# Mocked run: a single completion from mock_completion.
completions = [await mock_completion(process_chunk, input_mode=input_mode)]
else:
try:
if input_mode == "video":
# Video input produces a single completion, streamed as variant 0,
# using the native Claude call with VIDEO_PROMPT and Claude 3 Opus.
if not anthropic_api_key:
await throw_error(
"Video only works with Anthropic models. No Anthropic API key found. Please add the environment variable ANTHROPIC_API_KEY to backend/.env or in the settings dialog"
)
raise Exception("No Anthropic key")
completions = [
await stream_claude_response_native(
system_prompt=VIDEO_PROMPT,
messages=prompt_messages, # type: ignore
api_key=anthropic_api_key,
callback=lambda x: process_chunk(x, 0),
model=Llm.CLAUDE_3_OPUS,
include_thinking=True,
)
]
else:
# Depending on the presence and absence of various keys,
# we decide which models to run
variant_models = []
if openai_api_key and anthropic_api_key:
variant_models = ["anthropic", "openai"]
elif openai_api_key:
variant_models = ["openai", "openai"]
elif anthropic_api_key:
variant_models = ["anthropic", "anthropic"]
else:
await throw_error(
"No OpenAI or Anthropic API key found. Please add the environment variable OPENAI_API_KEY or ANTHROPIC_API_KEY to backend/.env or in the settings dialog. If you add it to .env, make sure to restart the backend server."
)
raise Exception("No OpenAI or Anthropic key")
# Build one streaming coroutine per variant; they are started together below.
tasks: List[Coroutine[Any, Any, str]] = []
for index, model in enumerate(variant_models):
if model == "openai":
if openai_api_key is None:
await throw_error("OpenAI API key is missing.")
raise Exception("OpenAI API key is missing.")
tasks.append(
stream_openai_response(
prompt_messages,
api_key=openai_api_key,
base_url=openai_base_url,
# i=index binds the current index when the lambda is created, so
# each callback reports to its own variant rather than the last one.
callback=lambda x, i=index: process_chunk(x, i),
model=Llm.GPT_4O_2024_05_13,
)
)
elif model == "anthropic":
if anthropic_api_key is None:
await throw_error("Anthropic API key is missing.")
raise Exception("Anthropic API key is missing.")
tasks.append(
stream_claude_response(
prompt_messages,
api_key=anthropic_api_key,
# Same early binding of the variant index as above.
callback=lambda x, i=index: process_chunk(x, i),
model=Llm.CLAUDE_3_5_SONNET_2024_06_20,
)
)
# Run the models in parallel and capture exceptions if any
# With return_exceptions=True a failed task yields its exception object
# in the result list instead of raising out of gather.
completions = await asyncio.gather(*tasks, return_exceptions=True)
# If all generations failed, throw an error
all_generations_failed = all(
isinstance(completion, Exception) for completion in completions
)
if all_generations_failed:
await throw_error("Error generating code. Please contact support.")
# Print all the underlying exceptions for debugging
for completion in completions:
traceback.print_exception(
type(completion), completion, completion.__traceback__
)
raise Exception("All generations failed")
# If some completions failed, replace them with empty strings
for index, completion in enumerate(completions):
if isinstance(completion, Exception):
completions[index] = ""
print("Generation failed for variant", index)
print("Models used for generation: ", variant_models)
# Known OpenAI failures are translated into user-facing messages; each
# handler reports the error through throw_error and returns.
except openai.AuthenticationError as e:
print("[GENERATE_CODE] Authentication failed", e)
error_message = (
"Incorrect OpenAI key. Please make sure your OpenAI API key is correct, or create a new OpenAI API key on your OpenAI dashboard."
+ (
" Alternatively, you can purchase code generation credits directly on this website."
if IS_PROD
else ""
)
)
return await throw_error(error_message)
except openai.NotFoundError as e:
print("[GENERATE_CODE] Model not found", e)
error_message = (
e.message
+ ". Please make sure you have followed the instructions correctly to obtain an OpenAI key with GPT vision access: https://github.com/abi/screenshot-to-code/blob/main/Troubleshooting.md"
+ (
" Alternatively, you can purchase code generation credits directly on this website."
if IS_PROD
else ""
)
)
return await throw_error(error_message)
except openai.RateLimitError as e:
print("[GENERATE_CODE] Rate limit exceeded", e)
error_message = (
"OpenAI error - 'You exceeded your current quota, please check your plan and billing details.'"
+ (
" Alternatively, you can purchase code generation credits directly on this website."
if IS_PROD
else ""
)
)
return await throw_error(error_message)
## Post-processing
# Strip the completion of everything except the HTML content
completions = [extract_html_content(completion) for completion in completions]
# Write the messages dict into a log so that we can debug later
# Only the first completion is passed to write_logs.
write_logs(prompt_messages, completions[0])
## Image Generation
for index, _ in enumerate(completions):
await send_message("status", "Generating images...", index)
# One image-generation coroutine per completion, all sharing the same image_cache.
image_generation_tasks = [
perform_image_generation(
completion,
should_generate_images,
openai_api_key,
openai_base_url,
image_cache,
)
for completion in completions
]
# gather returns results in the same order as the tasks, so positions
# still correspond to variant indices.
updated_completions = await asyncio.gather(*image_generation_tasks)
# Send the final code and a completion status for every variant.
for index, updated_html in enumerate(updated_completions):
await send_message("setCode", updated_html, index)
await send_message("status", "Code generation complete.", index)
await websocket.close()

12
backend/routes/home.py Normal file
View File

@ -0,0 +1,12 @@
from fastapi import APIRouter
from fastapi.responses import HTMLResponse
router = APIRouter()
@router.get("/")
async def get_status():
    """Serve a short HTML message confirming that the backend is running."""
    status_html = "<h3>Your backend is running correctly. Please open the front-end URL (default is http://localhost:5173) to use screenshot-to-code.</h3>"
    return HTMLResponse(content=status_html)

View File

@ -11,7 +11,9 @@ def bytes_to_data_url(image_bytes: bytes, mime_type: str) -> str:
return f"data:{mime_type};base64,{base64_image}" return f"data:{mime_type};base64,{base64_image}"
async def capture_screenshot(target_url, api_key, device="desktop") -> bytes: async def capture_screenshot(
target_url: str, api_key: str, device: str = "desktop"
) -> bytes:
api_base_url = "https://api.screenshotone.com/take" api_base_url = "https://api.screenshotone.com/take"
params = { params = {

107
backend/run_evals.py Normal file
View File

@ -0,0 +1,107 @@
# Load environment variables first
from dotenv import load_dotenv
from llm import Llm
load_dotenv()
import os
from typing import Any, Coroutine
import asyncio
from evals.config import EVALS_DIR
from evals.core import generate_code_for_image
from evals.utils import image_to_data_url
STACK = "html_tailwind"
# MODEL = Llm.CLAUDE_3_5_SONNET_2024_06_20
N = 2 # Number of outputs to generate
async def main():
    """Generate ``N`` HTML outputs for every PNG eval input and save them.

    Inputs are read from ``EVALS_DIR/inputs``. For each input, output 0 is
    generated with Claude 3.5 Sonnet and every other output with GPT-4o. All
    generations run concurrently. Results are written to
    ``EVALS_DIR/outputs`` as ``<input stem>_<output number>.html``.
    """
    input_dir = EVALS_DIR + "/inputs"
    output_dir = EVALS_DIR + "/outputs"

    # Only PNG files are treated as eval inputs.
    png_names = [name for name in os.listdir(input_dir) if name.endswith(".png")]

    pending: list[Coroutine[Any, Any, str]] = []
    for png_name in png_names:
        data_url = await image_to_data_url(os.path.join(input_dir, png_name))
        for variant in range(N):
            model = (
                Llm.CLAUDE_3_5_SONNET_2024_06_20
                if variant == 0
                else Llm.GPT_4O_2024_05_13
            )
            pending.append(
                generate_code_for_image(image_url=data_url, stack=STACK, model=model)
            )

    print(f"Generating {len(pending)} codes")

    generated = await asyncio.gather(*pending)

    os.makedirs(output_dir, exist_ok=True)

    for position, html in enumerate(generated):
        # Tasks were queued N per input, so the flat position splits into
        # (which input, which output of that input).
        eval_index, output_number = divmod(position, N)
        stem = os.path.splitext(png_names[eval_index])[0]
        target_path = os.path.join(output_dir, f"{stem}_{output_number}.html")
        with open(target_path, "w") as out:
            out.write(html)
# async def text_main():
# OUTPUT_DIR = EVALS_DIR + "/outputs"
# GENERAL_TEXT_V1 = [
# "Login form",
# "Simple notification",
# "button",
# "saas dashboard",
# "landing page for barber shop",
# ]
# tasks: list[Coroutine[Any, Any, str]] = []
# for prompt in GENERAL_TEXT_V1:
# for n in range(N): # Generate N tasks for each input
# if n == 0:
# task = generate_code_for_text(
# text=prompt,
# stack=STACK,
# model=Llm.CLAUDE_3_5_SONNET_2024_06_20,
# )
# else:
# task = generate_code_for_text(
# text=prompt, stack=STACK, model=Llm.GPT_4O_2024_05_13
# )
# tasks.append(task)
# print(f"Generating {len(tasks)} codes")
# results = await asyncio.gather(*tasks)
# os.makedirs(OUTPUT_DIR, exist_ok=True)
# for i, content in enumerate(results):
# # Calculate index for filename and output number
# eval_index = i // N
# output_number = i % N
# filename = GENERAL_TEXT_V1[eval_index]
# # File name is derived from the original filename in evals with an added output number
# output_filename = f"{os.path.splitext(filename)[0]}_{output_number}.html"
# output_filepath = os.path.join(OUTPUT_DIR, output_filename)
# with open(output_filepath, "w") as file:
# file.write(content)
asyncio.run(main())

View File

@ -0,0 +1,85 @@
import asyncio
import os
from typing import List, Optional, Literal
from dotenv import load_dotenv
import aiohttp
from image_generation.core import process_tasks
EVALS = [
"Romantic Background",
"Company logo: A stylized green sprout emerging from a circle",
"Placeholder image of a PDF cover with abstract design",
"A complex bubble diagram showing various interconnected features and aspects of FestivalPro, with a large central bubble surrounded by smaller bubbles of different colors representing different categories and functionalities",
"A vibrant, abstract visualization of the RhythmRise experience ecosystem, featuring interconnected neon elements representing music, technology, and human connection",
"Banner with text 'LiblibAI学院 课程入口'",
"Profile picture of Pierre-Louis Labonne",
"Two hands holding iPhone 14 models with colorful displays",
"Portrait of a woman with long dark hair smiling at the camera",
"Threadless logo on a gradient background from light pink to coral",
"Jordan Schlansky Shows Conan His Favorite Nose Hair Trimmer",
"Team Coco",
"Intro to Large Language Models",
"Andrej Karpathy",
"He built a $200 million toy company",
"CNBC International",
"What will happen in year three of the war?",
"Channel",
"This is it",
"How ASML Dominates Chip Machines",
]
# Load environment variables
load_dotenv()
# Get API keys from environment variables
OPENAI_API_KEY: Optional[str] = os.getenv("OPENAI_API_KEY")
REPLICATE_API_TOKEN: Optional[str] = os.getenv("REPLICATE_API_TOKEN")
# Directory to save generated images
OUTPUT_DIR: str = "generated_images"
async def generate_and_save_images(
    prompts: List[str],
    model: Literal["dalle3", "flux"],
    api_key: Optional[str],
) -> None:
    """Generate one image per prompt and save the results under ``OUTPUT_DIR``.

    Args:
        prompts: Text prompts to generate images for.
        model: Image model to use.
        api_key: API key for the chosen model.

    Raises:
        ValueError: If ``api_key`` is ``None``.

    Each file is named ``<prefix><prompt>.png``. The prefix is ``replicate_``
    for ``flux`` and ``dalle3_`` otherwise. The prompt part is the first 50
    characters of the prompt with spaces turned into underscores and
    characters that are unsafe in file names removed. Prompts whose
    generation or download fails are reported and skipped.
    """
    # Ensure the output directory exists
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    if api_key is None:
        raise ValueError(f"API key for {model} is not set in the environment variables")

    # Generate images
    # NOTE(review): assumes process_tasks returns one entry per prompt, in order - confirm.
    results: List[Optional[str]] = await process_tasks(
        prompts, api_key, None, model=model
    )

    prefix = "replicate_" if model == "flux" else "dalle3_"
    # Path separators and characters rejected in file names on common
    # platforms. A "/" in a prompt would otherwise point into a directory that
    # does not exist and make open() fail.
    unsafe_chars = '<>:"/\\|?*'

    # Save images to disk
    async with aiohttp.ClientSession() as session:
        for i, image_url in enumerate(results):
            prompt = prompts[i]
            if not image_url:
                print(f"Failed to generate {model} image for prompt: {prompt}")
                continue

            # Get the image data
            async with session.get(image_url) as response:
                if response.status != 200:
                    # Do not save an error body as if it were a PNG.
                    print(
                        f"Failed to download {model} image for prompt: {prompt} (HTTP {response.status})"
                    )
                    continue
                image_data: bytes = await response.read()

            # Save the image with a filename based on the input eval
            stem = "".join(
                ch for ch in prompt[:50].replace(" ", "_") if ch not in unsafe_chars
            )
            filepath: str = os.path.join(OUTPUT_DIR, f"{prefix}{stem}.png")
            with open(filepath, "wb") as f:
                f.write(image_data)
            print(f"Saved {model} image: {filepath}")
# Script entry point: generate and save images for every prompt in EVALS.
# Only the "flux" run is active; the "dalle3" run below is commented out.
async def main() -> None:
# await generate_and_save_images(EVALS, "dalle3", OPENAI_API_KEY)
await generate_and_save_images(EVALS, "flux", REPLICATE_API_TOKEN)
if __name__ == "__main__":
asyncio.run(main())

4
backend/start.py Normal file
View File

@ -0,0 +1,4 @@
import uvicorn
if __name__ == "__main__":
uvicorn.run("main:app", port=7001, reload=True)

41
backend/test_llm.py Normal file
View File

@ -0,0 +1,41 @@
import unittest
from llm import convert_frontend_str_to_llm, Llm
class TestConvertFrontendStrToLlm(unittest.TestCase):
    """Unit tests for ``convert_frontend_str_to_llm``."""

    def test_convert_valid_strings(self):
        """Every known frontend identifier converts to its ``Llm`` member."""
        # (frontend string, expected member, message shown on failure)
        cases = [
            (
                "gpt_4_vision",
                Llm.GPT_4_VISION,
                "Should convert 'gpt_4_vision' to Llm.GPT_4_VISION",
            ),
            (
                "claude_3_sonnet",
                Llm.CLAUDE_3_SONNET,
                "Should convert 'claude_3_sonnet' to Llm.CLAUDE_3_SONNET",
            ),
            (
                "claude-3-opus-20240229",
                Llm.CLAUDE_3_OPUS,
                "Should convert 'claude-3-opus-20240229' to Llm.CLAUDE_3_OPUS",
            ),
            (
                "gpt-4-turbo-2024-04-09",
                Llm.GPT_4_TURBO_2024_04_09,
                "Should convert 'gpt-4-turbo-2024-04-09' to Llm.GPT_4_TURBO_2024_04_09",
            ),
            (
                "gpt-4o-2024-05-13",
                Llm.GPT_4O_2024_05_13,
                "Should convert 'gpt-4o-2024-05-13' to Llm.GPT_4O_2024_05_13",
            ),
        ]
        for frontend_str, expected, failure_message in cases:
            self.assertEqual(
                convert_frontend_str_to_llm(frontend_str), expected, failure_message
            )

    def test_convert_invalid_string_raises_exception(self):
        """Unknown identifiers are rejected with ``ValueError``."""
        for unknown in ("invalid_string", "another_invalid_string"):
            with self.assertRaises(ValueError):
                convert_frontend_str_to_llm(unknown)
if __name__ == "__main__":
unittest.main()

View File

@ -1,136 +0,0 @@
from prompts import assemble_prompt
TAILWIND_SYSTEM_PROMPT = """
You are an expert Tailwind developer
You take screenshots of a reference web page from the user, and then build single page apps
using Tailwind, HTML and JS.
You might also be given a screenshot(The second image) of a web page that you have already built, and asked to
update it to look more like the reference image(The first image).
- Make sure the app looks exactly like the screenshot.
- Pay close attention to background color, text color, font size, font family,
padding, margin, border, etc. Match the colors and sizes exactly.
- Use the exact text from the screenshot.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed to match the screenshot. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
In terms of libraries,
- Use this script to include Tailwind: <script src="https://cdn.tailwindcss.com"></script>
- You can use Google Fonts
- Font Awesome for icons: <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css"></link>
Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
"""
BOOTSTRAP_SYSTEM_PROMPT = """
You are an expert Bootstrap developer
You take screenshots of a reference web page from the user, and then build single page apps
using Bootstrap, HTML and JS.
You might also be given a screenshot(The second image) of a web page that you have already built, and asked to
update it to look more like the reference image(The first image).
- Make sure the app looks exactly like the screenshot.
- Pay close attention to background color, text color, font size, font family,
padding, margin, border, etc. Match the colors and sizes exactly.
- Use the exact text from the screenshot.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed to match the screenshot. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
In terms of libraries,
- Use this script to include Bootstrap: <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-T3c6CoIi6uLrA9TneNEoa7RxnatzjcDSCmG1MXxSR1GAsXEV/Dwwykc2MPK8M2HN" crossorigin="anonymous">
- You can use Google Fonts
- Font Awesome for icons: <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css"></link>
Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
"""
REACT_TAILWIND_SYSTEM_PROMPT = """
You are an expert React/Tailwind developer
You take screenshots of a reference web page from the user, and then build single page apps
using React and Tailwind CSS.
You might also be given a screenshot(The second image) of a web page that you have already built, and asked to
update it to look more like the reference image(The first image).
- Make sure the app looks exactly like the screenshot.
- Pay close attention to background color, text color, font size, font family,
padding, margin, border, etc. Match the colors and sizes exactly.
- Use the exact text from the screenshot.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed to match the screenshot. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
In terms of libraries,
- Use these script to include React so that it can run on a standalone page:
<script src="https://unpkg.com/react/umd/react.development.js"></script>
<script src="https://unpkg.com/react-dom/umd/react-dom.development.js"></script>
<script src="https://unpkg.com/@babel/standalone/babel.js"></script>
- Use this script to include Tailwind: <script src="https://cdn.tailwindcss.com"></script>
- You can use Google Fonts
- Font Awesome for icons: <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css"></link>
Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
"""
IONIC_TAILWIND_SYSTEM_PROMPT = """
You are an expert Ionic/Tailwind developer
You take screenshots of a reference web page from the user, and then build single page apps
using Ionic and Tailwind CSS.
You might also be given a screenshot(The second image) of a web page that you have already built, and asked to
update it to look more like the reference image(The first image).
- Make sure the app looks exactly like the screenshot.
- Pay close attention to background color, text color, font size, font family,
padding, margin, border, etc. Match the colors and sizes exactly.
- Use the exact text from the screenshot.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed to match the screenshot. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.
In terms of libraries,
- Use these script to include Ionic so that it can run on a standalone page:
<script type="module" src="https://cdn.jsdelivr.net/npm/@ionic/core/dist/ionic/ionic.esm.js"></script>
<script nomodule src="https://cdn.jsdelivr.net/npm/@ionic/core/dist/ionic/ionic.js"></script>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@ionic/core/css/ionic.bundle.css" />
- Use this script to include Tailwind: <script src="https://cdn.tailwindcss.com"></script>
- You can use Google Fonts
- ionicons for icons, add the following <script > tags near the end of the page, right before the closing </body> tag:
<script type="module">
import ionicons from 'https://cdn.jsdelivr.net/npm/ionicons/+esm'
</script>
<script nomodule src="https://cdn.jsdelivr.net/npm/ionicons/dist/esm/ionicons.min.js"></script>
<link href="https://cdn.jsdelivr.net/npm/ionicons/dist/collection/components/icon/icon.min.css" rel="stylesheet">
Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
"""
def test_prompts():
    """Check that each stack name yields its matching system prompt.

    For every supported stack, the content of the first message returned by
    assemble_prompt must equal that stack's system prompt constant.
    """
    expected_system_prompts = [
        ("html_tailwind", TAILWIND_SYSTEM_PROMPT),
        ("react_tailwind", REACT_TAILWIND_SYSTEM_PROMPT),
        ("bootstrap", BOOTSTRAP_SYSTEM_PROMPT),
        ("ionic_tailwind", IONIC_TAILWIND_SYSTEM_PROMPT),
    ]

    for stack_name, expected_prompt in expected_system_prompts:
        messages = assemble_prompt(
            "image_data_url", stack_name, "result_image_data_url"
        )
        assert messages[0]["content"] == expected_prompt

View File

@ -1,28 +1,30 @@
import copy import copy
import json import json
from typing import List
from openai.types.chat import ChatCompletionMessageParam
def pprint_prompt(prompt_messages): def pprint_prompt(prompt_messages: List[ChatCompletionMessageParam]):
print(json.dumps(truncate_data_strings(prompt_messages), indent=4)) print(json.dumps(truncate_data_strings(prompt_messages), indent=4))
def truncate_data_strings(data): def truncate_data_strings(data: List[ChatCompletionMessageParam]): # type: ignore
# Deep clone the data to avoid modifying the original object # Deep clone the data to avoid modifying the original object
cloned_data = copy.deepcopy(data) cloned_data = copy.deepcopy(data)
if isinstance(cloned_data, dict): if isinstance(cloned_data, dict):
for key, value in cloned_data.items(): for key, value in cloned_data.items(): # type: ignore
# Recursively call the function if the value is a dictionary or a list # Recursively call the function if the value is a dictionary or a list
if isinstance(value, (dict, list)): if isinstance(value, (dict, list)):
cloned_data[key] = truncate_data_strings(value) cloned_data[key] = truncate_data_strings(value) # type: ignore
# Truncate the string if it it's long and add ellipsis and length # Truncate the string if it it's long and add ellipsis and length
elif isinstance(value, str): elif isinstance(value, str):
cloned_data[key] = value[:40] cloned_data[key] = value[:40] # type: ignore
if len(value) > 40: if len(value) > 40:
cloned_data[key] += "..." + f" ({len(value)} chars)" cloned_data[key] += "..." + f" ({len(value)} chars)" # type: ignore
elif isinstance(cloned_data, list): elif isinstance(cloned_data, list): # type: ignore
# Process each item in the list # Process each item in the list
cloned_data = [truncate_data_strings(item) for item in cloned_data] cloned_data = [truncate_data_strings(item) for item in cloned_data] # type: ignore
return cloned_data return cloned_data # type: ignore

134
backend/video/utils.py Normal file
View File

@ -0,0 +1,134 @@
# Extract HTML content from the completion string
import base64
import io
import mimetypes
import os
import tempfile
import uuid
from typing import Any, Union, cast
from moviepy.editor import VideoFileClip # type: ignore
from PIL import Image
import math
DEBUG = True
TARGET_NUM_SCREENSHOTS = (
20 # Should be max that Claude supports (20) - reduce to save tokens on testing
)
async def assemble_claude_prompt_video(video_data_url: str) -> list[Any]:
    """
    Turn a video data URL into a single-user-message prompt of image blocks.

    The video is split into frames, each frame is JPEG-encoded and base64'd,
    and the frames are returned as the content of one "user" message.

    :param video_data_url: The video as a base64 data URL.
    :return: A one-element list holding the user message with all image blocks.
    :raises ValueError: If more than 20 frames were extracted.
    """
    frames = split_video_into_screenshots(video_data_url)

    # Save images to tmp if we're debugging
    if DEBUG:
        save_images_to_tmp(frames)

    # Validate number of images
    frame_count = len(frames)
    print(f"Number of frames extracted from video: {frame_count}")
    if frame_count > 20:
        print(f"Too many screenshots: {frame_count}")
        raise ValueError("Too many screenshots extracted from video")

    def to_image_block(frame: Image.Image) -> dict[str, Union[dict[str, str], str]]:
        # Serialize the frame to an in-memory JPEG, then base64-encode the bytes
        jpeg_buffer = io.BytesIO()
        frame.save(jpeg_buffer, format="JPEG")
        encoded = base64.b64encode(jpeg_buffer.getvalue()).decode("utf-8")
        return {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": "image/jpeg",
                "data": encoded,
            },
        }

    # Convert images to the message format for Claude, preserving frame order
    content_messages: list[dict[str, Union[dict[str, str], str]]] = [
        to_image_block(frame) for frame in frames
    ]

    return [{"role": "user", "content": content_messages}]
# Returns a list of images/frame (RGB format)
def split_video_into_screenshots(video_data_url: str) -> list[Image.Image]:
    """
    Sample up to TARGET_NUM_SCREENSHOTS evenly spaced frames from a video.

    The data URL is decoded to bytes, written to a temporary file (removed
    when the function exits), and opened with VideoFileClip. Every
    `frame_skip`-th frame is converted to a PIL image.

    :param video_data_url: Video as a data URL of the form
        "data:<mime>;base64,<payload>".
    :return: The sampled frames, in playback order.
    """
    target_num_screenshots = TARGET_NUM_SCREENSHOTS

    # Decode the base64 URL to get the video bytes
    video_encoded_data = video_data_url.split(",")[1]
    video_bytes = base64.b64decode(video_encoded_data)

    # Derive a file suffix from the MIME type in the data URL header
    mime_type = video_data_url.split(";")[0].split(":")[1]
    suffix = mimetypes.guess_extension(mime_type)

    with tempfile.NamedTemporaryFile(suffix=suffix, delete=True) as temp_video_file:
        print(temp_video_file.name)
        temp_video_file.write(video_bytes)
        # Flush so the bytes are on disk before the clip is opened by file name
        temp_video_file.flush()

        clip = VideoFileClip(temp_video_file.name)
        try:
            images: list[Image.Image] = []
            total_frames = cast(int, clip.reader.nframes)  # type: ignore

            # Calculate frame skip interval by dividing total frames by the target number of screenshots
            # Ensuring a minimum skip of 1 frame
            frame_skip = max(1, math.ceil(total_frames / target_num_screenshots))

            # Iterate over each frame in the clip
            for i, frame in enumerate(clip.iter_frames()):
                # Save every nth frame
                if i % frame_skip == 0:
                    frame_image = Image.fromarray(frame)  # type: ignore
                    images.append(frame_image)
                    # Ensure that we don't capture more than the desired number of frames
                    if len(images) >= target_num_screenshots:
                        break

            return images
        finally:
            # Close the video file to release resources, even when frame
            # extraction raises part-way through
            clip.close()
# Save a list of PIL images to a random temporary directory
def save_images_to_tmp(images: list[Image.Image]):
    """
    Write each image as a JPEG into a freshly created, uniquely named folder
    under the system temp directory, then print the folder path.

    :param images: Images to write; files are named screenshot_<index>.jpg.
    """
    # A UUID in the folder name keeps separate runs from colliding
    output_dir = os.path.join(tempfile.gettempdir(), f"screenshots_{uuid.uuid4()}")
    os.makedirs(output_dir, exist_ok=True)

    for position, picture in enumerate(images):
        # The list index makes each file name unique within the folder
        destination = os.path.join(output_dir, f"screenshot_{position}.jpg")
        picture.save(destination, format="JPEG")

    print("Saved to " + output_dir)
def extract_tag_content(tag: str, text: str) -> str:
    """
    Return the first `<tag>...</tag>` span found in `text`.

    The returned string includes the opening and closing tags themselves.

    :param tag: Tag name without angle brackets, e.g. "html".
    :param text: The text to search within.
    :return: The span from the first opening tag through the first closing
        tag that follows it, or "" when either tag is missing.
    """
    opening = f"<{tag}>"
    closing = f"</{tag}>"

    begin = text.find(opening)
    if begin == -1:
        return ""

    # Only a closing tag located at or after the opening tag counts
    finish = text.find(closing, begin)
    if finish == -1:
        return ""

    return text[begin : finish + len(closing)]

122
backend/video_to_app.py Normal file
View File

@ -0,0 +1,122 @@
# Load environment variables first
from dotenv import load_dotenv
load_dotenv()
import base64
import mimetypes
import time
import subprocess
import os
import asyncio
from datetime import datetime
from prompts.claude_prompts import VIDEO_PROMPT
from utils import pprint_prompt
from config import ANTHROPIC_API_KEY
from video.utils import extract_tag_content, assemble_claude_prompt_video
from llm import (
Llm,
stream_claude_response_native,
)
STACK = "html_tailwind"
VIDEO_DIR = "./video_evals/videos"
SCREENSHOTS_DIR = "./video_evals/screenshots"
OUTPUTS_DIR = "./video_evals/outputs"
async def main():
    """
    Run one video-to-app generation against Claude and save the HTML output.

    Reads a video from VIDEO_DIR, builds the Claude prompt from its frames,
    streams the completion to stdout, then writes the extracted <html> block
    to a timestamped file in OUTPUTS_DIR.

    :raises ValueError: If ANTHROPIC_API_KEY is not set.
    """
    video_filename = "shortest.mov"
    is_followup = False

    if not ANTHROPIC_API_KEY:
        raise ValueError("ANTHROPIC_API_KEY is not set")

    # Get previous HTML
    previous_html = ""
    if is_followup:
        # Pick the output file with the latest ctime as the previous draft
        previous_html_file = max(
            [
                os.path.join(OUTPUTS_DIR, f)
                for f in os.listdir(OUTPUTS_DIR)
                if f.endswith(".html")
            ],
            key=os.path.getctime,
        )
        with open(previous_html_file, "r") as file:
            previous_html = file.read()

    # Load the video and wrap it in a base64 data URL
    video_file = os.path.join(VIDEO_DIR, video_filename)
    mime_type = mimetypes.guess_type(video_file)[0]
    with open(video_file, "rb") as file:
        video_content = file.read()
    video_data_url = (
        f"data:{mime_type};base64,{base64.b64encode(video_content).decode('utf-8')}"
    )

    prompt_messages = await assemble_claude_prompt_video(video_data_url)

    # Tell the model to continue
    # {"role": "assistant", "content": SECOND_MESSAGE},
    # {"role": "user", "content": "continue"},
    if is_followup:
        prompt_messages += [
            {"role": "assistant", "content": previous_html},
            {
                "role": "user",
                "content": "You've done a good job with a first draft. Improve this further based on the original instructions so that the app is fully functional like in the original video.",
            },
        ]  # type: ignore

    async def process_chunk(content: str):
        # Echo streamed text as it arrives, without adding newlines
        print(content, end="", flush=True)

    response_prefix = "<thinking>"

    pprint_prompt(prompt_messages)  # type: ignore

    start_time = time.time()

    completion = await stream_claude_response_native(
        system_prompt=VIDEO_PROMPT,
        messages=prompt_messages,
        api_key=ANTHROPIC_API_KEY,
        callback=process_chunk,
        model=Llm.CLAUDE_3_OPUS,
        include_thinking=True,
    )

    end_time = time.time()

    # Prepend the response prefix to the completion
    completion = response_prefix + completion

    # Extract the outputs
    html_content = extract_tag_content("html", completion)
    thinking = extract_tag_content("thinking", completion)

    print(thinking)
    print(f"Operation took {end_time - start_time} seconds")

    os.makedirs(OUTPUTS_DIR, exist_ok=True)

    # Generate a unique filename based on the current time
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    filename = f"video_test_output_{timestamp}.html"
    output_path = os.path.join(OUTPUTS_DIR, filename)

    # Write the HTML content to the file
    with open(output_path, "w") as file:
        file.write(html_content)

    print(f"Output file path: {output_path}")

    # Show a notification. osascript only exists on macOS; the output has
    # already been written, so a missing binary must not fail the run.
    try:
        subprocess.run(["osascript", "-e", 'display notification "Coding Complete"'])
    except FileNotFoundError:
        pass
asyncio.run(main())

0
backend/ws/__init__.py Normal file
View File

2
backend/ws/constants.py Normal file
View File

@ -0,0 +1,2 @@
# WebSocket protocol (RFC 6455) allows for the use of custom close codes in the range 4000-4999
APP_ERROR_WEB_SOCKET_CODE = 4332

59
blog/evaluating-claude.md Normal file
View File

@ -0,0 +1,59 @@
# Claude 3 for converting screenshots to code
Claude 3 dropped yesterday, claiming to rival GPT-4 on a wide variety of tasks. I maintain a very popular open source project called “screenshot-to-code” (this one!) that uses GPT-4 vision to convert screenshots/designs into clean code. Naturally, I was excited to see how good Claude 3 was at this task.
**TLDR:** Claude 3 is on par with GPT-4 vision for screenshot to code, better in some ways but worse in others.
## Evaluation Setup
I don't know of a public benchmark for “screenshot to code” so I created a simple evaluation setup for the purposes of testing:
- **Evaluation Dataset**: 16 screenshots with a mix of UI elements, landing pages, dashboards and popular websites.
<img width="784" alt="Screenshot 2024-03-05 at 3 05 52PM" src="https://github.com/abi/screenshot-to-code/assets/23818/c32af2db-eb5a-44c1-9a19-2f0c3dd11ab4">
- **Evaluation Metric**: Replication accuracy, as in “How close does the generated code look to the screenshot?” While there are other metrics that are important like code quality, speed and so on, this is by far the #1 thing most users of the repo care about.
- **Evaluation Mechanism**: Each output is subjectively rated by a human on a rating scale from 0 to 4. 4 = very close to an exact replica while 0 = nothing like the screenshot. With 16 screenshots, the maximum any model can score is 64.
To make the evaluation process easy, I created [a Python script](https://github.com/abi/screenshot-to-code/blob/main/backend/run_evals.py) that runs code for all the inputs in parallel. I also made a simple UI to do a side-by-side comparison of the input and output.
![Google Chrome](https://github.com/abi/screenshot-to-code/assets/23818/38126f8f-205d-4ed1-b8cf-039e81dcc3d0)
## Results
Quick note about what kind of code we'll be generating: currently, screenshot-to-code supports generating code in HTML + Tailwind, React, Vue, and several other frameworks. Stacks can impact the replication accuracy quite a bit. For example, because Bootstrap uses a relatively restrictive set of user elements, generations using Bootstrap tend to have a distinct "Bootstrap" style.
I only ran the evals on HTML/Tailwind here which is the stack where GPT-4 vision tends to perform the best.
Here are the results (average of 3 runs for each model):
- GPT-4 Vision obtains a score of **65.10%** - this is what we're trying to beat
- Claude 3 Sonnet receives a score of **70.31%**, which is a bit better.
- Surprisingly, Claude 3 Opus which is supposed to be the smarter and slower model scores worse than both GPT-4 vision and Claude 3 Sonnet, comes in at **61.46%**.
Overall, a very strong showing for Claude 3. Obviously, there's a lot of subjectivity involved in this evaluation but Claude 3 is definitely on par with GPT-4 Vision, if not better.
You can see the [side-by-side comparison for a run of Claude 3 Sonnet here](https://github.com/abi/screenshot-to-code-files/blob/main/sonnet%20results.png). And for [a run of GPT-4 Vision here](https://github.com/abi/screenshot-to-code-files/blob/main/gpt%204%20vision%20results.png).
Other notes:
- The prompts used are optimized for GPT-4 vision. Adjusting the prompts a bit for Claude did yield a small improvement. But nothing game-changing and potentially not worth the trade-off of maintaining two sets of prompts.
- All the models excel at code quality - the quality is usually comparable to a human or better.
- Claude 3 is much less lazy than GPT-4 Vision. When asked to recreate Hacker News, GPT-4 Vision will only create two items in the list and leave comments in this code like `<!-- Repeat for each news item -->` and `<!-- ... other news items ... -->`.
<img width="699" alt="Screenshot 2024-03-05 at 9 25 04PM" src="https://github.com/abi/screenshot-to-code/assets/23818/04b03155-45e0-40b0-8de0-b1f0b4382bee">
While Claude 3 Sonnet can sometimes be lazy too, most of the time, it does what you ask it to do.
<img width="904" alt="Screenshot 2024-03-05 at 9 30 23PM" src="https://github.com/abi/screenshot-to-code/assets/23818/b7c7d1ba-47c1-414d-928f-6989e81cf41d">
- For some reason, all the models struggle with side-by-side "flex" layouts
<img width="1090" alt="Screenshot 2024-03-05 at 9 20 58PM" src="https://github.com/abi/screenshot-to-code/assets/23818/8957bb3a-da66-467d-997d-1c7cc24e6d9a">
- Claude 3 Sonnet is a lot faster
- Claude 3 gets background and text colors wrong quite often! (like in the Hacker News image above)
- My suspicion is that Claude 3 Opus results can be improved to be on par with the other models through better prompting
Overall, I'm very impressed with Claude 3 Sonnet for this use case. I've added it as an alternative to GPT-4 Vision in the open source repo (hosted version update coming soon).
If you'd like to contribute to this effort, I have some documentation on [running these evals yourself here](https://github.com/abi/screenshot-to-code/blob/main/Evaluation.md). I'm also working on a better evaluation mechanism with Elo ratings and would love some help on that.

5
design-docs.md Normal file
View File

@ -0,0 +1,5 @@
## Version History
Version history is stored as a tree on the client-side.
![Screenshot to Code](https://github.com/abi/screenshot-to-code/assets/23818/e35644aa-b90a-4aa7-8027-b8732796fd7c)

3
frontend/.gitignore vendored
View File

@ -25,3 +25,6 @@ dist-ssr
# Env files # Env files
.env* .env*
# Test files
src/tests/results/

View File

@ -1,4 +1,4 @@
FROM node:20.9-bullseye-slim FROM node:22-bullseye-slim
# Set the working directory in the container # Set the working directory in the container
WORKDIR /app WORKDIR /app
@ -6,6 +6,9 @@ WORKDIR /app
# Copy package.json and yarn.lock # Copy package.json and yarn.lock
COPY package.json yarn.lock /app/ COPY package.json yarn.lock /app/
# Set the environment variable to skip Puppeteer download
ENV PUPPETEER_SKIP_DOWNLOAD=true
# Install dependencies # Install dependencies
RUN yarn install RUN yarn install

View File

@ -2,11 +2,7 @@
<html lang="en"> <html lang="en">
<head> <head>
<meta charset="UTF-8" /> <meta charset="UTF-8" />
<link <link rel="icon" type="image/png" href="/favicon/main.png" />
rel="icon"
type="image/svg+xml"
href="https://picoapps.xyz/favicon.png"
/>
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<!-- Google Fonts --> <!-- Google Fonts -->

9
frontend/jest.config.js Normal file
View File

@ -0,0 +1,9 @@
// Jest configuration for the frontend tests.
export default {
// Use the ts-jest preset so TypeScript test files can be run by Jest.
preset: "ts-jest",
// Run tests in a Node environment rather than a browser-like one.
testEnvironment: "node",
// Module executed before the tests to prepare the test environment.
setupFiles: ["<rootDir>/src/setupTests.ts"],
transform: {
// Route every .ts and .tsx file through ts-jest.
"^.+\\.tsx?$": "ts-jest",
},
// Per-test timeout; Jest interprets this value in milliseconds (30 s here).
testTimeout: 30000,
};

View File

@ -10,14 +10,16 @@
"build-hosted": "tsc && vite build --mode prod", "build-hosted": "tsc && vite build --mode prod",
"lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0", "lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0",
"preview": "vite preview", "preview": "vite preview",
"test": "vitest" "test": "jest"
}, },
"dependencies": { "dependencies": {
"@codemirror/lang-html": "^6.4.6", "@codemirror/lang-html": "^6.4.6",
"@radix-ui/react-accordion": "^1.1.2", "@radix-ui/react-accordion": "^1.1.2",
"@radix-ui/react-alert-dialog": "^1.0.5", "@radix-ui/react-alert-dialog": "^1.0.5",
"@radix-ui/react-checkbox": "^1.0.4", "@radix-ui/react-checkbox": "^1.0.4",
"@radix-ui/react-collapsible": "^1.0.3",
"@radix-ui/react-dialog": "^1.0.5", "@radix-ui/react-dialog": "^1.0.5",
"@radix-ui/react-hover-card": "^1.0.7",
"@radix-ui/react-icons": "^1.3.0", "@radix-ui/react-icons": "^1.3.0",
"@radix-ui/react-label": "^2.0.2", "@radix-ui/react-label": "^2.0.2",
"@radix-ui/react-popover": "^1.0.7", "@radix-ui/react-popover": "^1.0.7",
@ -34,29 +36,39 @@
"codemirror": "^6.0.1", "codemirror": "^6.0.1",
"copy-to-clipboard": "^3.3.3", "copy-to-clipboard": "^3.3.3",
"html2canvas": "^1.4.1", "html2canvas": "^1.4.1",
"nanoid": "^5.0.7",
"react": "^18.2.0", "react": "^18.2.0",
"react-dom": "^18.2.0", "react-dom": "^18.2.0",
"react-dropzone": "^14.2.3", "react-dropzone": "^14.2.3",
"react-hot-toast": "^2.4.1", "react-hot-toast": "^2.4.1",
"react-icons": "^4.12.0", "react-icons": "^4.12.0",
"react-router-dom": "^6.20.1",
"tailwind-merge": "^2.0.0", "tailwind-merge": "^2.0.0",
"tailwindcss-animate": "^1.0.7", "tailwindcss-animate": "^1.0.7",
"thememirror": "^2.0.1", "thememirror": "^2.0.1",
"vite-plugin-checker": "^0.6.2" "vite-plugin-checker": "^0.6.2",
"webm-duration-fix": "^1.0.4",
"zustand": "^4.5.2"
}, },
"devDependencies": { "devDependencies": {
"@types/jest": "^29.5.12",
"@types/node": "^20.9.0", "@types/node": "^20.9.0",
"@types/puppeteer": "^7.0.4",
"@types/react": "^18.2.15", "@types/react": "^18.2.15",
"@types/react-dom": "^18.2.7", "@types/react-dom": "^18.2.7",
"@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0", "@typescript-eslint/parser": "^6.0.0",
"@vitejs/plugin-react": "^4.0.3", "@vitejs/plugin-react": "^4.0.3",
"autoprefixer": "^10.4.16", "autoprefixer": "^10.4.16",
"dotenv": "^16.4.5",
"eslint": "^8.45.0", "eslint": "^8.45.0",
"eslint-plugin-react-hooks": "^4.6.0", "eslint-plugin-react-hooks": "^4.6.0",
"eslint-plugin-react-refresh": "^0.4.3", "eslint-plugin-react-refresh": "^0.4.3",
"jest": "^29.7.0",
"postcss": "^8.4.31", "postcss": "^8.4.31",
"puppeteer": "^22.6.4",
"tailwindcss": "^3.3.5", "tailwindcss": "^3.3.5",
"ts-jest": "^29.1.2",
"typescript": "^5.0.2", "typescript": "^5.0.2",
"vite": "^4.4.5", "vite": "^4.4.5",
"vite-plugin-html": "^3.2.0", "vite-plugin-html": "^3.2.0",

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

View File

@ -0,0 +1,2 @@
TEST_SCREENSHOTONE_API_KEY=
TEST_ROOT_PATH=

View File

@ -1,75 +1,101 @@
import { useEffect, useRef, useState } from "react"; import { useEffect, useRef } from "react";
import ImageUpload from "./components/ImageUpload"; import { generateCode } from "./generateCode";
import CodePreview from "./components/CodePreview"; import SettingsDialog from "./components/settings/SettingsDialog";
import Preview from "./components/Preview"; import { AppState, CodeGenerationParams, EditorTheme, Settings } from "./types";
import { CodeGenerationParams, generateCode } from "./generateCode";
import Spinner from "./components/Spinner";
import classNames from "classnames";
import {
FaCode,
FaDesktop,
FaDownload,
FaMobile,
FaUndo,
} from "react-icons/fa";
import { Switch } from "./components/ui/switch";
import { Button } from "@/components/ui/button";
import { Textarea } from "@/components/ui/textarea";
import { Tabs, TabsContent, TabsList, TabsTrigger } from "./components/ui/tabs";
import SettingsDialog from "./components/SettingsDialog";
import { Settings, EditorTheme, AppState, GeneratedCodeConfig } from "./types";
import { IS_RUNNING_ON_CLOUD } from "./config"; import { IS_RUNNING_ON_CLOUD } from "./config";
import { PicoBadge } from "./components/PicoBadge"; import { PicoBadge } from "./components/messages/PicoBadge";
import { OnboardingNote } from "./components/OnboardingNote"; import { OnboardingNote } from "./components/messages/OnboardingNote";
import { usePersistedState } from "./hooks/usePersistedState"; import { usePersistedState } from "./hooks/usePersistedState";
import { UrlInputSection } from "./components/UrlInputSection";
import TermsOfServiceDialog from "./components/TermsOfServiceDialog"; import TermsOfServiceDialog from "./components/TermsOfServiceDialog";
import html2canvas from "html2canvas";
import { USER_CLOSE_WEB_SOCKET_CODE } from "./constants"; import { USER_CLOSE_WEB_SOCKET_CODE } from "./constants";
import CodeTab from "./components/CodeTab"; import { extractHistory } from "./components/history/utils";
import OutputSettingsSection from "./components/OutputSettingsSection";
import { History } from "./components/history/history_types";
import HistoryDisplay from "./components/history/HistoryDisplay";
import { extractHistoryTree } from "./components/history/utils";
import toast from "react-hot-toast"; import toast from "react-hot-toast";
import { Stack } from "./lib/stacks";
const IS_OPENAI_DOWN = false; import { CodeGenerationModel } from "./lib/models";
import useBrowserTabIndicator from "./hooks/useBrowserTabIndicator";
import TipLink from "./components/messages/TipLink";
import { useAppStore } from "./store/app-store";
import { useProjectStore } from "./store/project-store";
import Sidebar from "./components/sidebar/Sidebar";
import PreviewPane from "./components/preview/PreviewPane";
import DeprecationMessage from "./components/messages/DeprecationMessage";
import { GenerationSettings } from "./components/settings/GenerationSettings";
import StartPane from "./components/start-pane/StartPane";
import { takeScreenshot } from "./lib/takeScreenshot";
import { Commit } from "./components/commits/types";
import { createCommit } from "./components/commits/utils";
function App() { function App() {
const [appState, setAppState] = useState<AppState>(AppState.INITIAL); const {
const [generatedCode, setGeneratedCode] = useState<string>(""); // Inputs
inputMode,
setInputMode,
isImportedFromCode,
setIsImportedFromCode,
referenceImages,
setReferenceImages,
const [referenceImages, setReferenceImages] = useState<string[]>([]); head,
const [executionConsole, setExecutionConsole] = useState<string[]>([]); commits,
const [updateInstruction, setUpdateInstruction] = useState(""); addCommit,
removeCommit,
setHead,
appendCommitCode,
setCommitCode,
resetCommits,
resetHead,
// Outputs
appendExecutionConsole,
resetExecutionConsoles,
} = useProjectStore();
const {
disableInSelectAndEditMode,
setUpdateInstruction,
appState,
setAppState,
shouldIncludeResultImage,
setShouldIncludeResultImage,
} = useAppStore();
// Settings // Settings
const [settings, setSettings] = usePersistedState<Settings>( const [settings, setSettings] = usePersistedState<Settings>(
{ {
openAiApiKey: null, openAiApiKey: null,
openAiBaseURL: null, openAiBaseURL: null,
anthropicApiKey: null,
screenshotOneApiKey: null, screenshotOneApiKey: null,
isImageGenerationEnabled: true, isImageGenerationEnabled: true,
editorTheme: EditorTheme.COBALT, editorTheme: EditorTheme.COBALT,
generatedCodeConfig: GeneratedCodeConfig.HTML_TAILWIND, generatedCodeConfig: Stack.HTML_TAILWIND,
codeGenerationModel: CodeGenerationModel.CLAUDE_3_5_SONNET_2024_06_20,
// Only relevant for hosted version // Only relevant for hosted version
isTermOfServiceAccepted: false, isTermOfServiceAccepted: false,
accessCode: null,
}, },
"setting" "setting"
); );
// App history
const [appHistory, setAppHistory] = useState<History>([]);
// Tracks the currently viewed version from app history
const [currentVersion, setCurrentVersion] = useState<number | null>(null);
const [shouldIncludeResultImage, setShouldIncludeResultImage] =
useState<boolean>(false);
const wsRef = useRef<WebSocket>(null); const wsRef = useRef<WebSocket>(null);
// Code generation model from local storage or the default value
const model =
settings.codeGenerationModel || CodeGenerationModel.GPT_4_VISION;
const showBetterModelMessage =
model !== CodeGenerationModel.GPT_4O_2024_05_13 &&
model !== CodeGenerationModel.CLAUDE_3_5_SONNET_2024_06_20 &&
appState === AppState.INITIAL;
const showSelectAndEditFeature =
(model === CodeGenerationModel.GPT_4O_2024_05_13 ||
model === CodeGenerationModel.CLAUDE_3_5_SONNET_2024_06_20) &&
(settings.generatedCodeConfig === Stack.HTML_TAILWIND ||
settings.generatedCodeConfig === Stack.HTML_CSS);
// Indicate coding state using the browser tab's favicon and title
useBrowserTabIndicator(appState === AppState.CODING);
// When the user already has the settings in local storage, newly added keys // When the user already has the settings in local storage, newly added keys
// do not get added to the settings so if it's falsy, we populate it with the default // do not get added to the settings so if it's falsy, we populate it with the default
// value // value
@ -77,108 +103,129 @@ function App() {
if (!settings.generatedCodeConfig) { if (!settings.generatedCodeConfig) {
setSettings((prev) => ({ setSettings((prev) => ({
...prev, ...prev,
generatedCodeConfig: GeneratedCodeConfig.HTML_TAILWIND, generatedCodeConfig: Stack.HTML_TAILWIND,
})); }));
} }
}, [settings.generatedCodeConfig, setSettings]); }, [settings.generatedCodeConfig, setSettings]);
const takeScreenshot = async (): Promise<string> => { // Functions
const iframeElement = document.querySelector(
"#preview-desktop"
) as HTMLIFrameElement;
if (!iframeElement?.contentWindow?.document.body) {
return "";
}
const canvas = await html2canvas(iframeElement.contentWindow.document.body);
const png = canvas.toDataURL("image/png");
return png;
};
const downloadCode = () => {
// Create a blob from the generated code
const blob = new Blob([generatedCode], { type: "text/html" });
const url = URL.createObjectURL(blob);
// Create an anchor element and set properties for download
const a = document.createElement("a");
a.href = url;
a.download = "index.html"; // Set the file name for download
document.body.appendChild(a); // Append to the document
a.click(); // Programmatically click the anchor to trigger download
// Clean up by removing the anchor and revoking the Blob URL
document.body.removeChild(a);
URL.revokeObjectURL(url);
};
const reset = () => { const reset = () => {
setAppState(AppState.INITIAL); setAppState(AppState.INITIAL);
setGeneratedCode(""); setShouldIncludeResultImage(false);
setUpdateInstruction("");
disableInSelectAndEditMode();
resetExecutionConsoles();
resetCommits();
resetHead();
// Inputs
setInputMode("image");
setReferenceImages([]); setReferenceImages([]);
setExecutionConsole([]); setIsImportedFromCode(false);
setAppHistory([]);
}; };
const stop = () => { const regenerate = () => {
if (head === null) {
toast.error(
"No current version set. Please contact support via chat or Github."
);
throw new Error("Regenerate called with no head");
}
// Retrieve the previous command
const currentCommit = commits[head];
if (currentCommit.type !== "ai_create") {
toast.error("Only the first version can be regenerated.");
return;
}
// Re-run the create
doCreate(referenceImages, inputMode);
};
// Used when the user cancels the code generation
const cancelCodeGeneration = () => {
wsRef.current?.close?.(USER_CLOSE_WEB_SOCKET_CODE); wsRef.current?.close?.(USER_CLOSE_WEB_SOCKET_CODE);
// make sure stop can correct the state even if the websocket is already closed
setAppState(AppState.CODE_READY);
}; };
function doGenerateCode( // Used for code generation failure as well
params: CodeGenerationParams, const cancelCodeGenerationAndReset = (commit: Commit) => {
parentVersion: number | null // When the current commit is the first version, reset the entire app state
) { if (commit.type === "ai_create") {
setExecutionConsole([]); reset();
} else {
// Otherwise, remove current commit from commits
removeCommit(commit.hash);
// Revert to parent commit
const parentCommitHash = commit.parentHash;
if (parentCommitHash) {
setHead(parentCommitHash);
} else {
throw new Error("Parent commit not found");
}
setAppState(AppState.CODE_READY);
}
};
function doGenerateCode(params: CodeGenerationParams) {
// Reset the execution console
resetExecutionConsoles();
// Set the app state
setAppState(AppState.CODING); setAppState(AppState.CODING);
// Merge settings with params // Merge settings with params
const updatedParams = { ...params, ...settings }; const updatedParams = { ...params, ...settings };
const baseCommitObject = {
variants: [{ code: "" }, { code: "" }],
};
const commitInputObject =
params.generationType === "create"
? {
...baseCommitObject,
type: "ai_create" as const,
parentHash: null,
inputs: { image_url: referenceImages[0] },
}
: {
...baseCommitObject,
type: "ai_edit" as const,
parentHash: head,
inputs: {
prompt: params.history
? params.history[params.history.length - 1]
: "",
},
};
// Create a new commit and set it as the head
const commit = createCommit(commitInputObject);
addCommit(commit);
setHead(commit.hash);
generateCode( generateCode(
wsRef, wsRef,
updatedParams, updatedParams,
(token) => setGeneratedCode((prev) => prev + token), // On change
(code) => { (token, variantIndex) => {
setGeneratedCode(code); appendCommitCode(commit.hash, variantIndex, token);
if (params.generationType === "create") {
setAppHistory([
{
type: "ai_create",
parentIndex: null,
code,
inputs: { image_url: referenceImages[0] },
},
]);
setCurrentVersion(0);
} else {
setAppHistory((prev) => {
// Validate parent version
if (parentVersion === null) {
toast.error(
"No parent version set. Contact support or open a Github issue."
);
return prev;
}
const newHistory: History = [
...prev,
{
type: "ai_edit",
parentIndex: parentVersion,
code,
inputs: {
prompt: updateInstruction,
},
},
];
setCurrentVersion(newHistory.length - 1);
return newHistory;
});
}
}, },
(line) => setExecutionConsole((prev) => [...prev, line]), // On set code
(code, variantIndex) => {
setCommitCode(commit.hash, variantIndex, code);
},
// On status update
(line, variantIndex) => appendExecutionConsole(variantIndex, line),
// On cancel
() => {
cancelCodeGenerationAndReset(commit);
},
// On complete
() => { () => {
setAppState(AppState.CODE_READY); setAppState(AppState.CODE_READY);
} }
@ -186,59 +233,75 @@ function App() {
} }
// Initial version creation // Initial version creation
function doCreate(referenceImages: string[]) { function doCreate(referenceImages: string[], inputMode: "image" | "video") {
// Reset any existing state // Reset any existing state
reset(); reset();
// Set the input states
setReferenceImages(referenceImages); setReferenceImages(referenceImages);
setInputMode(inputMode);
// Kick off the code generation
if (referenceImages.length > 0) { if (referenceImages.length > 0) {
doGenerateCode( doGenerateCode({
{ generationType: "create",
generationType: "create", image: referenceImages[0],
image: referenceImages[0], inputMode,
}, });
currentVersion
);
} }
} }
// Subsequent updates // Subsequent updates
async function doUpdate() { async function doUpdate(
if (currentVersion === null) { updateInstruction: string,
toast.error( selectedElement?: HTMLElement
"No current version set. Contact support or open a Github issue." ) {
); if (updateInstruction.trim() === "") {
toast.error("Please include some instructions for AI on what to update.");
return; return;
} }
const updatedHistory = [ if (head === null) {
...extractHistoryTree(appHistory, currentVersion), toast.error(
updateInstruction, "No current version set. Contact support or open a Github issue."
];
if (shouldIncludeResultImage) {
const resultImage = await takeScreenshot();
doGenerateCode(
{
generationType: "update",
image: referenceImages[0],
resultImage: resultImage,
history: updatedHistory,
},
currentVersion
);
} else {
doGenerateCode(
{
generationType: "update",
image: referenceImages[0],
history: updatedHistory,
},
currentVersion
); );
throw new Error("Update called with no head");
} }
setGeneratedCode(""); let historyTree;
try {
historyTree = extractHistory(head, commits);
} catch {
toast.error(
"Version history is invalid. This shouldn't happen. Please contact support or open a Github issue."
);
throw new Error("Invalid version history");
}
let modifiedUpdateInstruction = updateInstruction;
// Send in a reference to the selected element if it exists
if (selectedElement) {
modifiedUpdateInstruction =
updateInstruction +
" referring to this element specifically: " +
selectedElement.outerHTML;
}
const updatedHistory = [...historyTree, modifiedUpdateInstruction];
const resultImage = shouldIncludeResultImage
? await takeScreenshot()
: undefined;
doGenerateCode({
generationType: "update",
inputMode,
image: referenceImages[0],
resultImage,
history: updatedHistory,
isImportedFromCode,
});
setUpdateInstruction(""); setUpdateInstruction("");
} }
@ -249,9 +312,37 @@ function App() {
})); }));
}; };
// Store the given stack in settings as the generated-code config,
// leaving every other setting untouched.
function setStack(stack: Stack) {
  setSettings((previousSettings) => {
    return { ...previousSettings, generatedCodeConfig: stack };
  });
}
// Start a project from user-supplied code rather than from an AI generation.
function importFromCode(code: string, stack: Stack) {
  // Remember that this project originated from imported code
  setIsImportedFromCode(true);

  // Apply the stack that was chosen for the imported code
  setStack(stack);

  // The imported code becomes the single variant of a parentless commit,
  // which is then made the head
  const importedCommit = createCommit({
    type: "code_create",
    parentHash: null,
    variants: [{ code }],
    inputs: null,
  });
  addCommit(importedCommit);
  setHead(importedCommit.hash);

  // There is nothing to generate, so go straight to the ready state
  setAppState(AppState.CODE_READY);
}
return ( return (
<div className="mt-2 dark:bg-black dark:text-white"> <div className="mt-2 dark:bg-black dark:text-white">
{IS_RUNNING_ON_CLOUD && <PicoBadge settings={settings} />} {IS_RUNNING_ON_CLOUD && <PicoBadge />}
{IS_RUNNING_ON_CLOUD && ( {IS_RUNNING_ON_CLOUD && (
<TermsOfServiceDialog <TermsOfServiceDialog
open={!settings.isTermOfServiceAccepted} open={!settings.isTermOfServiceAccepted}
@ -260,198 +351,47 @@ function App() {
)} )}
<div className="lg:fixed lg:inset-y-0 lg:z-40 lg:flex lg:w-96 lg:flex-col"> <div className="lg:fixed lg:inset-y-0 lg:z-40 lg:flex lg:w-96 lg:flex-col">
<div className="flex grow flex-col gap-y-2 overflow-y-auto border-r border-gray-200 bg-white px-6 dark:bg-zinc-950 dark:text-white"> <div className="flex grow flex-col gap-y-2 overflow-y-auto border-r border-gray-200 bg-white px-6 dark:bg-zinc-950 dark:text-white">
{/* Header with access to settings */}
<div className="flex items-center justify-between mt-10 mb-2"> <div className="flex items-center justify-between mt-10 mb-2">
<h1 className="text-2xl ">Screenshot to Code</h1> <h1 className="text-2xl ">Screenshot to Code</h1>
<SettingsDialog settings={settings} setSettings={setSettings} /> <SettingsDialog settings={settings} setSettings={setSettings} />
</div> </div>
<OutputSettingsSection {/* Generation settings like stack and model */}
generatedCodeConfig={settings.generatedCodeConfig} <GenerationSettings settings={settings} setSettings={setSettings} />
setGeneratedCodeConfig={(config: GeneratedCodeConfig) =>
setSettings((prev) => ({
...prev,
generatedCodeConfig: config,
}))
}
shouldDisableUpdates={
appState === AppState.CODING || appState === AppState.CODE_READY
}
/>
{IS_RUNNING_ON_CLOUD && {/* Show auto updated message when older models are chosen */}
!(settings.openAiApiKey || settings.accessCode) && ( {showBetterModelMessage && <DeprecationMessage />}
<OnboardingNote />
)}
{IS_OPENAI_DOWN && ( {/* Show tip link until coding is complete */}
<div className="bg-black text-white dark:bg-white dark:text-black p-3 rounded"> {appState !== AppState.CODE_READY && <TipLink />}
OpenAI API is currently down. Try back in 30 minutes or later. We
apologize for the inconvenience.
</div>
)}
{IS_RUNNING_ON_CLOUD && !settings.openAiApiKey && <OnboardingNote />}
{/* Rest of the sidebar when we're not in the initial state */}
{(appState === AppState.CODING || {(appState === AppState.CODING ||
appState === AppState.CODE_READY) && ( appState === AppState.CODE_READY) && (
<> <Sidebar
{/* Show code preview only when coding */} showSelectAndEditFeature={showSelectAndEditFeature}
{appState === AppState.CODING && ( doUpdate={doUpdate}
<div className="flex flex-col"> regenerate={regenerate}
<div className="flex items-center gap-x-1"> cancelCodeGeneration={cancelCodeGeneration}
<Spinner />
{executionConsole.slice(-1)[0]}
</div>
<div className="flex mt-4 w-full">
<Button
onClick={stop}
className="w-full dark:text-white dark:bg-gray-700"
>
Stop
</Button>
</div>
<CodePreview code={generatedCode} />
</div>
)}
{appState === AppState.CODE_READY && (
<div>
<div className="grid w-full gap-2">
<Textarea
placeholder="Tell the AI what to change..."
onChange={(e) => setUpdateInstruction(e.target.value)}
value={updateInstruction}
/>
<div className="flex justify-between items-center gap-x-2">
<div className="font-500 text-xs text-slate-700 dark:text-white">
Include screenshot of current version?
</div>
<Switch
checked={shouldIncludeResultImage}
onCheckedChange={setShouldIncludeResultImage}
className="dark:bg-gray-700"
/>
</div>
<Button
onClick={doUpdate}
className="dark:text-white dark:bg-gray-700"
>
Update
</Button>
</div>
<div className="flex items-center gap-x-2 mt-2">
<Button
onClick={downloadCode}
className="flex items-center gap-x-2 dark:text-white dark:bg-gray-700"
>
<FaDownload /> Download
</Button>
<Button
onClick={reset}
className="flex items-center gap-x-2 dark:text-white dark:bg-gray-700"
>
<FaUndo />
Reset
</Button>
</div>
</div>
)}
{/* Reference image display */}
<div className="flex gap-x-2 mt-2">
<div className="flex flex-col">
<div
className={classNames({
"scanning relative": appState === AppState.CODING,
})}
>
<img
className="w-[340px] border border-gray-200 rounded-md"
src={referenceImages[0]}
alt="Reference"
/>
</div>
<div className="text-gray-400 uppercase text-sm text-center mt-1">
Original Screenshot
</div>
</div>
<div className="bg-gray-400 px-4 py-2 rounded text-sm hidden">
<h2 className="text-lg mb-4 border-b border-gray-800">
Console
</h2>
{executionConsole.map((line, index) => (
<div
key={index}
className="border-b border-gray-400 mb-2 text-gray-600 font-mono"
>
{line}
</div>
))}
</div>
</div>
</>
)}
{
<HistoryDisplay
history={appHistory}
currentVersion={currentVersion}
revertToVersion={(index) => {
if (
index < 0 ||
index >= appHistory.length ||
!appHistory[index]
)
return;
setCurrentVersion(index);
setGeneratedCode(appHistory[index].code);
}}
shouldDisableReverts={appState === AppState.CODING}
/> />
} )}
</div> </div>
</div> </div>
<main className="py-2 lg:pl-96"> <main className="py-2 lg:pl-96">
{appState === AppState.INITIAL && ( {appState === AppState.INITIAL && (
<div className="flex flex-col justify-center items-center gap-y-10"> <StartPane
<ImageUpload setReferenceImages={doCreate} /> doCreate={doCreate}
<UrlInputSection importFromCode={importFromCode}
doCreate={doCreate} settings={settings}
screenshotOneApiKey={settings.screenshotOneApiKey} />
/>
</div>
)} )}
{(appState === AppState.CODING || appState === AppState.CODE_READY) && ( {(appState === AppState.CODING || appState === AppState.CODE_READY) && (
<div className="ml-4"> <PreviewPane doUpdate={doUpdate} reset={reset} settings={settings} />
<Tabs defaultValue="desktop">
<div className="flex justify-end mr-8 mb-4">
<TabsList>
<TabsTrigger value="desktop" className="flex gap-x-2">
<FaDesktop /> Desktop
</TabsTrigger>
<TabsTrigger value="mobile" className="flex gap-x-2">
<FaMobile /> Mobile
</TabsTrigger>
<TabsTrigger value="code" className="flex gap-x-2">
<FaCode />
Code
</TabsTrigger>
</TabsList>
</div>
<TabsContent value="desktop">
<Preview code={generatedCode} device="desktop" />
</TabsContent>
<TabsContent value="mobile">
<Preview code={generatedCode} device="mobile" />
</TabsContent>
<TabsContent value="code">
<CodeTab
code={generatedCode}
setCode={setGeneratedCode}
settings={settings}
/>
</TabsContent>
</Tabs>
</div>
)} )}
</main> </main>
</div> </div>

View File

@ -1,7 +1,12 @@
import { useState, useEffect, useMemo, useCallback } from "react"; import { useState, useEffect, useMemo } from "react";
// useCallback
import { useDropzone } from "react-dropzone"; import { useDropzone } from "react-dropzone";
// import { PromptImage } from "../../../types"; // import { PromptImage } from "../../../types";
import { toast } from "react-hot-toast"; import { toast } from "react-hot-toast";
import { URLS } from "../urls";
import { Badge } from "./ui/badge";
import ScreenRecorder from "./recording/ScreenRecorder";
import { ScreenRecorderState } from "../types";
const baseStyle = { const baseStyle = {
flex: 1, flex: 1,
@ -50,19 +55,31 @@ type FileWithPreview = {
} & File; } & File;
interface Props { interface Props {
setReferenceImages: (referenceImages: string[]) => void; setReferenceImages: (
referenceImages: string[],
inputMode: "image" | "video"
) => void;
} }
function ImageUpload({ setReferenceImages }: Props) { function ImageUpload({ setReferenceImages }: Props) {
const [files, setFiles] = useState<FileWithPreview[]>([]); const [files, setFiles] = useState<FileWithPreview[]>([]);
// TODO: Switch to Zustand
const [screenRecorderState, setScreenRecorderState] =
useState<ScreenRecorderState>(ScreenRecorderState.INITIAL);
const { getRootProps, getInputProps, isFocused, isDragAccept, isDragReject } = const { getRootProps, getInputProps, isFocused, isDragAccept, isDragReject } =
useDropzone({ useDropzone({
maxFiles: 1, maxFiles: 1,
maxSize: 1024 * 1024 * 5, // 5 MB maxSize: 1024 * 1024 * 20, // 20 MB
accept: { accept: {
// Image formats
"image/png": [".png"], "image/png": [".png"],
"image/jpeg": [".jpeg"], "image/jpeg": [".jpeg"],
"image/jpg": [".jpg"], "image/jpg": [".jpg"],
// Video formats
"video/quicktime": [".mov"],
"video/mp4": [".mp4"],
"video/webm": [".webm"],
}, },
onDrop: (acceptedFiles) => { onDrop: (acceptedFiles) => {
// Set up the preview thumbnail images // Set up the preview thumbnail images
@ -77,7 +94,14 @@ function ImageUpload({ setReferenceImages }: Props) {
// Convert images to data URLs and set the prompt images state // Convert images to data URLs and set the prompt images state
Promise.all(acceptedFiles.map((file) => fileToDataURL(file))) Promise.all(acceptedFiles.map((file) => fileToDataURL(file)))
.then((dataUrls) => { .then((dataUrls) => {
setReferenceImages(dataUrls.map((dataUrl) => dataUrl as string)); if (dataUrls.length > 0) {
setReferenceImages(
dataUrls.map((dataUrl) => dataUrl as string),
(dataUrls[0] as string).startsWith("data:video")
? "video"
: "image"
);
}
}) })
.catch((error) => { .catch((error) => {
toast.error("Error reading files" + error); toast.error("Error reading files" + error);
@ -89,39 +113,39 @@ function ImageUpload({ setReferenceImages }: Props) {
}, },
}); });
const pasteEvent = useCallback( // const pasteEvent = useCallback(
(event: ClipboardEvent) => { // (event: ClipboardEvent) => {
const clipboardData = event.clipboardData; // const clipboardData = event.clipboardData;
if (!clipboardData) return; // if (!clipboardData) return;
const items = clipboardData.items; // const items = clipboardData.items;
const files = []; // const files = [];
for (let i = 0; i < items.length; i++) { // for (let i = 0; i < items.length; i++) {
const file = items[i].getAsFile(); // const file = items[i].getAsFile();
if (file && file.type.startsWith("image/")) { // if (file && file.type.startsWith("image/")) {
files.push(file); // files.push(file);
} // }
} // }
// Convert images to data URLs and set the prompt images state // // Convert images to data URLs and set the prompt images state
Promise.all(files.map((file) => fileToDataURL(file))) // Promise.all(files.map((file) => fileToDataURL(file)))
.then((dataUrls) => { // .then((dataUrls) => {
if (dataUrls.length > 0) { // if (dataUrls.length > 0) {
setReferenceImages(dataUrls.map((dataUrl) => dataUrl as string)); // setReferenceImages(dataUrls.map((dataUrl) => dataUrl as string));
} // }
}) // })
.catch((error) => { // .catch((error) => {
// TODO: Display error to user // // TODO: Display error to user
console.error("Error reading files:", error); // console.error("Error reading files:", error);
}); // });
}, // },
[setReferenceImages] // [setReferenceImages]
); // );
// TODO: Make sure we don't listen to paste events in text input components // TODO: Make sure we don't listen to paste events in text input components
useEffect(() => { // useEffect(() => {
window.addEventListener("paste", pasteEvent); // window.addEventListener("paste", pasteEvent);
}, [pasteEvent]); // }, [pasteEvent]);
useEffect(() => { useEffect(() => {
return () => files.forEach((file) => URL.revokeObjectURL(file.preview)); return () => files.forEach((file) => URL.revokeObjectURL(file.preview));
@ -139,15 +163,34 @@ function ImageUpload({ setReferenceImages }: Props) {
return ( return (
<section className="container"> <section className="container">
{/* eslint-disable-next-line @typescript-eslint/no-explicit-any */} {screenRecorderState === ScreenRecorderState.INITIAL && (
<div {...getRootProps({ style: style as any })}> /* eslint-disable-next-line @typescript-eslint/no-explicit-any */
<input {...getInputProps()} /> <div {...getRootProps({ style: style as any })}>
<p className="text-slate-700 text-lg"> <input {...getInputProps()} className="file-input" />
Drag & drop a screenshot here, <br /> <p className="text-slate-700 text-lg">
or paste from clipboard, <br /> Drag & drop a screenshot here, <br />
or click to upload or click to upload
</p> </p>
</div> </div>
)}
{screenRecorderState === ScreenRecorderState.INITIAL && (
<div className="text-center text-sm text-slate-800 mt-4">
<Badge>New!</Badge> Upload a screen recording (.mp4, .mov) or record
your screen to clone a whole app (experimental).{" "}
<a
className="underline"
href={URLS["intro-to-video"]}
target="_blank"
>
Learn more.
</a>
</div>
)}
<ScreenRecorder
screenRecorderState={screenRecorderState}
setScreenRecorderState={setScreenRecorderState}
generateCode={setReferenceImages}
/>
</section> </section>
); );
} }

View File

@ -0,0 +1,76 @@
import { useState } from "react";
import { Button } from "./ui/button";
import {
Dialog,
DialogContent,
DialogDescription,
DialogFooter,
DialogHeader,
DialogTitle,
DialogTrigger,
} from "./ui/dialog";
import { Textarea } from "./ui/textarea";
import OutputSettingsSection from "./settings/OutputSettingsSection";
import toast from "react-hot-toast";
import { Stack } from "../lib/stacks";
interface Props {
importFromCode: (code: string, stack: Stack) => void;
}
/**
 * Button + dialog that lets the user paste existing code, pick a stack and
 * hand both to `importFromCode`.
 *
 * @param importFromCode - Called with the pasted code and the selected stack
 *   once both have been provided.
 */
function ImportCodeSection({ importFromCode }: Props) {
  const [code, setCode] = useState("");
  const [stack, setStack] = useState<Stack | undefined>(undefined);

  // Validate the form and, when valid, pass the values to the parent.
  const doImport = () => {
    // Whitespace-only input contains no code, so reject it like an empty
    // string (same rule doUpdate applies to update instructions).
    if (code.trim() === "") {
      toast.error("Please paste in some code");
      return;
    }

    if (stack === undefined) {
      toast.error("Please select your stack");
      return;
    }

    importFromCode(code, stack);
  };

  return (
    <Dialog>
      <DialogTrigger asChild>
        <Button className="import-from-code-btn" variant="secondary">
          Import from Code
        </Button>
      </DialogTrigger>
      <DialogContent className="sm:max-w-[425px]">
        <DialogHeader>
          <DialogTitle>Paste in your HTML code</DialogTitle>
          <DialogDescription>
            Make sure that the code you're importing is valid HTML.
          </DialogDescription>
        </DialogHeader>

        <Textarea
          value={code}
          onChange={(e) => setCode(e.target.value)}
          className="w-full h-64"
        />

        <OutputSettingsSection
          stack={stack}
          setStack={(config: Stack) => setStack(config)}
          label="Stack:"
          shouldDisableUpdates={false}
        />

        <DialogFooter>
          <Button className="import-btn" type="submit" onClick={doImport}>
            Import
          </Button>
        </DialogFooter>
      </DialogContent>
    </Dialog>
  );
}
export default ImportCodeSection;

View File

@ -1,92 +0,0 @@
import {
Select,
SelectContent,
SelectGroup,
SelectItem,
SelectTrigger,
} from "./ui/select";
import { GeneratedCodeConfig } from "../types";
// Builds the label for a code-generation config: each technology name is
// rendered in semibold, and two-part stacks are joined with a "+".
function generateDisplayComponent(config: GeneratedCodeConfig) {
  const bold = (text: string) => (
    <span className="font-semibold">{text}</span>
  );

  if (config === GeneratedCodeConfig.HTML_TAILWIND) {
    return (
      <div>
        {bold("HTML")} +{" "}
        {bold("Tailwind")}
      </div>
    );
  }

  if (config === GeneratedCodeConfig.REACT_TAILWIND) {
    return (
      <div>
        {bold("React")} +{" "}
        {bold("Tailwind")}
      </div>
    );
  }

  if (config === GeneratedCodeConfig.BOOTSTRAP) {
    return <div>{bold("Bootstrap")}</div>;
  }

  if (config === GeneratedCodeConfig.IONIC_TAILWIND) {
    return (
      <div>
        {bold("Ionic")} +{" "}
        {bold("Tailwind")}
      </div>
    );
  }

  // TODO: Should never be reached. Error out instead of echoing the value.
  return config;
}
interface Props {
generatedCodeConfig: GeneratedCodeConfig;
setGeneratedCodeConfig: (config: GeneratedCodeConfig) => void;
shouldDisableUpdates?: boolean;
}
// Dropdown for choosing which code-generation config to use. The trigger
// shows the current config; the list offers every selectable config.
function OutputSettingsSection({
  generatedCodeConfig,
  setGeneratedCodeConfig,
  shouldDisableUpdates = false,
}: Props) {
  // Options in the order they appear in the dropdown
  const selectableConfigs = [
    GeneratedCodeConfig.HTML_TAILWIND,
    GeneratedCodeConfig.REACT_TAILWIND,
    GeneratedCodeConfig.BOOTSTRAP,
    GeneratedCodeConfig.IONIC_TAILWIND,
  ];

  // The select reports plain strings; hand them on as config values
  const handleValueChange = (value: string) => {
    setGeneratedCodeConfig(value as GeneratedCodeConfig);
  };

  return (
    <div className="flex flex-col gap-y-2 justify-between text-sm">
      <div className="grid grid-cols-3 items-center gap-4">
        <span>Generating:</span>
        <Select
          value={generatedCodeConfig}
          onValueChange={handleValueChange}
          disabled={shouldDisableUpdates}
        >
          <SelectTrigger className="col-span-2" id="output-settings-js">
            {generateDisplayComponent(generatedCodeConfig)}
          </SelectTrigger>
          <SelectContent>
            <SelectGroup>
              {selectableConfigs.map((option) => (
                <SelectItem key={option} value={option}>
                  {generateDisplayComponent(option)}
                </SelectItem>
              ))}
            </SelectGroup>
          </SelectContent>
        </Select>
      </div>
    </div>
  );
}
export default OutputSettingsSection;

View File

@ -1,39 +0,0 @@
import { Settings } from "../types";
/**
 * Two fixed badges in the bottom-right corner: a "feature requests?" link
 * and, below it, either a project attribution link (no access code set) or
 * an "email support" link (access code set).
 *
 * @param settings - Only `settings.accessCode` is read, to choose the
 *   second badge.
 */
export function PicoBadge({ settings }: { settings: Settings }) {
  // The two lower badges are mutually exclusive and share the same styling,
  // so only the target and the label vary.
  const lowerBadge = settings.accessCode
    ? { href: "mailto:support@picoapps.xyz", label: "email support" }
    : {
        href: "https://picoapps.xyz?ref=screenshot-to-code",
        label: "an open source project by Pico",
      };

  return (
    <>
      {/* rel="noopener noreferrer" keeps the opened page from reaching back
          into this window through window.opener */}
      <a
        href="https://screenshot-to-code.canny.io/feature-requests"
        target="_blank"
        rel="noopener noreferrer"
      >
        <div className="fixed z-50 bottom-16 right-5 rounded-md shadow bg-black text-white px-4 text-xs py-3 cursor-pointer">
          feature requests?
        </div>
      </a>
      <a href={lowerBadge.href} target="_blank" rel="noopener noreferrer">
        <div className="fixed z-50 bottom-5 right-5 rounded-md shadow text-black bg-white px-4 text-xs py-3 cursor-pointer">
          {lowerBadge.label}
        </div>
      </a>
    </>
  );
}

View File

@ -1,42 +0,0 @@
import { useEffect, useRef } from 'react';
import classNames from "classnames";
import useThrottle from "../hooks/useThrottle";
interface Props {
code: string;
device: "mobile" | "desktop";
}
// Renders the given code inside an iframe sized for the chosen device.
// The code is throttled (200) before it is written into the iframe document.
function Preview({ code, device }: Props) {
  const iframeRef = useRef<HTMLIFrameElement | null>(null);
  const throttledCode = useThrottle(code, 200);

  // Rewrite the iframe document whenever the throttled code changes
  useEffect(() => {
    const previewDocument = iframeRef.current?.contentDocument;
    if (!previewDocument) {
      return;
    }
    previewDocument.open();
    previewDocument.write(throttledCode);
    previewDocument.close();
  }, [throttledCode]);

  const frameClassName = classNames(
    "border-[4px] border-black rounded-[20px] shadow-lg",
    "transform scale-[0.9] origin-top",
    {
      "w-full h-[832px]": device === "desktop",
      "w-[400px] h-[832px]": device === "mobile",
    }
  );

  return (
    <div className="flex justify-center mx-2">
      <iframe
        id={`preview-${device}`}
        ref={iframeRef}
        title="Preview"
        className={frameClassName}
      ></iframe>
    </div>
  );
}
export default Preview;

View File

@ -6,7 +6,7 @@ import { toast } from "react-hot-toast";
interface Props { interface Props {
screenshotOneApiKey: string | null; screenshotOneApiKey: string | null;
doCreate: (urls: string[]) => void; doCreate: (urls: string[], inputMode: "image" | "video") => void;
} }
export function UrlInputSection({ doCreate, screenshotOneApiKey }: Props) { export function UrlInputSection({ doCreate, screenshotOneApiKey }: Props) {
@ -46,7 +46,7 @@ export function UrlInputSection({ doCreate, screenshotOneApiKey }: Props) {
} }
const res = await response.json(); const res = await response.json();
doCreate([res.url]); doCreate([res.url], "image");
} catch (error) { } catch (error) {
console.error(error); console.error(error);
toast.error( toast.error(
@ -69,7 +69,7 @@ export function UrlInputSection({ doCreate, screenshotOneApiKey }: Props) {
<Button <Button
onClick={takeScreenshot} onClick={takeScreenshot}
disabled={isLoading} disabled={isLoading}
className="bg-slate-400" className="bg-slate-400 capture-btn"
> >
{isLoading ? "Capturing..." : "Capture"} {isLoading ? "Capturing..." : "Capture"}
</Button> </Button>

View File

@ -0,0 +1,37 @@
// String identifier used to refer to a commit (see `hash` / `parentHash`).
export type CommitHash = string;
// A single entry in a commit's `variants` list.
export type Variant = {
// Code held by this variant.
code: string;
};
// Fields shared by every commit kind declared below.
export type BaseCommit = {
// Identifier of this commit.
hash: CommitHash;
// Identifier of the parent commit, or null when there is no parent.
parentHash: CommitHash | null;
// When the commit was created.
dateCreated: Date;
// NOTE(review): presumably marks the commit as finalized — confirm against the store.
isCommitted: boolean;
// All variants stored on this commit.
variants: Variant[];
// NOTE(review): presumably an index into `variants` for the active one — confirm.
selectedVariantIndex: number;
};
// The `type` tags used by the commit kinds in this file.
export type CommitType = "ai_create" | "ai_edit" | "code_create";
// Commit tagged "ai_create"; its inputs carry an image URL.
export type AiCreateCommit = BaseCommit & {
type: "ai_create";
inputs: {
image_url: string;
};
};
// Commit tagged "ai_edit"; its inputs carry a prompt.
export type AiEditCommit = BaseCommit & {
type: "ai_edit";
inputs: {
prompt: string;
};
};
// Commit tagged "code_create"; it has no inputs.
export type CodeCreateCommit = BaseCommit & {
type: "code_create";
inputs: null;
};
// Any commit; discriminated by its `type` field.
export type Commit = AiCreateCommit | AiEditCommit | CodeCreateCommit;

View File

@ -0,0 +1,32 @@
import { nanoid } from "nanoid";
import {
AiCreateCommit,
AiEditCommit,
CodeCreateCommit,
Commit,
} from "./types";
// Fields that createCommit fills in itself, so callers must not supply them.
type GeneratedCommitFields =
  | "hash"
  | "dateCreated"
  | "selectedVariantIndex"
  | "isCommitted";

/**
 * Completes a partially specified commit.
 *
 * The caller's fields are copied as-is; a fresh nanoid hash, an
 * `isCommitted` flag of false, the current date and a selected variant
 * index of 0 are added on top.
 *
 * @param commit - A commit of any kind without the generated fields.
 * @returns The completed commit.
 */
export function createCommit(
  commit:
    | Omit<AiCreateCommit, GeneratedCommitFields>
    | Omit<AiEditCommit, GeneratedCommitFields>
    | Omit<CodeCreateCommit, GeneratedCommitFields>
): Commit {
  return {
    ...commit,
    hash: nanoid(),
    isCommitted: false,
    dateCreated: new Date(),
    selectedVariantIndex: 0,
  };
}

View File

@ -0,0 +1,25 @@
import React from "react";
import { BsArrowReturnLeft } from "react-icons/bs";
interface KeyboardShortcutBadgeProps {
letter: string;
}
// Small monospace badge for a keyboard shortcut. "enter" and "return"
// (any casing) are shown as a return-arrow icon; any other value is shown
// upper-cased.
const KeyboardShortcutBadge: React.FC<KeyboardShortcutBadgeProps> = ({
  letter,
}) => {
  const normalized = letter.toLowerCase();
  const isReturnKey = normalized === "enter" || normalized === "return";

  return (
    <span className="font-mono text-xs ml-2 rounded bg-gray-700 dark:bg-gray-900 text-white py-[2px] px-2">
      {isReturnKey ? <BsArrowReturnLeft /> : letter.toUpperCase()}
    </span>
  );
};
export default KeyboardShortcutBadge;

View File

@ -0,0 +1,74 @@
import React, { useEffect } from "react";
import { HTTP_BACKEND_URL } from "../../config";
import RatingPicker from "./RatingPicker";
interface Eval {
input: string;
outputs: string[];
}
/**
 * Page for manually rating eval outputs.
 *
 * Loads the evals from `${HTTP_BACKEND_URL}/evals` once on mount, shows each
 * input image next to its outputs (rendered in iframes), and keeps one
 * rating per eval. The header shows the summed ratings against a maximum of
 * 4 per eval.
 */
function EvalsPage() {
  const [evals, setEvals] = React.useState<Eval[]>([]);
  const [ratings, setRatings] = React.useState<number[]>([]);

  const total = ratings.reduce((a, b) => a + b, 0);
  const max = ratings.length * 4;
  // With no ratings, total / max is NaN; `|| 0` turns that into 0
  const score = ((total / max) * 100 || 0).toFixed(2);

  // Fetch exactly once on mount. Depending on `evals` here would refetch
  // forever when the backend returns an empty list, because every response
  // produces a new (still empty) array.
  useEffect(() => {
    let isCancelled = false;

    fetch(`${HTTP_BACKEND_URL}/evals`)
      .then((res) => {
        if (!res.ok) {
          throw new Error(
            `Failed to load evals: ${res.status} ${res.statusText}`
          );
        }
        return res.json();
      })
      .then((data: Eval[]) => {
        // Don't touch state after the component has unmounted
        if (isCancelled) return;
        setEvals(data);
        setRatings(new Array<number>(data.length).fill(0));
      })
      .catch((error: unknown) => {
        if (!isCancelled) {
          console.error("Error loading evals:", error);
        }
      });

    return () => {
      isCancelled = true;
    };
  }, []);

  return (
    <div className="mx-auto">
      {/* Display total */}
      <div className="flex items-center justify-center w-full h-12 bg-zinc-950">
        <span className="text-2xl font-semibold text-white">
          Total: {total} out of {max} ({score}%)
        </span>
      </div>

      <div className="flex flex-col gap-y-4 mt-4 mx-auto justify-center">
        {evals.map((e, index) => (
          <div className="flex flex-col justify-center" key={index}>
            <h2 className="font-bold text-lg ml-4">{index}</h2>
            <div className="flex gap-x-2 justify-center ml-4">
              {/* Update w if N changes to a fixed number like w-[600px] */}
              <div className="w-1/2 p-1 border">
                <img src={e.input} alt={`Input for eval ${index}`} />
              </div>
              {e.outputs.map((output, outputIndex) => (
                <div className="w-1/2 p-1 border" key={outputIndex}>
                  {/* Put output into an iframe */}
                  <iframe
                    srcDoc={output}
                    title={`Output ${outputIndex} for eval ${index}`}
                    className="w-[1200px] h-[800px] transform scale-[0.60]"
                    style={{ transformOrigin: "top left" }}
                  ></iframe>
                </div>
              ))}
            </div>
            <div className="ml-8 mt-4 flex justify-center">
              <RatingPicker
                onSelect={(rating) => {
                  // Functional update so the latest ratings are used rather
                  // than the ones captured when this row was rendered
                  setRatings((prev) =>
                    prev.map((value, i) => (i === index ? rating : value))
                  );
                }}
              />
            </div>
          </div>
        ))}
      </div>
    </div>
  );
}
export default EvalsPage;

View File

@ -0,0 +1,38 @@
import React from "react";
interface Props {
onSelect: (rating: number) => void;
}
// Row of four clickable circles numbered 1-4. Clicking one highlights it
// and reports the number through onSelect.
function RatingPicker({ onSelect }: Props) {
  const [selected, setSelected] = React.useState<number | null>(null);

  const choose = (rating: number) => {
    setSelected(rating);
    onSelect(rating);
  };

  return (
    <div className="flex space-x-4">
      {[1, 2, 3, 4].map((rating) => {
        const isActive = rating === selected;
        const circleColor = isActive ? "bg-black" : "bg-gray-300";
        const labelColor = isActive ? "text-white" : "text-black";
        return (
          <div
            key={rating}
            className={`flex items-center justify-center w-8 h-8 ${circleColor} rounded-full cursor-pointer`}
            onClick={() => choose(rating)}
          >
            <span className={`text-lg font-semibold ${labelColor}`}>
              {rating}
            </span>
          </div>
        );
      })}
    </div>
  );
}
export default RatingPicker;

View File

@ -1,75 +1,87 @@
import { ScrollArea } from "@/components/ui/scroll-area";
import { History, HistoryItemType } from "./history_types";
import toast from "react-hot-toast"; import toast from "react-hot-toast";
import classNames from "classnames"; import classNames from "classnames";
import { Badge } from "../ui/badge";
import { renderHistory } from "./utils";
import {
Collapsible,
CollapsibleContent,
CollapsibleTrigger,
} from "../ui/collapsible";
import { Button } from "../ui/button";
import { CaretSortIcon } from "@radix-ui/react-icons";
import { useProjectStore } from "../../store/project-store";
interface Props { interface Props {
history: History;
currentVersion: number | null;
revertToVersion: (version: number) => void;
shouldDisableReverts: boolean; shouldDisableReverts: boolean;
} }
function displayHistoryItemType(itemType: HistoryItemType) { export default function HistoryDisplay({ shouldDisableReverts }: Props) {
switch (itemType) { const { commits, head, setHead } = useProjectStore();
case "ai_create":
return "Create";
case "ai_edit":
return "Edit";
default:
// TODO: Error out since this is exhaustive
return "Unknown";
}
}
export default function HistoryDisplay({ // Put all commits into an array and sort by created date (oldest first)
history, const flatHistory = Object.values(commits).sort(
currentVersion, (a, b) =>
revertToVersion, new Date(a.dateCreated).getTime() - new Date(b.dateCreated).getTime()
shouldDisableReverts, );
}: Props) {
return history.length === 0 ? null : ( // Annotate history items with a summary, parent version, etc.
const renderedHistory = renderHistory(flatHistory);
return renderedHistory.length === 0 ? null : (
<div className="flex flex-col h-screen"> <div className="flex flex-col h-screen">
<h1 className="font-bold mb-2">Versions</h1> <h1 className="font-bold mb-2">Versions</h1>
<ScrollArea className="flex-1 overflow-y-auto"> <ul className="space-y-0 flex flex-col-reverse">
<ul className="space-y-0 flex flex-col-reverse"> {renderedHistory.map((item, index) => (
{history.map((item, index) => ( <li key={index}>
<li <Collapsible>
key={index} <div
className={classNames( className={classNames(
"flex items-center space-x-2 justify-between p-2", "flex items-center justify-between space-x-2 w-full pr-2",
"border-b cursor-pointer", "border-b cursor-pointer",
{ {
" hover:bg-black hover:text-white": index !== currentVersion, " hover:bg-black hover:text-white": item.hash === head,
"bg-slate-500 text-white": index === currentVersion, "bg-slate-500 text-white": item.hash === head,
} }
)} )}
onClick={() => >
shouldDisableReverts <div
? toast.error( className="flex justify-between truncate flex-1 p-2"
"Please wait for code generation to complete before viewing an older version." onClick={() =>
) shouldDisableReverts
: revertToVersion(index) ? toast.error(
} "Please wait for code generation to complete before viewing an older version."
> )
<div className="flex gap-x-1"> : setHead(item.hash)
<h2 className="text-sm">{displayHistoryItemType(item.type)}</h2> }
{item.parentIndex !== null && item.parentIndex !== index - 1 ? ( >
<h2 className="text-sm"> <div className="flex gap-x-1 truncate">
(parent: v{(item.parentIndex || 0) + 1}) <h2 className="text-sm truncate">{item.summary}</h2>
</h2> {item.parentVersion !== null && (
) : null} <h2 className="text-sm">
(parent: v{item.parentVersion})
</h2>
)}
</div>
<h2 className="text-sm">v{index + 1}</h2>
</div>
<CollapsibleTrigger asChild>
<Button variant="ghost" size="sm" className="h-6">
<CaretSortIcon className="h-4 w-4" />
<span className="sr-only">Toggle</span>
</Button>
</CollapsibleTrigger>
</div> </div>
<h2 className="text-sm"> <CollapsibleContent className="w-full bg-slate-300 p-2">
{item.type === "ai_edit" <div>Full prompt: {item.summary}</div>
? item.inputs.prompt <div className="flex justify-end">
: item.inputs.image_url} <Badge>{item.type}</Badge>
</h2> </div>
<h2 className="text-sm">v{index + 1}</h2> </CollapsibleContent>
</li> </Collapsible>
))} </li>
</ul> ))}
</ScrollArea> </ul>
</div> </div>
); );
} }

View File

@ -1,26 +0,0 @@
// The kinds of entries a history can contain.
export type HistoryItemType = "ai_create" | "ai_edit";
// Fields shared by every history item regardless of its type.
type CommonHistoryItem = {
// Index of this item's parent within the History array, or null when the
// item has no parent.
parentIndex: null | number;
// The code associated with this history item.
code: string;
};
// A single history entry. `type` is the discriminant that selects which
// shape `inputs` has.
export type HistoryItem =
| ({
type: "ai_create";
inputs: AiCreateInputs;
} & CommonHistoryItem)
| ({
type: "ai_edit";
inputs: AiEditInputs;
} & CommonHistoryItem);
// Inputs recorded for an "ai_create" item.
export type AiCreateInputs = {
image_url: string;
};
// Inputs recorded for an "ai_edit" item.
export type AiEditInputs = {
prompt: string;
};
// An ordered list of history items; items refer to their parent by array index.
export type History = HistoryItem[];

View File

@ -1,103 +1,242 @@
import { expect, test } from "vitest"; import { extractHistory, renderHistory } from "./utils";
import { extractHistoryTree } from "./utils"; import { Commit, CommitHash } from "../commits/types";
import type { History } from "./history_types";
const basicLinearHistory: History = [ const basicLinearHistory: Record<CommitHash, Commit> = {
{ "0": {
hash: "0",
dateCreated: new Date(),
isCommitted: false,
type: "ai_create", type: "ai_create",
parentIndex: null, parentHash: null,
code: "<html>1. create</html>", variants: [{ code: "<html>1. create</html>" }],
selectedVariantIndex: 0,
inputs: { inputs: {
image_url: "", image_url: "",
}, },
}, },
{ "1": {
hash: "1",
dateCreated: new Date(),
isCommitted: false,
type: "ai_edit", type: "ai_edit",
parentIndex: 0, parentHash: "0",
code: "<html>2. edit with better icons</html>", variants: [{ code: "<html>2. edit with better icons</html>" }],
selectedVariantIndex: 0,
inputs: { inputs: {
prompt: "use better icons", prompt: "use better icons",
}, },
}, },
{ "2": {
hash: "2",
dateCreated: new Date(),
isCommitted: false,
type: "ai_edit", type: "ai_edit",
parentIndex: 1, parentHash: "1",
code: "<html>3. edit with better icons and red text</html>", variants: [{ code: "<html>3. edit with better icons and red text</html>" }],
selectedVariantIndex: 0,
inputs: { inputs: {
prompt: "make text red", prompt: "make text red",
}, },
}, },
]; };
const basicBranchingHistory: History = [ const basicLinearHistoryWithCode: Record<CommitHash, Commit> = {
"0": {
hash: "0",
dateCreated: new Date(),
isCommitted: false,
type: "code_create",
parentHash: null,
variants: [{ code: "<html>1. create</html>" }],
selectedVariantIndex: 0,
inputs: null,
},
...Object.fromEntries(Object.entries(basicLinearHistory).slice(1)),
};
const basicBranchingHistory: Record<CommitHash, Commit> = {
...basicLinearHistory, ...basicLinearHistory,
{ "3": {
hash: "3",
dateCreated: new Date(),
isCommitted: false,
type: "ai_edit", type: "ai_edit",
parentIndex: 1, parentHash: "1",
code: "<html>4. edit with better icons and green text</html>", variants: [
{ code: "<html>4. edit with better icons and green text</html>" },
],
selectedVariantIndex: 0,
inputs: { inputs: {
prompt: "make text green", prompt: "make text green",
}, },
}, },
]; };
const longerBranchingHistory: History = [ const longerBranchingHistory: Record<CommitHash, Commit> = {
...basicBranchingHistory, ...basicBranchingHistory,
{ "4": {
hash: "4",
dateCreated: new Date(),
isCommitted: false,
type: "ai_edit", type: "ai_edit",
parentIndex: 3, parentHash: "3",
code: "<html>5. edit with better icons and green, bold text</html>", variants: [
{ code: "<html>5. edit with better icons and green, bold text</html>" },
],
selectedVariantIndex: 0,
inputs: { inputs: {
prompt: "make text bold", prompt: "make text bold",
}, },
}, },
]; };
test("should only include history from this point onward", () => { const basicBadHistory: Record<CommitHash, Commit> = {
expect(extractHistoryTree(basicLinearHistory, 2)).toEqual([ "0": {
"<html>1. create</html>", hash: "0",
"use better icons", dateCreated: new Date(),
"<html>2. edit with better icons</html>", isCommitted: false,
"make text red", type: "ai_create",
"<html>3. edit with better icons and red text</html>", parentHash: null,
]); variants: [{ code: "<html>1. create</html>" }],
selectedVariantIndex: 0,
inputs: {
image_url: "",
},
},
"1": {
hash: "1",
dateCreated: new Date(),
isCommitted: false,
type: "ai_edit",
parentHash: "2", // <- Bad parent hash
variants: [{ code: "<html>2. edit with better icons</html>" }],
selectedVariantIndex: 0,
inputs: {
prompt: "use better icons",
},
},
};
expect(extractHistoryTree(basicLinearHistory, 0)).toEqual([ describe("History Utils", () => {
"<html>1. create</html>", test("should correctly extract the history tree", () => {
]); expect(extractHistory("2", basicLinearHistory)).toEqual([
"<html>1. create</html>",
"use better icons",
"<html>2. edit with better icons</html>",
"make text red",
"<html>3. edit with better icons and red text</html>",
]);
// Test branching expect(extractHistory("0", basicLinearHistory)).toEqual([
expect(extractHistoryTree(basicBranchingHistory, 3)).toEqual([ "<html>1. create</html>",
"<html>1. create</html>", ]);
"use better icons",
"<html>2. edit with better icons</html>",
"make text green",
"<html>4. edit with better icons and green text</html>",
]);
expect(extractHistoryTree(longerBranchingHistory, 4)).toEqual([ // Test branching
"<html>1. create</html>", expect(extractHistory("3", basicBranchingHistory)).toEqual([
"use better icons", "<html>1. create</html>",
"<html>2. edit with better icons</html>", "use better icons",
"make text green", "<html>2. edit with better icons</html>",
"<html>4. edit with better icons and green text</html>", "make text green",
"make text bold", "<html>4. edit with better icons and green text</html>",
"<html>5. edit with better icons and green, bold text</html>", ]);
]);
expect(extractHistoryTree(longerBranchingHistory, 2)).toEqual([ expect(extractHistory("4", longerBranchingHistory)).toEqual([
"<html>1. create</html>", "<html>1. create</html>",
"use better icons", "use better icons",
"<html>2. edit with better icons</html>", "<html>2. edit with better icons</html>",
"make text red", "make text green",
"<html>3. edit with better icons and red text</html>", "<html>4. edit with better icons and green text</html>",
]); "make text bold",
"<html>5. edit with better icons and green, bold text</html>",
]);
// Errors - TODO: Handle these expect(extractHistory("2", longerBranchingHistory)).toEqual([
// Bad index "<html>1. create</html>",
// TODO: Throw an exception instead? "use better icons",
expect(extractHistoryTree(basicLinearHistory, 100)).toEqual([]); "<html>2. edit with better icons</html>",
expect(extractHistoryTree(basicLinearHistory, -2)).toEqual([]); "make text red",
"<html>3. edit with better icons and red text</html>",
]);
// Bad tree // Errors
// Bad hash
expect(() => extractHistory("100", basicLinearHistory)).toThrow();
// Bad tree
expect(() => extractHistory("1", basicBadHistory)).toThrow();
});
test("should correctly render the history tree", () => {
expect(renderHistory(Object.values(basicLinearHistory))).toEqual([
{
...basicLinearHistory["0"],
type: "Create",
summary: "Create",
parentVersion: null,
},
{
...basicLinearHistory["1"],
type: "Edit",
summary: "use better icons",
parentVersion: null,
},
{
...basicLinearHistory["2"],
type: "Edit",
summary: "make text red",
parentVersion: null,
},
]);
// Render a history with code
expect(renderHistory(Object.values(basicLinearHistoryWithCode))).toEqual([
{
...basicLinearHistoryWithCode["0"],
type: "Imported from code",
summary: "Imported from code",
parentVersion: null,
},
{
...basicLinearHistoryWithCode["1"],
type: "Edit",
summary: "use better icons",
parentVersion: null,
},
{
...basicLinearHistoryWithCode["2"],
type: "Edit",
summary: "make text red",
parentVersion: null,
},
]);
// Render a non-linear history
expect(renderHistory(Object.values(basicBranchingHistory))).toEqual([
{
...basicBranchingHistory["0"],
type: "Create",
summary: "Create",
parentVersion: null,
},
{
...basicBranchingHistory["1"],
type: "Edit",
summary: "use better icons",
parentVersion: null,
},
{
...basicBranchingHistory["2"],
type: "Edit",
summary: "make text red",
parentVersion: null,
},
{
...basicBranchingHistory["3"],
type: "Edit",
summary: "make text green",
parentVersion: 2,
},
]);
});
}); });

View File

@ -1,32 +1,92 @@
import { History, HistoryItem } from "./history_types"; import { Commit, CommitHash, CommitType } from "../commits/types";
export function extractHistoryTree( export function extractHistory(
history: History, hash: CommitHash,
version: number commits: Record<CommitHash, Commit>
): string[] { ): string[] {
const flatHistory: string[] = []; const flatHistory: string[] = [];
let currentIndex: number | null = version; let currentCommitHash: CommitHash | null = hash;
while (currentIndex !== null) { while (currentCommitHash !== null) {
const item: HistoryItem = history[currentIndex]; const commit: Commit | null = commits[currentCommitHash];
if (item) { if (commit) {
if (item.type === "ai_create") { flatHistory.unshift(commit.variants[commit.selectedVariantIndex].code);
// Don't include the image for ai_create
flatHistory.unshift(item.code); // For edits, add the prompt to the history
} else { if (commit.type === "ai_edit") {
flatHistory.unshift(item.code); flatHistory.unshift(commit.inputs.prompt);
flatHistory.unshift(item.inputs.prompt);
} }
// Move to the parent of the current item // Move to the parent of the current item
currentIndex = item.parentIndex; currentCommitHash = commit.parentHash;
} else { } else {
// TODO: Throw an exception here? throw new Error("Malformed history: missing parent index");
// Break the loop if the item is not found (should not happen in a well-formed history)
break;
} }
} }
return flatHistory; return flatHistory;
} }
/**
 * Maps a commit type to the short human-readable label shown for it.
 *
 * @param itemType - The commit type to label.
 * @returns "Create", "Edit" or "Imported from code".
 * @throws Error when `itemType` is not one of the handled commit types.
 */
function displayHistoryItemType(itemType: CommitType) {
  if (itemType === "ai_create") return "Create";
  if (itemType === "ai_edit") return "Edit";
  if (itemType === "code_create") return "Imported from code";

  // Every known commit type was handled above, so `itemType` is `never` here.
  // Adding a new commit type without handling it turns this into a compile error.
  const exhaustiveCheck: never = itemType;
  throw new Error(`Unhandled case: ${exhaustiveCheck}`);
}
/**
 * Computes the 1-based version number of a commit's parent, for display
 * next to the commit in the rendered history.
 *
 * Despite the name, this does not mutate anything; it only returns a value.
 *
 * @param commit - The commit whose parent version is wanted.
 * @param history - The ordered list of commits; a commit's version is its
 *   position in this list plus one.
 * @returns The parent's version number, or null when the commit has no
 *   parent, the parent is not present in `history`, or the parent is the
 *   entry immediately before the commit (the implicit default).
 */
const setParentVersion = (commit: Commit, history: Commit[]) => {
  // If the commit has no parent, return null
  if (!commit.parentHash) return null;

  const parentIndex = history.findIndex(
    (item) => item.hash === commit.parentHash
  );
  const currentIndex = history.findIndex((item) => item.hash === commit.hash);

  // Only report a parent version if the parent exists and is not simply the
  // previous commit in the list.
  return parentIndex !== -1 && parentIndex !== currentIndex - 1
    ? parentIndex + 1
    : null;
};
/**
 * Produces the one-line summary shown for a commit.
 *
 * @param commit - The commit to summarize.
 * @returns The edit prompt for "ai_edit" commits, "Create" for "ai_create"
 *   commits and "Imported from code" for "code_create" commits.
 * @throws Error when the commit's type is not one of the handled types.
 */
export function summarizeHistoryItem(commit: Commit) {
  const kind = commit.type;

  if (kind === "ai_edit") return commit.inputs.prompt;
  if (kind === "ai_create") return "Create";
  if (kind === "code_create") return "Imported from code";

  // All known commit types were handled above, so `kind` is `never` here;
  // an unhandled new type becomes a compile error on this line.
  const exhaustiveCheck: never = kind;
  throw new Error(`Unhandled case: ${exhaustiveCheck}`);
}
/**
 * Converts an ordered list of commits into display-ready history entries.
 *
 * Each entry keeps all of the commit's own fields, except that `type` is
 * replaced by its display label, and gains a `summary` and a
 * `parentVersion`.
 *
 * @param history - The commits to render, in version order.
 * @returns One rendered entry per commit, in the same order.
 */
export const renderHistory = (history: Commit[]) =>
  history.map((commit) => ({
    ...commit,
    type: displayHistoryItemType(commit.type),
    summary: summarizeHistoryItem(commit),
    parentVersion: setParentVersion(commit, history),
  }));

View File

@ -0,0 +1,16 @@
import React from "react";
// This component takes no props.
interface DeprecationMessageProps {}

/**
 * Static notice telling the user that the selected model is no longer
 * supported and which models are used instead.
 */
const DeprecationMessage: React.FC<DeprecationMessageProps> = () => (
  <div className="rounded-lg p-2 bg-fuchsia-200">
    <p className="text-gray-800 text-sm">
      We no longer support this model. Instead, code generation will use
      GPT-4o or Claude Sonnet 3.5, the 2 state-of-the-art models.
    </p>
  </div>
);

export default DeprecationMessage;

View File

@ -0,0 +1,25 @@
/**
 * Renders two fixed-position badges in the bottom-right corner: a link to
 * the feature-request board and an attribution link to Pico.
 *
 * Both links open in a new tab. `rel="noopener"` is set so the opened page
 * cannot reach back into this window through `window.opener`.
 */
export function PicoBadge() {
  return (
    <>
      <a
        href="https://screenshot-to-code.canny.io/feature-requests"
        target="_blank"
        rel="noopener"
      >
        <div
          className="fixed z-50 bottom-16 right-5 rounded-md shadow bg-black
          text-white px-4 text-xs py-3 cursor-pointer"
        >
          feature requests?
        </div>
      </a>
      <a
        href="https://picoapps.xyz?ref=screenshot-to-code"
        target="_blank"
        rel="noopener"
      >
        <div
          className="fixed z-50 bottom-5 right-5 rounded-md shadow text-black
          bg-white px-4 text-xs py-3 cursor-pointer"
        >
          an open source project by Pico
        </div>
      </a>
    </>
  );
}

View File

@ -0,0 +1,16 @@
import { URLS } from "../../urls";
/**
 * Small, right-aligned text link to the tips page (`URLS.tips`).
 * Opens in a new tab.
 */
function TipLink() {
return (
<a
className="text-xs underline text-gray-500 text-right"
href={URLS.tips}
target="_blank"
rel="noopener"
>
Tips for better results
</a>
);
}
export default TipLink;

View File

@ -1,7 +1,7 @@
import { FaCopy } from "react-icons/fa"; import { FaCopy } from "react-icons/fa";
import CodeMirror from "./CodeMirror"; import CodeMirror from "./CodeMirror";
import { Button } from "./ui/button"; import { Button } from "../ui/button";
import { Settings } from "../types"; import { Settings } from "../../types";
import copy from "copy-to-clipboard"; import copy from "copy-to-clipboard";
import { useCallback } from "react"; import { useCallback } from "react";
import toast from "react-hot-toast"; import toast from "react-hot-toast";

View File

@ -0,0 +1,56 @@
import { useEffect, useRef, useState } from "react";
import classNames from "classnames";
import useThrottle from "../../hooks/useThrottle";
import EditPopup from "../select-and-edit/EditPopup";
interface Props {
  /** HTML source that is written into the preview iframe's `srcdoc`. */
  code: string;
  /** Selects the iframe's width classes and its element id. */
  device: "mobile" | "desktop";
  /** Passed through unchanged to the edit popup. */
  doUpdate: (updateInstruction: string, selectedElement?: HTMLElement) => void;
}

/**
 * Renders `code` inside an iframe and shows an edit popup for clicks made
 * inside the rendered document.
 */
function PreviewComponent({ code, device, doUpdate }: Props) {
  const iframeRef = useRef<HTMLIFrameElement | null>(null);

  // Don't update code more often than every 200ms.
  const throttledCode = useThrottle(code, 200);

  // Select and edit functionality
  const [clickEvent, setClickEvent] = useState<MouseEvent | null>(null);

  useEffect(() => {
    const iframe = iframeRef.current;
    if (!iframe) return;

    iframe.srcdoc = throttledCode;

    // Set up click handler for select and edit functionality. The handler is
    // attached once the new document has loaded, because each load replaces
    // the iframe's document (and with it the body the listener lives on).
    const handleLoad = () => {
      iframe.contentWindow?.document.body.addEventListener(
        "click",
        setClickEvent
      );
    };
    iframe.addEventListener("load", handleLoad);

    // Remove the load listener when the code changes or the component
    // unmounts; otherwise one more listener piles up on the iframe for every
    // code update.
    return () => {
      iframe.removeEventListener("load", handleLoad);
    };
  }, [throttledCode]);

  return (
    <div className="flex justify-center mx-2">
      <iframe
        id={`preview-${device}`}
        ref={iframeRef}
        title="Preview"
        className={classNames(
          "border-[4px] border-black rounded-[20px] shadow-lg",
          "transform scale-[0.9] origin-top",
          {
            "w-full h-[832px]": device === "desktop",
            "w-[400px] h-[832px]": device === "mobile",
          }
        )}
      ></iframe>
      <EditPopup event={clickEvent} iframeRef={iframeRef} doUpdate={doUpdate} />
    </div>
  );
}

export default PreviewComponent;

View File

@ -0,0 +1,99 @@
import { Tabs, TabsList, TabsTrigger, TabsContent } from "../ui/tabs";
import {
FaUndo,
FaDownload,
FaDesktop,
FaMobile,
FaCode,
} from "react-icons/fa";
import { AppState, Settings } from "../../types";
import CodeTab from "./CodeTab";
import { Button } from "../ui/button";
import { useAppStore } from "../../store/app-store";
import { useProjectStore } from "../../store/project-store";
import { extractHtml } from "./extractHtml";
import PreviewComponent from "./PreviewComponent";
import { downloadCode } from "./download";
interface Props {
  /** Passed to both preview components. */
  doUpdate: (instruction: string) => void;
  /** Invoked by the Reset button. */
  reset: () => void;
  /** Passed to the code tab. */
  settings: Settings;
}

/**
 * Tabbed pane showing the code of the current commit's selected variant as
 * a desktop preview, a mobile preview, or source code. Reset and Download
 * buttons are shown once the app state is CODE_READY.
 */
function PreviewPane({ doUpdate, reset, settings }: Props) {
  const { appState } = useAppStore();
  const { inputMode, head, commits } = useProjectStore();

  // Code of the selected variant of the commit `head` points at, or an empty
  // string when there is no head or no matching commit.
  const currentCommit = head ? commits[head] : undefined;
  const currentCode = currentCommit
    ? currentCommit.variants[currentCommit.selectedVariantIndex].code
    : "";

  // In video mode, while code is still being generated, only the extracted
  // HTML portion of the code is previewed.
  const needsHtmlExtraction =
    inputMode === "video" && appState === AppState.CODING;
  const previewCode = needsHtmlExtraction
    ? extractHtml(currentCode)
    : currentCode;

  return (
    <div className="ml-4">
      <Tabs defaultValue="desktop">
        <div className="flex justify-between mr-8 mb-4">
          <div className="flex items-center gap-x-2">
            {appState === AppState.CODE_READY && (
              <>
                <Button
                  onClick={reset}
                  className="flex items-center ml-4 gap-x-2 dark:text-white dark:bg-gray-700"
                >
                  <FaUndo />
                  Reset
                </Button>
                <Button
                  onClick={() => downloadCode(previewCode)}
                  variant="secondary"
                  className="flex items-center gap-x-2 mr-4 dark:text-white dark:bg-gray-700 download-btn"
                >
                  <FaDownload /> Download
                </Button>
              </>
            )}
          </div>
          <div className="flex items-center">
            <TabsList>
              <TabsTrigger value="desktop" className="flex gap-x-2">
                <FaDesktop /> Desktop
              </TabsTrigger>
              <TabsTrigger value="mobile" className="flex gap-x-2">
                <FaMobile /> Mobile
              </TabsTrigger>
              <TabsTrigger value="code" className="flex gap-x-2">
                <FaCode />
                Code
              </TabsTrigger>
            </TabsList>
          </div>
        </div>
        <TabsContent value="desktop">
          <PreviewComponent
            code={previewCode}
            device="desktop"
            doUpdate={doUpdate}
          />
        </TabsContent>
        <TabsContent value="mobile">
          <PreviewComponent
            code={previewCode}
            device="mobile"
            doUpdate={doUpdate}
          />
        </TabsContent>
        <TabsContent value="code">
          <CodeTab code={previewCode} setCode={() => {}} settings={settings} />
        </TabsContent>
      </Tabs>
    </div>
  );
}

export default PreviewPane;

View File

@ -0,0 +1,16 @@
/**
 * Triggers a browser download of `code` as a file named "index.html".
 *
 * @param code - The HTML text to download.
 */
export const downloadCode = (code: string) => {
  // Wrap the code in a blob and get a temporary URL pointing at it
  const objectUrl = URL.createObjectURL(
    new Blob([code], { type: "text/html" })
  );

  // A temporary anchor carries the URL and the file name for the download
  const link = document.createElement("a");
  link.href = objectUrl;
  link.download = "index.html";

  // The anchor is attached to the document, clicked programmatically to
  // start the download, then detached again
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);

  // Release the blob URL now that the download has been triggered
  URL.revokeObjectURL(objectUrl);
};

View File

@ -0,0 +1,16 @@
/**
 * Extracts the last HTML document from `code`, dropping any text before it
 * and anything after its closing tag.
 *
 * The opening tag may carry attributes (e.g. `<html lang="en">`). Matching
 * is case-sensitive.
 *
 * @param code - Text that may contain one or more `<html>…</html>` documents.
 * @returns The text from the last opening `<html` tag through the first
 *   `</html>` that follows it; the rest of the string from that opening tag
 *   when no closing tag follows; or "" when there is no opening tag.
 */
export function extractHtml(code: string): string {
  // "<html" only counts as an opening tag when followed by whitespace or
  // ">", so "<html lang='en'>" matches but "<htmlx>" does not.
  const openingTag = /<html(?=[\s>])/g;

  // Walk every match to find where the last opening tag starts
  let lastHtmlStartIndex = -1;
  let match: RegExpExecArray | null;
  while ((match = openingTag.exec(code)) !== null) {
    lastHtmlStartIndex = match.index;
  }

  if (lastHtmlStartIndex === -1) {
    return "";
  }

  const closingTag = "</html>";
  const closingTagIndex = code.indexOf(closingTag, lastHtmlStartIndex);

  // If "</html>" is not found, return the rest of the string starting from
  // the last opening tag
  if (closingTagIndex === -1) {
    return code.slice(lastHtmlStartIndex);
  }

  // Otherwise include the closing tag itself in the result
  return code.slice(lastHtmlStartIndex, closingTagIndex + closingTag.length);
}

View File

@ -0,0 +1,10 @@
/**
 * Computes a simple 32-bit integer hash of a string.
 *
 * Each UTF-16 code unit is folded in as `hash * 31 + codeUnit`, written as
 * `(hash << 5) - hash + codeUnit`, and the result is truncated to a signed
 * 32-bit integer after every step.
 *
 * @param str - The string to hash.
 * @param seed - Initial hash value; returned unchanged for an empty string.
 * @returns The resulting hash (may be negative).
 */
export function simpleHash(str: string, seed = 0) {
  let acc = seed;
  let pos = 0;
  while (pos < str.length) {
    // `| 0` converts the running value to a 32bit integer
    acc = ((acc << 5) - acc + str.charCodeAt(pos)) | 0;
    pos += 1;
  }
  return acc;
}

View File

@ -0,0 +1,145 @@
import { useState } from "react";
import { Button } from "../ui/button";
import { ScreenRecorderState } from "../../types";
import { blobToBase64DataUrl } from "./utils";
import fixWebmDuration from "webm-duration-fix";
import toast from "react-hot-toast";
interface Props {
  /** Current recorder state; decides which controls are rendered. */
  screenRecorderState: ScreenRecorderState;
  /** Setter used to move between recorder states. */
  setScreenRecorderState: (state: ScreenRecorderState) => void;
  /** Called with the recording's data URL and "video" to start generation. */
  generateCode: (
    referenceImages: string[],
    inputMode: "image" | "video"
  ) => void;
}

/**
 * Records the user's screen, previews the captured video, and hands the
 * recording (as a data URL) to `generateCode`.
 */
function ScreenRecorder({
  screenRecorderState,
  setScreenRecorderState,
  generateCode,
}: Props) {
  const [mediaStream, setMediaStream] = useState<MediaStream | null>(null);
  const [mediaRecorder, setMediaRecorder] = useState<MediaRecorder | null>(
    null
  );
  const [screenRecordingDataUrl, setScreenRecordingDataUrl] = useState<
    string | null
  >(null);

  // Requests a display-capture stream and starts recording it.
  // Shows a toast and rethrows if anything in the setup fails.
  const startScreenRecording = async () => {
    try {
      // Get the screen recording stream
      const stream = await navigator.mediaDevices.getDisplayMedia({
        video: true,
        audio: { echoCancellation: true },
      });
      setMediaStream(stream);

      // TODO: Test across different browsers
      // Create the media recorder
      const mimeType = "video/webm";
      const recorder = new MediaRecorder(stream, { mimeType });
      setMediaRecorder(recorder);

      // Accumulate chunks as data is available
      const recordedChunks: BlobPart[] = [];
      recorder.ondataavailable = (e: BlobEvent) => {
        recordedChunks.push(e.data);
      };

      // When media recorder is stopped, create a data URL
      recorder.onstop = async () => {
        // TODO: Do I need to fix duration if it's not a webm?
        const rawBlob = new Blob(recordedChunks, { type: mimeType });
        const completeBlob = await fixWebmDuration(rawBlob);

        const dataUrl = await blobToBase64DataUrl(completeBlob);
        setScreenRecordingDataUrl(dataUrl);
        setScreenRecorderState(ScreenRecorderState.FINISHED);
      };

      // Start recording
      recorder.start();
      setScreenRecorderState(ScreenRecorderState.RECORDING);
    } catch (error) {
      toast.error("Could not start screen recording");
      throw error;
    }
  };

  // Stops the recorder (which fires its `onstop` handler) and every track of
  // the captured stream.
  const stopScreenRecording = () => {
    // Stop the recorder
    if (mediaRecorder) {
      mediaRecorder.stop();
      setMediaRecorder(null);
    }

    // Stop the screen sharing stream
    if (mediaStream) {
      for (const track of mediaStream.getTracks()) {
        track.stop();
      }
    }
  };

  // Starts code generation from the captured recording.
  // Shows a toast and throws when there is no recording yet.
  const kickoffGeneration = () => {
    if (!screenRecordingDataUrl) {
      toast.error("Screen recording does not exist. Please try again.");
      throw new Error("No screen recording data url");
    }
    generateCode([screenRecordingDataUrl], "video");
  };

  return (
    <div className="flex items-center justify-center my-3">
      {screenRecorderState === ScreenRecorderState.INITIAL && (
        <Button onClick={startScreenRecording}>Record Screen</Button>
      )}

      {screenRecorderState === ScreenRecorderState.RECORDING && (
        <div className="flex items-center flex-col gap-y-4">
          <div className="flex items-center mr-2 text-xl gap-x-1">
            <span className="block h-10 w-10 bg-red-600 rounded-full mr-1 animate-pulse"></span>
            <span>Recording...</span>
          </div>
          <Button onClick={stopScreenRecording}>Finish Recording</Button>
        </div>
      )}

      {screenRecorderState === ScreenRecorderState.FINISHED && (
        <div className="flex items-center flex-col gap-y-4">
          <div className="flex items-center mr-2 text-xl gap-x-1">
            <span>Screen Recording Captured.</span>
          </div>
          {screenRecordingDataUrl && (
            <video
              muted
              autoPlay
              loop
              className="w-[340px] border border-gray-200 rounded-md"
              src={screenRecordingDataUrl}
            />
          )}
          <div className="flex gap-x-2">
            <Button
              variant="secondary"
              onClick={() =>
                setScreenRecorderState(ScreenRecorderState.INITIAL)
              }
            >
              Re-record
            </Button>
            <Button onClick={kickoffGeneration}>Generate</Button>
          </div>
        </div>
      )}
    </div>
  );
}

export default ScreenRecorder;

View File

@ -0,0 +1,31 @@
/**
 * Triggers a browser download of `blob` as a file named "recording.webm".
 *
 * @param blob - The data to download.
 */
export function downloadBlob(blob: Blob) {
  // Temporary URL pointing at the blob
  const objectUrl = URL.createObjectURL(blob);

  // Temporary anchor carrying the URL and the download file name
  const link = document.createElement("a");
  link.href = objectUrl;
  link.download = "recording.webm";

  // Attach, click programmatically to start the download, then detach
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);

  // Clear object URL
  URL.revokeObjectURL(objectUrl);
}
/**
 * Reads a blob with `FileReader.readAsDataURL` and resolves with the
 * resulting data URL.
 *
 * @param blob - The blob to read.
 * @returns A promise for the data URL string. It rejects when the reader
 *   reports an error or finishes without a result.
 */
export function blobToBase64DataUrl(blob: Blob): Promise<string> {
  return new Promise((resolve, reject) => {
    const fileReader = new FileReader();

    fileReader.onerror = () =>
      reject(new Error("FileReader encountered an error."));

    fileReader.onloadend = () => {
      const { result } = fileReader;
      if (!result) {
        reject(new Error("FileReader did not return a result."));
        return;
      }
      resolve(result as string);
    };

    fileReader.readAsDataURL(blob);
  });
}

View File

@ -0,0 +1,150 @@
import React, { useEffect, useRef, useState } from "react";
import { Textarea } from "../ui/textarea";
import { Button } from "../ui/button";
import { addHighlight, getAdjustedCoordinates, removeHighlight } from "./utils";
import { useAppStore } from "../../store/app-store";
import KeyboardShortcutBadge from "../core/KeyboardShortcutBadge";
interface EditPopupProps {
  /** Latest click event to react to, or null when there is none. */
  event: MouseEvent | null;
  /** Ref to the iframe; its bounding rect is used to position the popup. */
  iframeRef: React.RefObject<HTMLIFrameElement>;
  /** Called with the typed instruction and the selected element (if any). */
  doUpdate: (updateInstruction: string, selectedElement?: HTMLElement) => void;
}

/**
 * Popup shown at the position of a click while select-and-edit mode is on.
 * It highlights the clicked element and lets the user type an instruction,
 * which is submitted together with that element through `doUpdate`.
 * Renders nothing while the popup is hidden.
 */
const EditPopup: React.FC<EditPopupProps> = ({
  event,
  iframeRef,
  doUpdate,
}) => {
  // App state
  const { inSelectAndEditMode } = useAppStore();

  // Create a wrapper ref to store inSelectAndEditMode so the value is not stale
  // in a event listener
  const inSelectAndEditModeRef = useRef(inSelectAndEditMode);

  // Update the ref whenever the state changes
  useEffect(() => {
    inSelectAndEditModeRef.current = inSelectAndEditMode;
  }, [inSelectAndEditMode]);

  // Popup state
  const [popupVisible, setPopupVisible] = useState(false);
  const [popupPosition, setPopupPosition] = useState({ x: 0, y: 0 });

  // Edit state
  const [selectedElement, setSelectedElement] = useState<
    HTMLElement | undefined
  >(undefined);
  const [updateText, setUpdateText] = useState("");

  // Textarea ref for focusing
  const textareaRef = useRef<HTMLTextAreaElement | null>(null);

  function onUpdate(updateText: string) {
    // Perform the update. The highlight is removed first, and whatever
    // removeHighlight returns is what gets passed on as the selected element.
    doUpdate(
      updateText,
      selectedElement ? removeHighlight(selectedElement) : selectedElement
    );

    // Unselect the element
    setSelectedElement(undefined);

    // Hide the popup
    setPopupVisible(false);
  }

  // Remove highlight and reset state when not in select and edit mode
  useEffect(() => {
    if (!inSelectAndEditMode) {
      if (selectedElement) removeHighlight(selectedElement);
      setSelectedElement(undefined);
      setPopupVisible(false);
    }
  }, [inSelectAndEditMode, selectedElement]);

  // Handle the click event
  useEffect(() => {
    // Return if not in select and edit mode
    if (!inSelectAndEditModeRef.current || !event) {
      return;
    }

    // Prevent default to avoid issues like label clicks triggering textareas, etc.
    event.preventDefault();

    const targetElement = event.target as HTMLElement;

    // Return if no target element
    if (!targetElement) return;

    // Highlight and set the selected element
    setSelectedElement((prev) => {
      // Remove style from previous element
      if (prev) {
        removeHighlight(prev);
      }
      return addHighlight(targetElement);
    });

    // Calculate adjusted coordinates
    const adjustedCoordinates = getAdjustedCoordinates(
      event.clientX,
      event.clientY,
      iframeRef.current?.getBoundingClientRect()
    );

    // Show the popup at the click position
    setPopupVisible(true);
    setPopupPosition({ x: adjustedCoordinates.x, y: adjustedCoordinates.y });

    // Reset the update text
    setUpdateText("");

    // Focus the textarea
    textareaRef.current?.focus();
  }, [event, iframeRef]);

  // Focus the textarea when the popup is visible (we can't do this only when handling the click event
  // because the textarea is not rendered yet)
  // We need to also do it in the click event because popupVisible doesn't change values in that event
  useEffect(() => {
    if (popupVisible) {
      textareaRef.current?.focus();
    }
  }, [popupVisible]);

  // Render nothing while hidden. `null` is returned explicitly rather than
  // `undefined`, which is not a valid component return value in every
  // React / type-definition version.
  if (!popupVisible) return null;

  return (
    <div
      className="absolute bg-white dark:bg-gray-800 p-4 border border-gray-300 dark:border-gray-600 rounded shadow-lg w-60"
      style={{ top: popupPosition.y, left: popupPosition.x }}
    >
      <Textarea
        ref={textareaRef}
        value={updateText}
        onChange={(e) => setUpdateText(e.target.value)}
        placeholder="Tell the AI what to change about this element..."
        className="dark:bg-gray-700 dark:text-white"
        onKeyDown={(e) => {
          if (e.key === "Enter") {
            e.preventDefault();
            onUpdate(updateText);
          }
        }}
      />
      <div className="flex justify-end mt-2">
        <Button
          className="dark:bg-gray-700 dark:text-white"
          onClick={() => onUpdate(updateText)}
        >
          Update <KeyboardShortcutBadge letter="enter" />
        </Button>
      </div>
    </div>
  );
};

export default EditPopup;

Some files were not shown because too many files have changed in this diff Show More