sync from upstream
This commit is contained in:
parent
e9c6756c2e
commit
101d1738b6
105
backend/llm.py
105
backend/llm.py
@ -62,7 +62,7 @@ async def stream_openai_response(
|
|||||||
model: Llm,
|
model: Llm,
|
||||||
) -> str:
|
) -> str:
|
||||||
client = AsyncOpenAI(api_key=api_key, base_url=base_url)
|
client = AsyncOpenAI(api_key=api_key, base_url=base_url)
|
||||||
|
print(f"--stream_openai_response--{model}")
|
||||||
# Base parameters
|
# Base parameters
|
||||||
params = {
|
params = {
|
||||||
"model": model.value,
|
"model": model.value,
|
||||||
@ -105,9 +105,9 @@ async def stream_claude_response(
|
|||||||
callback: Callable[[str], Awaitable[None]],
|
callback: Callable[[str], Awaitable[None]],
|
||||||
model: Llm,
|
model: Llm,
|
||||||
) -> str:
|
) -> str:
|
||||||
|
print(f"--stream_openai_response--{model}")
|
||||||
# client = AsyncAnthropic(api_key=api_key)
|
# client = AsyncAnthropic(api_key=api_key)
|
||||||
|
modelId = BEDROCK_LLM_MODELID_LIST[model]
|
||||||
# Base parameters
|
# Base parameters
|
||||||
max_tokens = 4096
|
max_tokens = 4096
|
||||||
temperature = 0.0
|
temperature = 0.0
|
||||||
@ -256,17 +256,6 @@ async def stream_claude_response_native(
|
|||||||
response_text += chunk_obj['delta']['text']
|
response_text += chunk_obj['delta']['text']
|
||||||
await callback(chunk_obj['delta']['text'])
|
await callback(chunk_obj['delta']['text'])
|
||||||
|
|
||||||
# if chunk_obj['type'] == 'content_block_delta':
|
|
||||||
# print(f"\nStop reason: {chunk_obj['delta']['stop_reason']}")
|
|
||||||
# print(f"Stop sequence: {chunk_obj['delta']['stop_sequence']}")
|
|
||||||
# print(f"Output tokens: {chunk_obj['usage']['output_tokens']}")
|
|
||||||
# output_tokens = chunk_obj['usage']['output_tokens']
|
|
||||||
# # input_tokens = chunk_obj['usage']['input_tokens']
|
|
||||||
# # input_tokens = chunk_obj['amazon-bedrock-invocationMetrics']['inputTokenCount']
|
|
||||||
# if chunk_obj['type'] == 'text_delta':
|
|
||||||
# print(chunk_obj['delta']['text'])
|
|
||||||
# response_text += chunk_obj['delta']['text']
|
|
||||||
# await callback(chunk_obj['delta']['text'])
|
|
||||||
print(response_text)
|
print(response_text)
|
||||||
|
|
||||||
# Write each pass's code to .html file and thinking to .txt file
|
# Write each pass's code to .html file and thinking to .txt file
|
||||||
@ -304,91 +293,3 @@ async def stream_claude_response_native(
|
|||||||
raise Exception("No HTML response found in AI response")
|
raise Exception("No HTML response found in AI response")
|
||||||
else:
|
else:
|
||||||
return response_text
|
return response_text
|
||||||
|
|
||||||
async def stream_claude_response_native_bak(
|
|
||||||
system_prompt: str,
|
|
||||||
messages: list[Any],
|
|
||||||
api_key: str,
|
|
||||||
callback: Callable[[str], Awaitable[None]],
|
|
||||||
include_thinking: bool = False,
|
|
||||||
model: Llm = Llm.CLAUDE_3_OPUS,
|
|
||||||
) -> str:
|
|
||||||
|
|
||||||
client = AsyncAnthropic(api_key=api_key)
|
|
||||||
|
|
||||||
# Base model parameters
|
|
||||||
max_tokens = 4096
|
|
||||||
temperature = 0.0
|
|
||||||
|
|
||||||
# Multi-pass flow
|
|
||||||
current_pass_num = 1
|
|
||||||
max_passes = 2
|
|
||||||
|
|
||||||
prefix = "<thinking>"
|
|
||||||
response = None
|
|
||||||
|
|
||||||
# For debugging
|
|
||||||
full_stream = ""
|
|
||||||
debug_file_writer = DebugFileWriter()
|
|
||||||
|
|
||||||
while current_pass_num <= max_passes:
|
|
||||||
current_pass_num += 1
|
|
||||||
|
|
||||||
# Set up message depending on whether we have a <thinking> prefix
|
|
||||||
messages_to_send = (
|
|
||||||
messages + [{"role": "assistant", "content": prefix}]
|
|
||||||
if include_thinking
|
|
||||||
else messages
|
|
||||||
)
|
|
||||||
|
|
||||||
pprint_prompt(messages_to_send)
|
|
||||||
|
|
||||||
async with client.messages.stream(
|
|
||||||
model=model.value,
|
|
||||||
max_tokens=max_tokens,
|
|
||||||
temperature=temperature,
|
|
||||||
system=system_prompt,
|
|
||||||
messages=messages_to_send, # type: ignore
|
|
||||||
) as stream:
|
|
||||||
async for text in stream.text_stream:
|
|
||||||
print(text, end="", flush=True)
|
|
||||||
full_stream += text
|
|
||||||
await callback(text)
|
|
||||||
|
|
||||||
response = await stream.get_final_message()
|
|
||||||
response_text = response.content[0].text
|
|
||||||
|
|
||||||
# Write each pass's code to .html file and thinking to .txt file
|
|
||||||
if IS_DEBUG_ENABLED:
|
|
||||||
debug_file_writer.write_to_file(
|
|
||||||
f"pass_{current_pass_num - 1}.html",
|
|
||||||
debug_file_writer.extract_html_content(response_text),
|
|
||||||
)
|
|
||||||
debug_file_writer.write_to_file(
|
|
||||||
f"thinking_pass_{current_pass_num - 1}.txt",
|
|
||||||
response_text.split("</thinking>")[0],
|
|
||||||
)
|
|
||||||
|
|
||||||
# Set up messages array for next pass
|
|
||||||
messages += [
|
|
||||||
{"role": "assistant", "content": str(prefix) + response.content[0].text},
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "You've done a good job with a first draft. Improve this further based on the original instructions so that the app is fully functional and looks like the original video of the app we're trying to replicate.",
|
|
||||||
},
|
|
||||||
]
|
|
||||||
|
|
||||||
print(
|
|
||||||
f"Token usage: Input Tokens: {response.usage.input_tokens}, Output Tokens: {response.usage.output_tokens}"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Close the Anthropic client
|
|
||||||
await client.close()
|
|
||||||
|
|
||||||
if IS_DEBUG_ENABLED:
|
|
||||||
debug_file_writer.write_to_file("full_stream.txt", full_stream)
|
|
||||||
|
|
||||||
if not response:
|
|
||||||
raise Exception("No HTML response found in AI response")
|
|
||||||
else:
|
|
||||||
return response.content[0].text
|
|
||||||
|
|||||||
@ -441,7 +441,7 @@ function App() {
|
|||||||
2x faster. Give it a try!
|
2x faster. Give it a try!
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
)} */}
|
)}
|
||||||
|
|
||||||
{appState !== AppState.CODE_READY && <TipLink />}
|
{appState !== AppState.CODE_READY && <TipLink />}
|
||||||
|
|
||||||
|
|||||||
@ -1,10 +1,10 @@
|
|||||||
// Keep in sync with backend (llm.py)
|
// Keep in sync with backend (llm.py)
|
||||||
// Order here matches dropdown order
|
// Order here matches dropdown order
|
||||||
export enum CodeGenerationModel {
|
export enum CodeGenerationModel {
|
||||||
CLAUDE_3_5_SONNET = "claude_3_5_sonnet",
|
CLAUDE_3_5_SONNET_2024_06_20 = "claude-3-5-sonnet-20240620",
|
||||||
|
// CLAUDE_3_5_SONNET = "claude_3_5_sonnet",
|
||||||
CLAUDE_3_SONNET = "claude_3_sonnet",
|
CLAUDE_3_SONNET = "claude_3_sonnet",
|
||||||
GPT_4O_2024_05_13 = "gpt-4o-2024-05-13",
|
GPT_4O_2024_05_13 = "gpt-4o-2024-05-13",
|
||||||
CLAUDE_3_5_SONNET_2024_06_20 = "claude-3-5-sonnet-20240620",
|
|
||||||
GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09",
|
GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09",
|
||||||
GPT_4_VISION = "gpt_4_vision",
|
GPT_4_VISION = "gpt_4_vision",
|
||||||
}
|
}
|
||||||
@ -13,10 +13,10 @@ export enum CodeGenerationModel {
|
|||||||
export const CODE_GENERATION_MODEL_DESCRIPTIONS: {
|
export const CODE_GENERATION_MODEL_DESCRIPTIONS: {
|
||||||
[key in CodeGenerationModel]: { name: string; inBeta: boolean };
|
[key in CodeGenerationModel]: { name: string; inBeta: boolean };
|
||||||
} = {
|
} = {
|
||||||
"gpt-4o-2024-05-13": { name: "GPT-4o 🌟", inBeta: false },
|
|
||||||
"claude-3-5-sonnet-20240620": { name: "Claude 3.5 Sonnet 🌟", inBeta: false },
|
"claude-3-5-sonnet-20240620": { name: "Claude 3.5 Sonnet 🌟", inBeta: false },
|
||||||
|
"gpt-4o-2024-05-13": { name: "GPT-4o 🌟", inBeta: false },
|
||||||
"gpt-4-turbo-2024-04-09": { name: "GPT-4 Turbo (Apr 2024)", inBeta: false },
|
"gpt-4-turbo-2024-04-09": { name: "GPT-4 Turbo (Apr 2024)", inBeta: false },
|
||||||
gpt_4_vision: { name: "GPT-4 Vision (Nov 2023)", inBeta: false },
|
gpt_4_vision: { name: "GPT-4 Vision (Nov 2023)", inBeta: false },
|
||||||
claude_3_sonnet: { name: "Claude 3 Sonnet", inBeta: false },
|
claude_3_sonnet: { name: "Claude 3 Sonnet", inBeta: false },
|
||||||
claude_3_5_sonnet: { name: "Claude 3.5 Sonnet", inBeta: false },
|
// claude_3_5_sonnet: { name: "Claude 3.5 Sonnet", inBeta: false },
|
||||||
};
|
};
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user