From 28d33a4d7e768034fc35a01b6269fe0c7e2e0161 Mon Sep 17 00:00:00 2001
From: Abi Raja
Date: Thu, 7 Mar 2024 17:31:39 -0500
Subject: [PATCH] do multiple passes for video claude

---
 backend/llm.py | 71 ++++++++++++++++++++++++++++++++++----------------
 1 file changed, 48 insertions(+), 23 deletions(-)

diff --git a/backend/llm.py b/backend/llm.py
index 2be7792..b9be56d 100644
--- a/backend/llm.py
+++ b/backend/llm.py
@@ -3,6 +3,8 @@
 from anthropic import AsyncAnthropic
 from openai import AsyncOpenAI
 from openai.types.chat import ChatCompletionMessageParam, ChatCompletionChunk
+from utils import pprint_prompt
+
 MODEL_GPT_4_VISION = "gpt-4-vision-preview"
 MODEL_CLAUDE_SONNET = "claude-3-sonnet-20240229"
 MODEL_CLAUDE_OPUS = "claude-3-opus-20240229"
@@ -113,34 +115,57 @@ async def stream_claude_response_native(
 
     client = AsyncAnthropic(api_key=api_key)
 
-    # Base parameters
+    # Base model parameters
     max_tokens = 4096
     temperature = 0.0
 
-    # Stream Claude response
+    # Multi-pass flow
+    current_pass_num = 1
+    max_passes = 2
 
-    # Set up message depending on whether we have a prefix
-    messages = (
-        messages + [{"role": "assistant", "content": ""}]
-        if include_thinking
-        else messages
-    )
+    prefix = ""
+    response = None
 
-    async with client.messages.stream(
-        model=model,
-        max_tokens=max_tokens,
-        temperature=temperature,
-        system=system_prompt,
-        messages=messages,  # type: ignore
-    ) as stream:
-        async for text in stream.text_stream:
-            await callback(text)
+    while current_pass_num <= max_passes:
+        current_pass_num += 1
 
-    # Return final message
-    response = await stream.get_final_message()
+        # Set up message depending on whether we have a prefix
+        messages_to_send = (
+            messages + [{"role": "assistant", "content": prefix}]
+            if include_thinking
+            else messages
+        )
 
-    print(
-        f"Token usage: Input Tokens: {response.usage.input_tokens}, Output Tokens: {response.usage.output_tokens}"
-    )
+        pprint_prompt(messages_to_send)
 
-    return response.content[0].text
+        async with client.messages.stream(
+            model=model,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            system=system_prompt,
+            messages=messages_to_send,  # type: ignore
+        ) as stream:
+            async for text in stream.text_stream:
+                print(text, end="", flush=True)
+                await callback(text)
+
+        # Return final message
+        response = await stream.get_final_message()
+
+        # Set up messages array for next pass
+        messages += [
+            {"role": "assistant", "content": str(prefix) + response.content[0].text},
+            {
+                "role": "user",
+                "content": "You've done a good job with a first draft. Improve this further based on the original instructions so that the app is fully functional and looks like the original video of the app we're trying to replicate.",
+            },
+        ]
+
+        print(
+            f"Token usage: Input Tokens: {response.usage.input_tokens}, Output Tokens: {response.usage.output_tokens}"
+        )
+
+    if not response:
+        raise Exception("No HTML response found in AI response")
+    else:
+        return response.content[0].text