screenshot-to-code/backend/codegen/utils.py
2024-06-27 12:33:55 +08:00

21 lines
608 B
Python

import re
import sentry_sdk
def extract_html_content(text: str) -> str:
    """Return the ``<html>...</html>`` document embedded in *text*.

    The search is non-greedy, so the result runs from the first opening
    ``<html ...>`` tag to the first ``</html>`` that follows it, tags
    included. Tag names are matched case-insensitively.

    When no complete ``<html>`` element is found, the problem is printed,
    reported to Sentry, and *text* is returned unchanged.
    """
    # DOTALL lets the document span multiple lines; IGNORECASE accepts
    # upper- or mixed-case tags such as <HTML>, which are equally valid HTML.
    match = re.search(
        r"(<html.*?>.*?</html>)", text, re.DOTALL | re.IGNORECASE
    )
    if match:
        return match.group(1)

    # No complete <html> element: log it and hand the text back untouched.
    print(
        "[HTML Extraction] No <html> tags found in the generated content: " + text
    )
    # Raise and immediately catch so Sentry records the event with a
    # traceback pointing at this function.
    try:
        raise Exception("No <html> tags found in the generated content")
    except Exception:
        sentry_sdk.capture_exception()
    return text