21 lines
608 B
Python
21 lines
608 B
Python
import re
|
|
|
|
import sentry_sdk
|
|
|
|
|
|
def extract_html_content(text: str) -> str:
    """Return the first ``<html>...</html>`` document found in *text*.

    The opening and closing tags are included in the result. Tag matching is
    case-insensitive (HTML tag names are not case-sensitive), and the search
    spans newlines.

    Args:
        text: Generated content that is expected to contain an HTML document,
            possibly surrounded by extra text.

    Returns:
        The matched ``<html ...>...</html>`` substring, or *text* unchanged
        when no such pair of tags is present. In the latter case the problem
        is printed and reported to Sentry, but no exception propagates.
    """
    # Non-greedy so the match ends at the first closing tag; DOTALL lets "."
    # cross line breaks inside the document.
    match = re.search(
        r"(<html.*?>.*?</html>)", text, re.DOTALL | re.IGNORECASE
    )
    if match:
        return match.group(1)

    # No <html> tags: fall back to returning the input as-is, and make the
    # failure visible in the logs and in Sentry.
    print(
        "[HTML Extraction] No <html> tags found in the generated content: " + text
    )
    try:
        # Raise and catch so that capture_exception() picks up a real
        # exception (with traceback) from the current exception context.
        raise Exception("No <html> tags found in the generated content")
    except Exception:
        sentry_sdk.capture_exception()
    return text
|