diff --git a/backend/codegen/__init__.py b/backend/codegen/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/codegen/test_utils.py b/backend/codegen/test_utils.py new file mode 100644 index 0000000..cd663f8 --- /dev/null +++ b/backend/codegen/test_utils.py @@ -0,0 +1,57 @@ +import unittest +from codegen.utils import extract_html_content + + +class TestUtils(unittest.TestCase): + + def test_extract_html_content_with_html_tags(self): + text = "
Hello, World!
" + expected = "Hello, World!
" + result = extract_html_content(text) + self.assertEqual(result, expected) + + def test_extract_html_content_without_html_tags(self): + text = "No HTML content here." + expected = "No HTML content here." + result = extract_html_content(text) + self.assertEqual(result, expected) + + def test_extract_html_content_with_partial_html_tags(self): + text = "Hello, World!
" + expected = "Hello, World!
" + result = extract_html_content(text) + self.assertEqual(result, expected) + + def test_extract_html_content_with_multiple_html_tags(self): + text = "First
Some textSecond
" + expected = "First
" + result = extract_html_content(text) + self.assertEqual(result, expected) + + ## The following are tests based on actual LLM outputs + + def test_extract_html_content_some_explanation_before(self): + text = """Got it! You want the song list to be displayed horizontally. I'll update the code to ensure that the song list is displayed in a horizontal layout. + + Here's the updated code: + + """ + expected = '' + result = extract_html_content(text) + self.assertEqual(result, expected) + + def test_markdown_tags(self): + text = "```html```" + expected = "```html```" + result = extract_html_content(text) + self.assertEqual(result, expected) + + def test_doctype_text(self): + text = '' + expected = '' + result = extract_html_content(text) + self.assertEqual(result, expected) + + +if __name__ == "__main__": + unittest.main() diff --git a/backend/codegen/utils.py b/backend/codegen/utils.py new file mode 100644 index 0000000..107579b --- /dev/null +++ b/backend/codegen/utils.py @@ -0,0 +1,14 @@ +import re + + +def extract_html_content(text: str): + # Use regex to find content within tags and include the tags themselves + match = re.search(r"(