from kag.common.utils import extract_tag_content
def run_extra_tag():
    """Check ``extract_tag_content`` against a table of tagged-text samples.

    Each case pairs an input string with the list of ``(tag, content)``
    tuples the extractor is expected to return. Text outside any tag is
    expected under the empty tag name ``""``. One line is printed per
    passing case.

    Raises:
        AssertionError: On the first case whose result differs from the
            expected list; the message shows both the actual and the
            expected value.
    """
    # NOTE(review): the markup in the inputs below is rebuilt from each
    # case's expected (tag, content) pairs and its description. Where the
    # expected output cannot distinguish a closed tag from an unclosed one
    # (e.g. "tag2", "title", "note"), the choice follows the description --
    # confirm against the extractor's intended fixtures.
    test_cases = [
        {
            # Closed tags mixed with untagged text.
            "input": "<tag1>abced</tag1>some word<tag2>other tags</tag2>",
            "expected": [("tag1", "abced"), ("", "some word"), ("tag2", "other tags")],
            "description": "基本闭合标签与无标签文本混合",
        },
        {
            # <p> and <i> are never closed; <b> is.
            "input": "<p>Hello <b>world</b> this is <i>test",
            "expected": [
                ("p", "Hello "),
                ("b", "world"),
                ("", " this is "),
                ("i", "test"),
            ],
            "description": "混合闭合与未闭合标签",
        },
        {
            # No markup at all: the whole string comes back untagged.
            "input": "plain text without any tags",
            "expected": [("", "plain text without any tags")],
            "description": "纯文本无标签",
        },
        {
            # Newlines and leading spaces inside a tag must be preserved.
            "input": "<div>\n Line 1\n Line 2\n Line 3\n</div>",
            "expected": [
                ("div", "\n Line 1\n Line 2\n Line 3\n")
            ],
            "description": "多行内容和空白处理",
        },
        {
            # Back-to-back closed tags with nothing between them.
            "input": "<a>A</a><b>B</b><c>C</c>",
            "expected": [("a", "A"), ("b", "B"), ("c", "C")],
            "description": "连续多个闭合标签",
        },
        {
            # The last tag runs to end of input without a closing tag.
            "input": "<title>My Document</title><content>This is the content",
            "expected": [("title", "My Document"), ("content", "This is the content")],
            "description": "未闭合标签(EOF结尾)",
        },
        {
            # Punctuation and symbols inside tag content.
            "input": "<log>Error: &*^%$#@!;</log><note>End of log</note>",
            "expected": [("log", "Error: &*^%$#@!;"), ("note", "End of log")],
            "description": "含特殊字符的内容",
        },
        {
            # Empty input yields no segments.
            "input": "",
            "expected": [],
            "description": "空字符串输入",
        },
    ]

    # Cases are numbered from 1 in the printed/asserted messages.
    for number, case in enumerate(test_cases, start=1):
        result = extract_tag_content(case["input"])
        assert (
            result == case["expected"]
        ), f"Test {number} failed: {case['description']}\nGot: {result}\nExpected: {case['expected']}"
        print(f"Test {number} passed: {case['description']}")
# Run the checks only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    run_extra_tag()