from kag.common.utils import extract_tag_content


def run_extra_tag():
    """Run table-driven checks against ``extract_tag_content``.

    Each case provides an ``input`` string, the ``expected`` list of
    ``(tag, content)`` tuples and a human-readable ``description``.
    Text that is not wrapped in any tag is expected with an empty tag
    name (``""``), and an empty input is expected to produce an empty list.

    Raises:
        AssertionError: If the result for any case differs from its
            expected value; the message includes the case number, its
            description, the actual result and the expected result.
    """
    # NOTE(review): the markup inside the inputs was reconstructed from the
    # expected (tag, content) pairs and the case descriptions -- confirm
    # against the upstream version of this test.
    test_cases = [
        {
            # Closed tags mixed with untagged text.
            "input": "<tag1>abced</tag1>some word<tag2>other tags</tag2>",
            "expected": [
                ("tag1", "abced"),
                ("", "some word"),
                ("tag2", "other tags"),
            ],
            "description": "基本闭合标签与无标签文本混合",
        },
        {
            # Mix of closed and unclosed tags.
            "input": "<p>Hello <b>world</b> this is <i>test",
            "expected": [
                ("p", "Hello "),
                ("b", "world"),
                ("", " this is "),
                ("i", "test"),
            ],
            "description": "混合闭合与未闭合标签",
        },
        {
            # Plain text without any tags.
            "input": "plain text without any tags",
            "expected": [("", "plain text without any tags")],
            "description": "纯文本无标签",
        },
        {
            # Multi-line content: newlines and spaces must be preserved.
            "input": "<div>\n Line 1\n Line 2\n Line 3\n</div>",
            "expected": [("div", "\n Line 1\n Line 2\n Line 3\n")],
            "description": "多行内容和空白处理",
        },
        {
            # Several closed tags back to back.
            "input": "<a>A</a><b>B</b><c>C</c>",
            "expected": [("a", "A"), ("b", "B"), ("c", "C")],
            "description": "连续多个闭合标签",
        },
        {
            # Last tag is left unclosed and runs to the end of the input.
            "input": "<title>My Document</title><content>This is the content",
            "expected": [
                ("title", "My Document"),
                ("content", "This is the content"),
            ],
            "description": "未闭合标签(EOF结尾)",
        },
        {
            # Content containing special characters.
            "input": "<log>Error: &*^%$#@!;</log><note>End of log",
            "expected": [("log", "Error: &*^%$#@!;"), ("note", "End of log")],
            "description": "含特殊字符的内容",
        },
        {
            # Empty string input.
            "input": "",
            "expected": [],
            "description": "空字符串输入",
        },
    ]

    # Case numbers are 1-based in the printed / asserted messages.
    for number, case in enumerate(test_cases, start=1):
        result = extract_tag_content(case["input"])
        assert (
            result == case["expected"]
        ), f"Test {number} failed: {case['description']}\nGot: {result}\nExpected: {case['expected']}"
        print(f"Test {number} passed: {case['description']}")


if __name__ == "__main__":
    run_extra_tag()