chore: update remove_leading_symbols pattern, keep 【 (#29419)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
wangxiaolei 2025-12-11 09:47:39 +08:00 committed by GitHub
parent b4afc7e435
commit d152d63e7d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 2 additions and 1 deletion

View File

@ -13,5 +13,5 @@ def remove_leading_symbols(text: str) -> str:
"""
# Match Unicode ranges for punctuation and symbols
# FIXME: this pattern is a confusing quick fix for #11868; consider refactoring it later
pattern = r"^[\u2000-\u206F\u2E00-\u2E7F\u3000-\u303F\"#$%&'()*+,./:;<=>?@^_`~]+"
pattern = r'^[\[\]\u2000-\u2025\u2027-\u206F\u2E00-\u2E7F\u3000-\u300F\u3011-\u303F"#$%&\'()*+,./:;<=>?@^_`~]+'
return re.sub(pattern, "", text)

View File

@ -14,6 +14,7 @@ from core.tools.utils.text_processing_utils import remove_leading_symbols
("Hello, World!", "Hello, World!"),
("", ""),
(" ", " "),
("【测试】", "【测试】"),
],
)
def test_remove_leading_symbols(input_text, expected_output):