mirror of
https://github.com/HKUDS/LightRAG.git
synced 2025-11-13 16:43:53 +00:00
Remove spaces between Chinese characters and English symbols
This commit is contained in:
parent
ff65cba544
commit
17f5439952
@ -1448,9 +1448,9 @@ def normalize_extracted_info(name: str, is_entity=False) -> str:
|
|||||||
# (?=[\u4e00-\u9fa5]): Positive lookahead for Chinese character
|
# (?=[\u4e00-\u9fa5]): Positive lookahead for Chinese character
|
||||||
name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[\u4e00-\u9fa5])", "", name)
|
name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[\u4e00-\u9fa5])", "", name)
|
||||||
|
|
||||||
# Remove spaces between Chinese and English/numbers
|
# Remove spaces between Chinese and English/numbers/symbols
|
||||||
name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[a-zA-Z0-9])", "", name)
|
name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[a-zA-Z0-9\(\)\[\]@#$%!&\*\-=+_])", "", name)
|
||||||
name = re.sub(r"(?<=[a-zA-Z0-9])\s+(?=[\u4e00-\u9fa5])", "", name)
|
name = re.sub(r"(?<=[a-zA-Z0-9\(\)\[\]@#$%!&\*\-=+_])\s+(?=[\u4e00-\u9fa5])", "", name)
|
||||||
|
|
||||||
# Remove English quotation marks from the beginning and end
|
# Remove English quotation marks from the beginning and end
|
||||||
if len(name) >= 2 and name.startswith('"') and name.endswith('"'):
|
if len(name) >= 2 and name.startswith('"') and name.endswith('"'):
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user