mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-04 02:46:33 +00:00
49 lines
1.3 KiB
Python
49 lines
1.3 KiB
Python
#!/usr/bin/env python3
|
|
|
|
"""
|
|
Check whether given python files contain non-ASCII comments.
|
|
|
|
How to check the whole git repo:
|
|
|
|
```
|
|
$ git ls-files -z -- '*.py' | xargs -0 python3 check_comment_ascii.py
|
|
```
|
|
"""
|
|
|
|
import sys
|
|
import tokenize
|
|
import ast
|
|
import pathlib
|
|
import re
|
|
|
|
ASCII = re.compile(r"^[\n -~]*\Z") # Printable ASCII + newline
|
|
|
|
|
|
def check(src: str, name: str) -> int:
|
|
"""
|
|
docstring line 1
|
|
docstring line 2
|
|
"""
|
|
ok = 1
|
|
# A common comment begins with `#`
|
|
with tokenize.open(src) as fp:
|
|
for tk in tokenize.generate_tokens(fp.readline):
|
|
if tk.type == tokenize.COMMENT and not ASCII.fullmatch(tk.string):
|
|
print(f"{name}:{tk.start[0]}: non-ASCII comment: {tk.string}")
|
|
ok = 0
|
|
# A docstring begins and ends with `'''`
|
|
for node in ast.walk(ast.parse(pathlib.Path(src).read_text(), filename=name)):
|
|
if isinstance(node, (ast.FunctionDef, ast.ClassDef, ast.Module)):
|
|
if (doc := ast.get_docstring(node)) and not ASCII.fullmatch(doc):
|
|
print(f"{name}:{node.lineno}: non-ASCII docstring: {doc}")
|
|
ok = 0
|
|
return ok
|
|
|
|
|
|
if __name__ == "__main__":
|
|
status = 0
|
|
for file in sys.argv[1:]:
|
|
if not check(file, file):
|
|
status = 1
|
|
sys.exit(status)
|