diff --git a/deepdoc/parser/excel_parser.py b/deepdoc/parser/excel_parser.py index 2c3e67757..736ac32ef 100644 --- a/deepdoc/parser/excel_parser.py +++ b/deepdoc/parser/excel_parser.py @@ -69,7 +69,7 @@ class RAGFlowExcelParser: if fnm.split(".")[-1].lower() in ["csv", "txt"]: encoding = find_codec(binary) - txt = binary.decode(encoding) + txt = binary.decode(encoding, errors="ignore") return len(txt.split("\n")) diff --git a/rag/app/book.py b/rag/app/book.py index 70aee29c2..c4bc62abf 100644 --- a/rag/app/book.py +++ b/rag/app/book.py @@ -91,7 +91,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, txt = "" if binary: encoding = find_codec(binary) - txt = binary.decode(encoding) + txt = binary.decode(encoding, errors="ignore") else: with open(filename, "r") as f: while True: diff --git a/rag/app/laws.py b/rag/app/laws.py index 473eca9c7..6361d62cb 100644 --- a/rag/app/laws.py +++ b/rag/app/laws.py @@ -113,7 +113,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, txt = "" if binary: encoding = find_codec(binary) - txt = binary.decode(encoding) + txt = binary.decode(encoding, errors="ignore") else: with open(filename, "r") as f: while True: diff --git a/rag/app/naive.py b/rag/app/naive.py index 55fab84c2..c557a6267 100644 --- a/rag/app/naive.py +++ b/rag/app/naive.py @@ -141,7 +141,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, txt = "" if binary: encoding = find_codec(binary) - txt = binary.decode(encoding) + txt = binary.decode(encoding, errors="ignore") else: with open(filename, "r") as f: while True: diff --git a/rag/app/one.py b/rag/app/one.py index f5c78f5aa..531fd0a70 100644 --- a/rag/app/one.py +++ b/rag/app/one.py @@ -85,7 +85,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, txt = "" if binary: encoding = find_codec(binary) - txt = binary.decode(encoding) + txt = binary.decode(encoding, errors="ignore") else: with open(filename, "r") as f: while True: diff --git a/rag/app/qa.py b/rag/app/qa.py index a37ff63fd..1ecf9b187 100644 --- a/rag/app/qa.py +++ b/rag/app/qa.py @@ -107,7 +107,7 @@ def chunk(filename, binary=None, lang="Chinese", callback=None, **kwargs): txt = "" if binary: encoding = find_codec(binary) - txt = binary.decode(encoding) + txt = binary.decode(encoding, errors="ignore") else: with open(filename, "r") as f: while True: diff --git a/rag/app/table.py b/rag/app/table.py index 96a53aac4..368d1ce85 100644 --- a/rag/app/table.py +++ b/rag/app/table.py @@ -149,7 +149,7 @@ def chunk(filename, binary=None, from_page=0, to_page=10000000000, txt = "" if binary: encoding = find_codec(binary) - txt = binary.decode(encoding) + txt = binary.decode(encoding, errors="ignore") else: with open(filename, "r") as f: while True: