mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-11-13 16:44:07 +00:00
refine text decode (#657)
### What problem does this PR solve?

#651

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
parent
de839fc3f0
commit
7013d7f620
@ -69,7 +69,7 @@ class RAGFlowExcelParser:
|
|||||||
|
|
||||||
if fnm.split(".")[-1].lower() in ["csv", "txt"]:
|
if fnm.split(".")[-1].lower() in ["csv", "txt"]:
|
||||||
encoding = find_codec(binary)
|
encoding = find_codec(binary)
|
||||||
txt = binary.decode(encoding)
|
txt = binary.decode(encoding, errors="ignore")
|
||||||
return len(txt.split("\n"))
|
return len(txt.split("\n"))
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -91,7 +91,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
|||||||
txt = ""
|
txt = ""
|
||||||
if binary:
|
if binary:
|
||||||
encoding = find_codec(binary)
|
encoding = find_codec(binary)
|
||||||
txt = binary.decode(encoding)
|
txt = binary.decode(encoding, errors="ignore")
|
||||||
else:
|
else:
|
||||||
with open(filename, "r") as f:
|
with open(filename, "r") as f:
|
||||||
while True:
|
while True:
|
||||||
|
|||||||
@ -113,7 +113,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
|||||||
txt = ""
|
txt = ""
|
||||||
if binary:
|
if binary:
|
||||||
encoding = find_codec(binary)
|
encoding = find_codec(binary)
|
||||||
txt = binary.decode(encoding)
|
txt = binary.decode(encoding, errors="ignore")
|
||||||
else:
|
else:
|
||||||
with open(filename, "r") as f:
|
with open(filename, "r") as f:
|
||||||
while True:
|
while True:
|
||||||
|
|||||||
@ -141,7 +141,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
|||||||
txt = ""
|
txt = ""
|
||||||
if binary:
|
if binary:
|
||||||
encoding = find_codec(binary)
|
encoding = find_codec(binary)
|
||||||
txt = binary.decode(encoding)
|
txt = binary.decode(encoding, errors="ignore")
|
||||||
else:
|
else:
|
||||||
with open(filename, "r") as f:
|
with open(filename, "r") as f:
|
||||||
while True:
|
while True:
|
||||||
|
|||||||
@ -85,7 +85,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
|||||||
txt = ""
|
txt = ""
|
||||||
if binary:
|
if binary:
|
||||||
encoding = find_codec(binary)
|
encoding = find_codec(binary)
|
||||||
txt = binary.decode(encoding)
|
txt = binary.decode(encoding, errors="ignore")
|
||||||
else:
|
else:
|
||||||
with open(filename, "r") as f:
|
with open(filename, "r") as f:
|
||||||
while True:
|
while True:
|
||||||
|
|||||||
@ -107,7 +107,7 @@ def chunk(filename, binary=None, lang="Chinese", callback=None, **kwargs):
|
|||||||
txt = ""
|
txt = ""
|
||||||
if binary:
|
if binary:
|
||||||
encoding = find_codec(binary)
|
encoding = find_codec(binary)
|
||||||
txt = binary.decode(encoding)
|
txt = binary.decode(encoding, errors="ignore")
|
||||||
else:
|
else:
|
||||||
with open(filename, "r") as f:
|
with open(filename, "r") as f:
|
||||||
while True:
|
while True:
|
||||||
|
|||||||
@ -149,7 +149,7 @@ def chunk(filename, binary=None, from_page=0, to_page=10000000000,
|
|||||||
txt = ""
|
txt = ""
|
||||||
if binary:
|
if binary:
|
||||||
encoding = find_codec(binary)
|
encoding = find_codec(binary)
|
||||||
txt = binary.decode(encoding)
|
txt = binary.decode(encoding, errors="ignore")
|
||||||
else:
|
else:
|
||||||
with open(filename, "r") as f:
|
with open(filename, "r") as f:
|
||||||
while True:
|
while True:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user