mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-11-10 14:54:24 +00:00
Bug fix: index out of range when parsing PDF (#440)
### What problem does this PR solve? Fixes a bug that occurs when parsing some PDF files (#436). ### Type of change - [☑️ ] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
parent
1dada69daa
commit
39f1feaccb
@ -830,10 +830,13 @@ class HuParser:
|
|||||||
pn = [bx["page_number"]]
|
pn = [bx["page_number"]]
|
||||||
top = bx["top"] - self.page_cum_height[pn[0] - 1]
|
top = bx["top"] - self.page_cum_height[pn[0] - 1]
|
||||||
bott = bx["bottom"] - self.page_cum_height[pn[0] - 1]
|
bott = bx["bottom"] - self.page_cum_height[pn[0] - 1]
|
||||||
if pn[-1] - 1 >= len(self.page_images): return ""
|
page_images_cnt = len(self.page_images)
|
||||||
|
if pn[-1] - 1 >= page_images_cnt: return ""
|
||||||
while bott * ZM > self.page_images[pn[-1] - 1].size[1]:
|
while bott * ZM > self.page_images[pn[-1] - 1].size[1]:
|
||||||
bott -= self.page_images[pn[-1] - 1].size[1] / ZM
|
bott -= self.page_images[pn[-1] - 1].size[1] / ZM
|
||||||
pn.append(pn[-1] + 1)
|
pn.append(pn[-1] + 1)
|
||||||
|
if pn[-1] - 1 >= page_images_cnt:
|
||||||
|
return ""
|
||||||
|
|
||||||
return "@@{}\t{:.1f}\t{:.1f}\t{:.1f}\t{:.1f}##" \
|
return "@@{}\t{:.1f}\t{:.1f}\t{:.1f}\t{:.1f}##" \
|
||||||
.format("-".join([str(p) for p in pn]),
|
.format("-".join([str(p) for p in pn]),
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user