mirror of
https://github.com/docling-project/docling.git
synced 2025-06-27 05:20:05 +00:00
fix: Ensure uninitialized pages are removed before assembling document (#1812)
Ensure uninitialized pages are removed before assembling document.

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
parent
861abcdcb0
commit
dd7f64ff28
@@ -124,7 +124,7 @@ class ReadingOrderModel:
|
||||
page_no = page.page_no + 1
|
||||
size = page.size
|
||||
|
||||
assert size is not None
|
||||
assert size is not None, "Page size is not initialized."
|
||||
|
||||
out_doc.add_page(page_no=page_no, size=size)
|
||||
|
||||
|
@@ -193,6 +193,17 @@ class PaginatedPipeline(BasePipeline): # TODO this is a bad name.
|
||||
)
|
||||
raise e
|
||||
|
||||
# Filter out uninitialized pages (those with size=None) that may remain
|
||||
# after timeout or processing failures to prevent assertion errors downstream
|
||||
initial_page_count = len(conv_res.pages)
|
||||
conv_res.pages = [page for page in conv_res.pages if page.size is not None]
|
||||
|
||||
if len(conv_res.pages) < initial_page_count:
|
||||
_log.info(
|
||||
f"Filtered out {initial_page_count - len(conv_res.pages)} uninitialized pages "
|
||||
f"due to timeout or processing failures"
|
||||
)
|
||||
|
||||
return conv_res
|
||||
|
||||
def _unload(self, conv_res: ConversionResult) -> ConversionResult:
|
||||
|
Loading…
x
Reference in New Issue
Block a user