mirror of
https://github.com/allenai/olmocr.git
synced 2025-08-20 06:42:26 +00:00
Trying to make it faster
This commit is contained in:
parent
278422b8ff
commit
fd17652d55
@ -121,13 +121,8 @@ class PageReport:
|
||||
text_elements: List[TextElement]
|
||||
image_elements: List[ImageElement]
|
||||
|
||||
@lru_cache(maxsize=5)
|
||||
def _get_cached_pdf_reader(local_pdf_path: str) -> PdfReader:
|
||||
# Cached because you will often iterate through a whole PDF, so this makes subsequent iterations much faster
|
||||
return PdfReader(local_pdf_path)
|
||||
|
||||
def _pdf_report(local_pdf_path: str, page_num: int) -> PageReport:
|
||||
reader = _get_cached_pdf_reader(local_pdf_path)
|
||||
reader = PdfReader(local_pdf_path)
|
||||
page = reader.pages[page_num - 1]
|
||||
resources = page.get("/Resources", {})
|
||||
xobjects = resources.get("/XObject", {})
|
||||
|
Loading…
x
Reference in New Issue
Block a user