From fd17652d55d13d89ae4a246221bda852208f07be Mon Sep 17 00:00:00 2001 From: Jake Poznanski Date: Fri, 15 Nov 2024 11:06:50 -0800 Subject: [PATCH] Trying to make it faster --- pdelfin/prompts/anchor.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pdelfin/prompts/anchor.py b/pdelfin/prompts/anchor.py index df389d1..c0b1b68 100644 --- a/pdelfin/prompts/anchor.py +++ b/pdelfin/prompts/anchor.py @@ -121,13 +121,8 @@ class PageReport: text_elements: List[TextElement] image_elements: List[ImageElement] -@lru_cache(maxsize=5) -def _get_cached_pdf_reader(local_pdf_path: str) -> PdfReader: - # Cached, because you are going to often iterate through a whole pdf, so this will make it a lot faster on subsequent iterations - return PdfReader(local_pdf_path) - def _pdf_report(local_pdf_path: str, page_num: int) -> PageReport: - reader = _get_cached_pdf_reader(local_pdf_path) + reader = PdfReader(local_pdf_path) page = reader.pages[page_num - 1] resources = page.get("/Resources", {}) xobjects = resources.get("/XObject", {})