Trying to make it faster

This commit is contained in:
Jake Poznanski 2024-11-15 11:06:50 -08:00
parent 278422b8ff
commit fd17652d55

View File

@@ -121,13 +121,8 @@ class PageReport:
text_elements: List[TextElement] text_elements: List[TextElement]
image_elements: List[ImageElement] image_elements: List[ImageElement]
@lru_cache(maxsize=5)
def _get_cached_pdf_reader(local_pdf_path: str) -> PdfReader:
# Cached, because you are going to often iterate through a whole pdf, so this will make it a lot faster on subsequent iterations
return PdfReader(local_pdf_path)
def _pdf_report(local_pdf_path: str, page_num: int) -> PageReport: def _pdf_report(local_pdf_path: str, page_num: int) -> PageReport:
reader = _get_cached_pdf_reader(local_pdf_path) reader = PdfReader(local_pdf_path)
page = reader.pages[page_num - 1] page = reader.pages[page_num - 1]
resources = page.get("/Resources", {}) resources = page.get("/Resources", {})
xobjects = resources.get("/XObject", {}) xobjects = resources.get("/XObject", {})