From 27f503ce3131ee01006205124c2e6484cf0510c5 Mon Sep 17 00:00:00 2001 From: Nathan <168383951+Nathan-GoSupply@users.noreply.github.com> Date: Tue, 8 Apr 2025 17:47:24 +1000 Subject: [PATCH] Update pdfminer_utils.py (#3974) Fix for 'PSSyntaxError' import error: "cannot import name 'PSSyntaxError' from 'pdfminer.pdfparser'" Latest pdfminer-six doesn't import PSSyntaxError into `pdfminer.pdfparser` anymore. It must now be directly imported from its source (`pdfminer.psexceptions`) --- CHANGELOG.md | 1 + unstructured/partition/pdf_image/pdfminer_utils.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4da58bbc9..62ae488af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ This makes it impossible to write stable unit tests, for example, or to obtain r ### Fixes - **Removed out of date ubuntu Dockerfile.** The Dockerfile was out of date and non-functional. +- **Fix for 'PSSyntaxError' import error: "cannot import name 'PSSyntaxError' from 'pdfminer.pdfparser'"** PSSyntaxError needed to be imported from its source 'pdfminer.psexceptions'. ## 0.17.4 diff --git a/unstructured/partition/pdf_image/pdfminer_utils.py b/unstructured/partition/pdf_image/pdfminer_utils.py index ad6f98191..3993f41ae 100644 --- a/unstructured/partition/pdf_image/pdfminer_utils.py +++ b/unstructured/partition/pdf_image/pdfminer_utils.py @@ -6,7 +6,7 @@ from pdfminer.converter import PDFPageAggregator from pdfminer.layout import LAParams, LTContainer, LTImage, LTItem, LTTextLine from pdfminer.pdfinterp import PDFPageInterpreter, PDFResourceManager from pdfminer.pdfpage import PDFPage -from pdfminer.pdfparser import PSSyntaxError +from pdfminer.psexceptions import PSSyntaxError from pydantic import BaseModel from unstructured.logger import logger