olmocr/tests/test_filter.py
Jake Poznanski bab32aa9b3 Formatting
2024-09-18 22:52:42 +00:00

26 lines
766 B
Python

import os
import unittest
from pypdf import PdfReader
from pdelfin.filter import PdfFilter
from pdelfin.filter.imagedetect import pdf_page_image_area
class PdfFilterTest(unittest.TestCase):
def setUp(self) -> None:
self.filter = PdfFilter()
def testFormLaterPages(self):
self.assertTrue(
self.filter._is_form(os.path.join(os.path.dirname(__file__), "gnarly_pdfs", "form_on_later_pages.pdf"))
)
class ImageDetectionTest(unittest.TestCase):
def testSlideshowMostlyImages(self):
self.pdf = PdfReader(os.path.join(os.path.dirname(__file__), "gnarly_pdfs", "slideshow_mostly_images.pdf"))
for page in range(self.pdf.get_num_pages()):
print(page, pdf_page_image_area(self.pdf, page + 1))