olmocr/tests/test_filter.py
2025-01-27 18:30:41 +00:00

25 lines
644 B
Python

import os
import unittest
from pypdf import PdfReader
from olmocr.filter import PdfFilter
class PdfFilterTest(unittest.TestCase):
    """Tests for ``PdfFilter`` run against sample PDFs in the ``gnarly_pdfs`` directory."""

    def testFormLaterPages(self):
        """Toggling ``apply_form_check`` flips the verdict for ``form_on_later_pages.pdf``.

        With the form check enabled, ``filter_out_pdf`` must return a truthy
        value for the sample; with it disabled, a falsy one.  The sample's
        file name suggests its form fields are not on the first page
        (presumably the point of the fixture -- confirm against the PDF).
        """
        # Build the fixture path once, relative to this test file, so the
        # test does not depend on the current working directory.
        pdf_path = os.path.join(
            os.path.dirname(__file__),
            "gnarly_pdfs",
            "form_on_later_pages.pdf",
        )

        # Each filter is a plain local: nothing outside this test reads it,
        # so there is no reason to keep it on the TestCase instance.
        form_checking_filter = PdfFilter(apply_form_check=True)
        self.assertTrue(
            form_checking_filter.filter_out_pdf(pdf_path),
            msg="expected form_on_later_pages.pdf to be filtered out when apply_form_check=True",
        )

        unchecked_filter = PdfFilter(apply_form_check=False)
        self.assertFalse(
            unchecked_filter.filter_out_pdf(pdf_path),
            msg="expected form_on_later_pages.pdf to be kept when apply_form_check=False",
        )