olmocr/tests/test_filter.py
2025-01-29 15:30:39 -08:00

18 lines
553 B
Python

import os
import unittest
from pypdf import PdfReader
from olmocr.filter import PdfFilter
class PdfFilterTest(unittest.TestCase):
    """Checks how ``PdfFilter`` treats the ``form_on_later_pages.pdf`` fixture."""

    def testFormLaterPages(self):
        """The fixture is filtered out only when ``apply_form_check`` is enabled."""
        # The fixture lives in the "gnarly_pdfs" directory next to this test file.
        fixtures_dir = os.path.join(os.path.dirname(__file__), "gnarly_pdfs")
        pdf_path = os.path.join(fixtures_dir, "form_on_later_pages.pdf")

        # Form check enabled: the filter must report the PDF as filtered out.
        self.filter = PdfFilter(apply_form_check=True)
        filtered_out = self.filter.filter_out_pdf(pdf_path)
        self.assertTrue(filtered_out)

        # Form check disabled: the very same PDF must not be filtered out.
        self.filter = PdfFilter(apply_form_check=False)
        filtered_out = self.filter.filter_out_pdf(pdf_path)
        self.assertFalse(filtered_out)