olmocr/tests/test_filter.py

25 lines
645 B
Python
Raw Normal View History

2024-09-17 15:16:58 +00:00
import os
2024-09-18 22:52:42 +00:00
import unittest
from pypdf import PdfReader
2024-09-17 15:16:58 +00:00
from pdelfin.filter import PdfFilter
2024-09-17 15:16:58 +00:00
class PdfFilterTest(unittest.TestCase):
    """Tests for ``PdfFilter.filter_out_pdf`` against the bundled fixture PDFs."""

    def testFormLaterPages(self):
        """Check ``form_on_later_pages.pdf`` with the form check on and off.

        With ``apply_form_check=True`` the filter must report the fixture as
        filtered out; with ``apply_form_check=False`` it must not.
        """
        # Both assertions target the same fixture, so resolve its path once,
        # relative to this test module.
        pdf_path = os.path.join(
            os.path.dirname(__file__),
            "gnarly_pdfs",
            "form_on_later_pages.pdf",
        )

        self.filter = PdfFilter(apply_form_check=True)
        self.assertTrue(
            self.filter.filter_out_pdf(pdf_path),
            "expected the PDF to be filtered out when apply_form_check=True",
        )

        self.filter = PdfFilter(apply_form_check=False)
        self.assertFalse(
            self.filter.filter_out_pdf(pdf_path),
            "expected the PDF not to be filtered out when apply_form_check=False",
        )