2024-09-17 15:16:58 +00:00
|
|
|
import os
|
2024-09-18 22:52:42 +00:00
|
|
|
import unittest
|
|
|
|
|
|
|
|
from pypdf import PdfReader
|
2024-09-17 15:16:58 +00:00
|
|
|
|
2025-01-27 18:30:41 +00:00
|
|
|
from olmocr.filter import PdfFilter
|
2024-09-17 16:26:55 +00:00
|
|
|
|
2024-09-17 15:16:58 +00:00
|
|
|
|
|
|
|
class PdfFilterTest(unittest.TestCase):
    """Tests for ``PdfFilter`` run against PDF fixtures stored beside this module."""

    def testFormLaterPages(self):
        """Check that ``apply_form_check`` flips the verdict for one fixture.

        With the form check enabled, ``filter_out_pdf`` is expected to return
        a truthy value for ``form_on_later_pages.pdf``; with the check
        disabled, it is expected to return a falsy value for the same file.
        """
        # The fixture is resolved relative to this test file, so the test
        # does not depend on the current working directory.
        pdf_path = os.path.join(
            os.path.dirname(__file__),
            "gnarly_pdfs",
            "form_on_later_pages.pdf",
        )

        # Form check on: the fixture should be filtered out.
        self.filter = PdfFilter(apply_form_check=True)
        rejected_with_check = self.filter.filter_out_pdf(pdf_path)
        self.assertTrue(rejected_with_check)

        # Form check off: the same fixture should be kept.
        self.filter = PdfFilter(apply_form_check=False)
        rejected_without_check = self.filter.filter_out_pdf(pdf_path)
        self.assertFalse(rejected_without_check)
|