# Mirror of https://github.com/allenai/olmocr.git
# Synced 2025-06-27 04:00:02 +00:00 (25 lines, 644 B, Python)
import os
|
|
import unittest
|
|
|
|
from pypdf import PdfReader
|
|
|
|
from olmocr.filter import PdfFilter
|
|
|
|
|
|
class PdfFilterTest(unittest.TestCase):
    """Checks ``PdfFilter.filter_out_pdf`` with the form check on and off."""

    def testFormLaterPages(self):
        """The fixture PDF is filtered out only when ``apply_form_check`` is set.

        Runs the same fixture, ``gnarly_pdfs/form_on_later_pages.pdf``
        (resolved relative to this test module), through two filters that
        differ only in the ``apply_form_check`` flag.
        """
        # Both assertions use the same fixture, so resolve its path once.
        fixture_dir = os.path.dirname(__file__)
        pdf_path = os.path.join(
            fixture_dir,
            "gnarly_pdfs",
            "form_on_later_pages.pdf",
        )

        # Form check enabled: filter_out_pdf must return a truthy value.
        self.filter = PdfFilter(apply_form_check=True)
        filtered_with_check = self.filter.filter_out_pdf(pdf_path)
        self.assertTrue(filtered_with_check)

        # Form check disabled: the same file must yield a falsy value.
        self.filter = PdfFilter(apply_form_check=False)
        filtered_without_check = self.filter.filter_out_pdf(pdf_path)
        self.assertFalse(filtered_without_check)