mirror of
https://github.com/allenai/olmocr.git
synced 2025-07-03 07:05:50 +00:00
28 lines
823 B
Python
28 lines
823 B
Python
import glob
|
|
import json
|
|
import os
|
|
import unittest
|
|
|
|
import pytest
|
|
|
|
|
|
@pytest.mark.nonci
class TestPipelineIntegration(unittest.TestCase):
    """Check that previously written pipeline results contain expected phrases.

    Every ``*.jsonl`` file under ``localworkspace/results`` (relative to the
    current working directory) is parsed line by line, and each test looks for
    a known phrase in the ``"text"`` field of the loaded records.

    The class carries the ``nonci`` pytest marker.
    NOTE(review): presumably so it can be deselected where no pipeline output
    exists -- confirm against the project's pytest configuration.
    """

    # Directory that is scanned for result files, relative to the working directory.
    RESULTS_DIR = os.path.join("localworkspace", "results")

    def setUp(self) -> None:
        """Load every non-blank JSONL line from the results directory into ``self.data``."""
        self.data = []

        # Sorted so records are loaded in the same order regardless of the
        # order in which the filesystem happens to list the files.
        for path in sorted(glob.glob(os.path.join(self.RESULTS_DIR, "*.jsonl"))):
            # Decode explicitly as UTF-8 (the standard JSON interchange
            # encoding) instead of relying on the platform's locale default.
            with open(path, "r", encoding="utf-8") as jf:
                # Blank / whitespace-only lines are skipped, not parsed.
                self.data.extend(json.loads(raw) for raw in jf if raw.strip())

    def _assert_text_present(self, phrase: str) -> None:
        """Fail unless the ``"text"`` of at least one loaded record contains *phrase*.

        Raises:
            KeyError: if a record inspected before a match has no ``"text"`` key.
        """
        found = any(phrase in record["text"] for record in self.data)
        # Report what was searched for and how much data was loaded, so that
        # "no results on disk" is distinguishable from "phrase missing".
        self.assertTrue(
            found,
            f"{phrase!r} not found in the text of any of the {len(self.data)} "
            f"record(s) loaded from {self.RESULTS_DIR}",
        )

    def test_edgar(self) -> None:
        """Some record's text must contain "King of England"."""
        self._assert_text_present("King of England")

    def test_ambig(self) -> None:
        """Some record's text must contain "Apples and Bananas"."""
        self._assert_text_present("Apples and Bananas")

    def test_dolma(self) -> None:
        """Some record's text must contain "We extensively document Dolma"."""
        self._assert_text_present("We extensively document Dolma")
|