mirror of
https://github.com/allenai/olmocr.git
synced 2025-10-03 12:28:35 +00:00
Trying to get new CI to work
This commit is contained in:
parent
1db1b3406b
commit
f5d92bdb14
2
.github/workflows/main.yml
vendored
2
.github/workflows/main.yml
vendored
@ -112,7 +112,7 @@ jobs:
|
||||
needs: [checks]
|
||||
env:
|
||||
BEAKER_TOKEN: ${{ secrets.BEAKER_TOKEN }}
|
||||
BEAKER_IMAGE: chrisw/olmocr-gpu-ci
|
||||
BEAKER_IMAGE: jakep/olmocr-gpu-ci
|
||||
BEAKER_BUDGET: ai2/oe-data
|
||||
BEAKER_WORKSPACE: ai2/olmocr
|
||||
steps:
|
||||
|
@ -9,7 +9,7 @@ git clone https://github.com/allenai/olmocr.git olmocr \
|
||||
.[gpu] \
|
||||
pytest \
|
||||
--find-links https://flashinfer.ai/whl/cu124/torch2.4/flashinfer/ \
|
||||
&& bash tests/gnarly_pdfs/test_gnarly_pdfs.sh
|
||||
&& bash scripts/run_integration_test.sh
|
||||
|
||||
|
||||
|
||||
|
6
scripts/run_integration_test.sh
Normal file
6
scripts/run_integration_test.sh
Normal file
@ -0,0 +1,6 @@
|
||||
#!/usr/bin/bash
# Integration test driver: runs the olmocr pipeline over three sample PDFs
# into ./localworkspace, then runs the pytest checks in tests/test_integration.py.
#
# The first line must start with "#!" to be a shebang; without the "!" it is
# an ordinary comment and is ignored when the script is executed directly.

# -e: stop at the first failing command; -x: echo each command as it runs.
set -ex

# pytest is only started when the pipeline run itself succeeded.
python -m olmocr.pipeline ./localworkspace --pdfs tests/gnarly_pdfs/ambiguous.pdf tests/gnarly_pdfs/edgar.pdf tests/gnarly_pdfs/dolma-page-1.pdf \
    && pytest tests/test_integration.py
|
@ -1,7 +0,0 @@
|
||||
import unittest
|
||||
|
||||
|
||||
class TestGnarlyPdfs(unittest.TestCase):
    """Placeholder suite containing a single test that always passes."""

    def test_nothing_in_particular(self) -> None:
        """Noop pending jake's impl"""
        # Nothing is exercised here; the assertion is trivially satisfied.
        always_true = True
        self.assertTrue(always_true)
|
@ -1,6 +0,0 @@
|
||||
#!/usr/bin/bash
# Runs the olmocr pipeline over one sample PDF into ./localworkspace, then
# runs the pytest module tests/gnarly_pdfs/test_gnarly_pdfs.py.
#
# The first line must start with "#!" to be a shebang; without the "!" it is
# an ordinary comment and is ignored when the script is executed directly.

# -e: stop at the first failing command; -x: echo each command as it runs.
set -ex

# pytest is only started when the pipeline run itself succeeded.
python -m olmocr.pipeline ./localworkspace --pdfs tests/gnarly_pdfs/ambiguous.pdf \
    && pytest tests/gnarly_pdfs/test_gnarly_pdfs.py
|
27
tests/test_integration.py
Normal file
27
tests/test_integration.py
Normal file
@ -0,0 +1,27 @@
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
import unittest
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.nonci
|
||||
class TestPipelineIntegration(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.data = []
|
||||
|
||||
for file in glob.glob(os.path.join("localworkspace", "results", "*.jsonl")):
|
||||
with open(file, "r") as jf:
|
||||
for line in jf:
|
||||
if len(line.strip()) > 0:
|
||||
self.data.append(json.loads(line))
|
||||
|
||||
def test_edgar(self) -> None:
|
||||
self.assertTrue(any("King of England" in line["text"] for line in self.data))
|
||||
|
||||
def test_ambig(self) -> None:
|
||||
self.assertTrue(any("Apples and Bananas" in line["text"] for line in self.data))
|
||||
|
||||
def test_dolma(self) -> None:
|
||||
self.assertTrue(any("We extensively document Dolma" in line["text"] for line in self.data))
|
Loading…
x
Reference in New Issue
Block a user