mirror of
https://github.com/allenai/olmocr.git
synced 2025-10-03 20:34:09 +00:00
Trying to get new CI to work
This commit is contained in:
parent
1db1b3406b
commit
f5d92bdb14
2
.github/workflows/main.yml
vendored
2
.github/workflows/main.yml
vendored
@ -112,7 +112,7 @@ jobs:
|
|||||||
needs: [checks]
|
needs: [checks]
|
||||||
env:
|
env:
|
||||||
BEAKER_TOKEN: ${{ secrets.BEAKER_TOKEN }}
|
BEAKER_TOKEN: ${{ secrets.BEAKER_TOKEN }}
|
||||||
BEAKER_IMAGE: chrisw/olmocr-gpu-ci
|
BEAKER_IMAGE: jakep/olmocr-gpu-ci
|
||||||
BEAKER_BUDGET: ai2/oe-data
|
BEAKER_BUDGET: ai2/oe-data
|
||||||
BEAKER_WORKSPACE: ai2/olmocr
|
BEAKER_WORKSPACE: ai2/olmocr
|
||||||
steps:
|
steps:
|
||||||
|
@ -9,7 +9,7 @@ git clone https://github.com/allenai/olmocr.git olmocr \
|
|||||||
.[gpu] \
|
.[gpu] \
|
||||||
pytest \
|
pytest \
|
||||||
--find-links https://flashinfer.ai/whl/cu124/torch2.4/flashinfer/ \
|
--find-links https://flashinfer.ai/whl/cu124/torch2.4/flashinfer/ \
|
||||||
&& bash tests/gnarly_pdfs/test_gnarly_pdfs.sh
|
&& bash scripts/run_integration_test.sh
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
6
scripts/run_integration_test.sh
Normal file
6
scripts/run_integration_test.sh
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
#!/usr/bin/env bash
# Integration test driver: run the olmocr pipeline over three sample PDFs,
# then run the pytest checks in tests/test_integration.py.

# -e: exit on error; -x: print each command before it is executed.
set -ex

# ./localworkspace is the workspace argument handed to the pipeline.
# pytest is only started when the pipeline command exits successfully.
python -m olmocr.pipeline ./localworkspace --pdfs tests/gnarly_pdfs/ambiguous.pdf tests/gnarly_pdfs/edgar.pdf tests/gnarly_pdfs/dolma-page-1.pdf \
    && pytest tests/test_integration.py
|
@ -1,7 +0,0 @@
|
|||||||
import unittest
|
|
||||||
|
|
||||||
|
|
||||||
class TestGnarlyPdfs(unittest.TestCase):
    """Placeholder test case; it contains a single test that always passes."""

    def test_nothing_in_particular(self) -> None:
        """Assert a constant truth so the suite has one passing test."""
        placeholder_ok = True
        self.assertTrue(placeholder_ok)
|
|
@ -1,6 +0,0 @@
|
|||||||
#!/usr/bin/env bash
# Run the olmocr pipeline over one sample PDF, then run the gnarly-PDF pytest module.

# -e: exit on error; -x: print each command before it is executed.
set -ex

# pytest is only started when the pipeline command exits successfully.
python -m olmocr.pipeline ./localworkspace --pdfs tests/gnarly_pdfs/ambiguous.pdf \
    && pytest tests/gnarly_pdfs/test_gnarly_pdfs.py
|
|
27
tests/test_integration.py
Normal file
27
tests/test_integration.py
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
import glob
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.nonci
class TestPipelineIntegration(unittest.TestCase):
    """Check JSONL results found under ``localworkspace/results``.

    ``setUp`` loads every ``*.jsonl`` file in that directory (resolved
    relative to the current working directory), parsing one JSON document
    per non-blank line. Each test then asserts that an expected phrase
    occurs in the ``"text"`` field of at least one loaded record.
    """

    def setUp(self):
        """Collect all JSON records from the results directory into ``self.data``."""
        self.data = []

        for file in glob.glob(os.path.join("localworkspace", "results", "*.jsonl")):
            # Decode explicitly as UTF-8 (the JSON interchange encoding) so
            # the result does not depend on the locale of the CI container.
            with open(file, "r", encoding="utf-8") as jf:
                for line in jf:
                    # Skip blank lines; json.loads would raise on them.
                    if line.strip():
                        self.data.append(json.loads(line))

    def _assert_text_found(self, phrase):
        """Fail unless ``phrase`` is a substring of some record's ``"text"``."""
        self.assertTrue(
            any(phrase in record["text"] for record in self.data),
            f"{phrase!r} not found in the text of any of the "
            f"{len(self.data)} loaded record(s)",
        )

    def test_edgar(self) -> None:
        """Some record must contain the phrase 'King of England'."""
        self._assert_text_found("King of England")

    def test_ambig(self) -> None:
        """Some record must contain the phrase 'Apples and Bananas'."""
        self._assert_text_found("Apples and Bananas")

    def test_dolma(self) -> None:
        """Some record must contain the phrase 'We extensively document Dolma'."""
        self._assert_text_found("We extensively document Dolma")
|
Loading…
x
Reference in New Issue
Block a user