Hopefully CI runs now

2025-12-13 08:11:22 +00:00 · 2025-02-14 20:42:19 +00:00 · 2025-02-14 20:42:19 +00:00 · c05e01532c
commit c05e01532c
parent 15f9b8b9dc
7 changed files with 15 additions and 63 deletions
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -31,7 +31,7 @@ jobs:
        task:
          - name: Test
            run: |
-              pytest -v --color=yes tests/
+              pytest -v --color=yes  -m "not nonci" tests/

        include:
          - python: "3.11"
@@ -39,7 +39,7 @@ jobs:
              name: Lint
              run: ruff check .

-          # Removing mypy for now, as it isn't handling async things correctly
+          # Removing mypy for now, as it isn't handling async things correctly and is crashing
          # - python: "3.11"
          #   task:
          #     name: Type check
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -158,3 +158,6 @@ python_classes = [
 ]
 log_format = "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
 log_level = "DEBUG"
+markers = [
+    "nonci: mark test as not intended for CI runs"
+]
--- a/tests/test_coherency.py
+++ b/tests/test_coherency.py
@@ -1,59 +0,0 @@
-import html
-import multiprocessing
-import os
-import time
-import unittest
-
-from olmocr.filter.coherency import get_document_coherency
-from olmocr.prompts.anchor import get_anchor_text
-
-
-class TestCoherencyScores(unittest.TestCase):
-    def testBadOcr1(self):
-        good_text = get_anchor_text(os.path.join(os.path.dirname(__file__), "gnarly_pdfs", "instructions_and_schematics.pdf"), 1, pdf_engine="pdftotext")
-        ocr1_text = get_anchor_text(os.path.join(os.path.dirname(__file__), "gnarly_pdfs", "handwriting_bad_ocr.pdf"), 1, pdf_engine="pdftotext")
-        ocr2_text = get_anchor_text(os.path.join(os.path.dirname(__file__), "gnarly_pdfs", "some_ocr1.pdf"), 1, pdf_engine="pdftotext")
-
-        print("Good", get_document_coherency(good_text))
-        print("Bad1", get_document_coherency(ocr1_text))
-        print("Bad2", get_document_coherency(ocr2_text))
-
-    @unittest.skip("This test is not necessary, it's just a helpful benchmark")
-    def testHugeBookCoherencySpeed(self):
-        base_text = get_anchor_text(os.path.join(os.path.dirname(__file__), "gnarly_pdfs", "ti89_guidebook.pdf"), 1, pdf_engine="pdftotext")
-        print(f"ti89 book length: {len(base_text):,}")
-
-        warmup = get_document_coherency(base_text[:1000])
-
-        base_text = base_text[:40000]
-
-        start = time.perf_counter()
-        score = get_document_coherency(base_text)
-        end = time.perf_counter()
-
-        char_per_sec = len(base_text) / (end - start)
-        char_per_sec = char_per_sec / multiprocessing.cpu_count()
-
-        print(f"ti89 book score {score:.2f}")
-        print(f"{char_per_sec:.2f} chars per second per core")
-
-    def testTwoColumnMisparse(self):
-        pdftotext_text = get_anchor_text(
-            os.path.join(os.path.dirname(__file__), "gnarly_pdfs", "pdftotext_two_column_issue.pdf"),
-            page=2,
-            pdf_engine="pdftotext",
-        )
-        pdfium_text = get_anchor_text(
-            os.path.join(os.path.dirname(__file__), "gnarly_pdfs", "pdftotext_two_column_issue.pdf"),
-            page=2,
-            pdf_engine="pdfium",
-        )
-
-        print("pdftotext_text", pdftotext_score := get_document_coherency(pdftotext_text))
-        print("pdfium_text", pdfium_score := get_document_coherency(pdfium_text))
-
-        self.assertLess(pdfium_score, pdftotext_score)
-
-        anchor_text = get_anchor_text(os.path.join(os.path.dirname(__file__), "gnarly_pdfs", "pdftotext_two_column_issue.pdf"), 2, pdf_engine="topcoherency")
-
-        self.assertEqual(anchor_text, pdfium_text)
--- a/tests/test_dataloader.py
+++ b/tests/test_dataloader.py
@@ -1,6 +1,7 @@
 import unittest
 from functools import partial

+import pytest
 from torch.utils.data import DataLoader
 from tqdm import tqdm
 from transformers import AutoProcessor
@@ -14,6 +15,7 @@ from olmocr.train.dataloader import (
 from olmocr.train.dataprep import batch_prepare_data_for_qwen2_training


+@pytest.mark.nonci
 class TestBatchQueryResponseDataset(unittest.TestCase):
    def testLoadS3(self):
        ds = load_jsonl_into_ds("s3://ai2-oe-data/jakep/openai_batch_data_v2/*.jsonl", first_n_files=3)
--- a/tests/test_dataprep.py
+++ b/tests/test_dataprep.py
@@ -7,6 +7,7 @@ from io import BytesIO
 from unittest.mock import patch

 import numpy as np
+import pytest
 import requests
 import torch
 from PIL import Image
@@ -27,6 +28,7 @@ from olmocr.train.dataprep import (
 from olmocr.train.utils import make_dataset


+@pytest.mark.nonci
 class TestDataprep(unittest.TestCase):
    def testFullDataloader(self):
        processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
--- a/tests/test_molmo.py
+++ b/tests/test_molmo.py
@@ -1,5 +1,6 @@
 import unittest

+import pytest
 import requests
 from PIL import Image
 from transformers import (
@@ -10,6 +11,7 @@ from transformers import (
 )


+@pytest.mark.nonci
 class MolmoProcessorTest(unittest.TestCase):
    def test_molmo_demo(self):
        # load the processor
--- a/tests/test_sglang.py
+++ b/tests/test_sglang.py
@@ -8,13 +8,13 @@ import base64
 import json
 import math
 import os
-import tempfile
 import unittest
 from io import BytesIO
 from pathlib import Path
 from unittest.mock import AsyncMock, patch

 import numpy as np
+import pytest
 import torch
 import torch.nn.functional as F
 from httpx import AsyncClient
@@ -36,7 +36,7 @@ MODEL_FINETUNED_PATH = (
 )


-@unittest.skip("Skip these tests when running CI, they are mostly for experimentation")
+@pytest.mark.nonci
 class TestSglangServer(unittest.IsolatedAsyncioTestCase):
    async def asyncSetUp(self):
        # Mock arguments
@@ -110,6 +110,7 @@ class TestSglangServer(unittest.IsolatedAsyncioTestCase):
        #     os.rmdir(self.args.workspace)


+@pytest.mark.nonci
 class TestHuggingFaceModel(unittest.IsolatedAsyncioTestCase):
    async def asyncSetUp(self):
        # Set up the Hugging Face model and tokenizer
@@ -248,6 +249,7 @@ class TestHuggingFaceModel(unittest.IsolatedAsyncioTestCase):
        torch.cuda.empty_cache()


+@pytest.mark.nonci
 class RawSGLangTest(unittest.IsolatedAsyncioTestCase):
    def setUp(self):
        # Set up the Hugging Face model and tokenizer