diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index cf55040..b3cd798 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -31,7 +31,7 @@ jobs:
         task:
           - name: Test
             run: |
-              pytest -v --color=yes tests/
+              pytest -v --color=yes  -m "not nonci" tests/
 
         include:
           - python: "3.11"
@@ -39,7 +39,7 @@ jobs:
               name: Lint
               run: ruff check .
 
-          # Removing mypy for now, as it isn't handling async things correctly
+          # mypy is disabled for now: it mishandles async code and crashes
           # - python: "3.11"
           #   task:
           #     name: Type check
diff --git a/pyproject.toml b/pyproject.toml
index 2904ba3..7e82540 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -158,3 +158,6 @@ python_classes = [
 ]
 log_format = "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
 log_level = "DEBUG"
+markers = [
+    "nonci: marks tests that are excluded from CI runs (deselect with -m 'not nonci')"
+]
\ No newline at end of file
diff --git a/tests/test_coherency.py b/tests/test_coherency.py
deleted file mode 100644
index 23ef47a..0000000
--- a/tests/test_coherency.py
+++ /dev/null
@@ -1,59 +0,0 @@
-import html
-import multiprocessing
-import os
-import time
-import unittest
-
-from olmocr.filter.coherency import get_document_coherency
-from olmocr.prompts.anchor import get_anchor_text
-
-
-class TestCoherencyScores(unittest.TestCase):
-    def testBadOcr1(self):
-        good_text = get_anchor_text(os.path.join(os.path.dirname(__file__), "gnarly_pdfs", "instructions_and_schematics.pdf"), 1, pdf_engine="pdftotext")
-        ocr1_text = get_anchor_text(os.path.join(os.path.dirname(__file__), "gnarly_pdfs", "handwriting_bad_ocr.pdf"), 1, pdf_engine="pdftotext")
-        ocr2_text = get_anchor_text(os.path.join(os.path.dirname(__file__), "gnarly_pdfs", "some_ocr1.pdf"), 1, pdf_engine="pdftotext")
-
-        print("Good", get_document_coherency(good_text))
-        print("Bad1", get_document_coherency(ocr1_text))
-        print("Bad2", get_document_coherency(ocr2_text))
-
-    @unittest.skip("This test is not necessary, it's just a helpful benchmark")
-    def testHugeBookCoherencySpeed(self):
-        base_text = get_anchor_text(os.path.join(os.path.dirname(__file__), "gnarly_pdfs", "ti89_guidebook.pdf"), 1, pdf_engine="pdftotext")
-        print(f"ti89 book length: {len(base_text):,}")
-
-        warmup = get_document_coherency(base_text[:1000])
-
-        base_text = base_text[:40000]
-
-        start = time.perf_counter()
-        score = get_document_coherency(base_text)
-        end = time.perf_counter()
-
-        char_per_sec = len(base_text) / (end - start)
-        char_per_sec = char_per_sec / multiprocessing.cpu_count()
-
-        print(f"ti89 book score {score:.2f}")
-        print(f"{char_per_sec:.2f} chars per second per core")
-
-    def testTwoColumnMisparse(self):
-        pdftotext_text = get_anchor_text(
-            os.path.join(os.path.dirname(__file__), "gnarly_pdfs", "pdftotext_two_column_issue.pdf"),
-            page=2,
-            pdf_engine="pdftotext",
-        )
-        pdfium_text = get_anchor_text(
-            os.path.join(os.path.dirname(__file__), "gnarly_pdfs", "pdftotext_two_column_issue.pdf"),
-            page=2,
-            pdf_engine="pdfium",
-        )
-
-        print("pdftotext_text", pdftotext_score := get_document_coherency(pdftotext_text))
-        print("pdfium_text", pdfium_score := get_document_coherency(pdfium_text))
-
-        self.assertLess(pdfium_score, pdftotext_score)
-
-        anchor_text = get_anchor_text(os.path.join(os.path.dirname(__file__), "gnarly_pdfs", "pdftotext_two_column_issue.pdf"), 2, pdf_engine="topcoherency")
-
-        self.assertEqual(anchor_text, pdfium_text)
diff --git a/tests/test_dataloader.py b/tests/test_dataloader.py
index a0b1a09..36e4021 100644
--- a/tests/test_dataloader.py
+++ b/tests/test_dataloader.py
@@ -1,6 +1,7 @@
 import unittest
 from functools import partial
 
+import pytest
 from torch.utils.data import DataLoader
 from tqdm import tqdm
 from transformers import AutoProcessor
@@ -14,6 +15,7 @@ from olmocr.train.dataloader import (
 from olmocr.train.dataprep import batch_prepare_data_for_qwen2_training
 
 
+@pytest.mark.nonci
 class TestBatchQueryResponseDataset(unittest.TestCase):
     def testLoadS3(self):
         ds = load_jsonl_into_ds("s3://ai2-oe-data/jakep/openai_batch_data_v2/*.jsonl", first_n_files=3)
diff --git a/tests/test_dataprep.py b/tests/test_dataprep.py
index 51033c6..11ce515 100644
--- a/tests/test_dataprep.py
+++ b/tests/test_dataprep.py
@@ -7,6 +7,7 @@ from io import BytesIO
 from unittest.mock import patch
 
 import numpy as np
+import pytest
 import requests
 import torch
 from PIL import Image
@@ -27,6 +28,7 @@ from olmocr.train.dataprep import (
 from olmocr.train.utils import make_dataset
 
 
+@pytest.mark.nonci
 class TestDataprep(unittest.TestCase):
     def testFullDataloader(self):
         processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
diff --git a/tests/test_molmo.py b/tests/test_molmo.py
index 1f311ec..5a5e998 100644
--- a/tests/test_molmo.py
+++ b/tests/test_molmo.py
@@ -1,5 +1,6 @@
 import unittest
 
+import pytest
 import requests
 from PIL import Image
 from transformers import (
@@ -10,6 +11,7 @@ from transformers import (
 )
 
 
+@pytest.mark.nonci
 class MolmoProcessorTest(unittest.TestCase):
     def test_molmo_demo(self):
         # load the processor
diff --git a/tests/test_sglang.py b/tests/test_sglang.py
index 806ec41..7c351cf 100644
--- a/tests/test_sglang.py
+++ b/tests/test_sglang.py
@@ -8,13 +8,13 @@ import base64
 import json
 import math
 import os
-import tempfile
 import unittest
 from io import BytesIO
 from pathlib import Path
 from unittest.mock import AsyncMock, patch
 
 import numpy as np
+import pytest
 import torch
 import torch.nn.functional as F
 from httpx import AsyncClient
@@ -36,7 +36,7 @@ MODEL_FINETUNED_PATH = (
 )
 
 
-@unittest.skip("Skip these tests when running CI, they are mostly for experimentation")
+@pytest.mark.nonci
 class TestSglangServer(unittest.IsolatedAsyncioTestCase):
     async def asyncSetUp(self):
         # Mock arguments
@@ -110,6 +110,7 @@ class TestSglangServer(unittest.IsolatedAsyncioTestCase):
         #     os.rmdir(self.args.workspace)
 
 
+@pytest.mark.nonci
 class TestHuggingFaceModel(unittest.IsolatedAsyncioTestCase):
     async def asyncSetUp(self):
         # Set up the Hugging Face model and tokenizer
@@ -248,6 +249,7 @@ class TestHuggingFaceModel(unittest.IsolatedAsyncioTestCase):
         torch.cuda.empty_cache()
 
 
+@pytest.mark.nonci
 class RawSGLangTest(unittest.IsolatedAsyncioTestCase):
     def setUp(self):
         # Set up the Hugging Face model and tokenizer