diff --git a/olmocr/bench/test_tests.py b/olmocr/bench/test_tests.py
index 2266e7a..89993ae 100644
--- a/olmocr/bench/test_tests.py
+++ b/olmocr/bench/test_tests.py
@@ -561,6 +561,17 @@ Some text before the table.
         result, explanation = test.run(table)
         self.assertTrue(result, explanation)
 
+    def test_mathematical_minus(self):
+        table = """| Response | Chinese experimenter | White experimenter |
+|----------|----------------------|--------------------|
+| | Divided attention | Full attention | Divided attention | Full attention |
+| Nonverbal| −.34 (.22) | .54* (.17) | .12 (.27) | −.20 (.24) |
+| Verbal | −.25 (.23) | .36 (.20) | .12 (.27) | −.34 (.22) |
+"""
+        test = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, cell="-.34 (.22)")
+        result, explanation = test.run(table)
+        self.assertTrue(result, explanation)
+
 
 class TestBaselineTest(unittest.TestCase):
     """Test the BaselineTest class"""
diff --git a/olmocr/bench/tests.py b/olmocr/bench/tests.py
index f568bef..7aa5892 100644
--- a/olmocr/bench/tests.py
+++ b/olmocr/bench/tests.py
@@ -45,7 +45,7 @@ def normalize_text(md_content: str) -> str:
     md_content = re.sub(r"\s+", " ", md_content)
 
     # Dictionary of characters to replace: keys are fancy characters, values are ASCII equivalents, unicode micro with greek mu comes up often enough too
-    replacements = {"‘": "'", "’": "'", "‚": "'", "“": '"', "”": '"', "„": '"', "_": "_", "–": "-", "—": "-", "‑": "-", "‒": "-", "\u00b5": "\u03bc"}
+    replacements = {"‘": "'", "’": "'", "‚": "'", "“": '"', "”": '"', "„": '"', "_": "_", "–": "-", "—": "-", "‑": "-", "‒": "-", "−": "-", "\u00b5": "\u03bc"}
 
     # Apply all replacements from the dictionary
     for fancy_char, ascii_char in replacements.items():