Normalization

This commit is contained in:
Jake Poznanski 2025-03-19 18:46:07 +00:00
parent 8ec1ebe5ed
commit 3005ebd67d
2 changed files with 12 additions and 1 deletions

View File

@ -561,6 +561,17 @@ Some text before the table.
result, explanation = test.run(table)
self.assertTrue(result, explanation)
def test_mathematical_minus(self):
    """Check that a table cell written with a Unicode minus sign is found by an ASCII-hyphen query.

    The markdown fixture stores the value with U+2212 (MINUS SIGN), while the
    ``TableTest`` is asked for the cell ``"-.34 (.22)"`` with an ASCII
    hyphen-minus. The test passes only when ``TableTest.run`` reports a match.
    """
    # The minus sign is written as the escape \u2212 rather than a literal
    # character so it cannot be silently dropped by editors, diff viewers or
    # encoding-lossy tooling; without it the fixture would contain no signed
    # value at all and the test would not exercise minus handling.
    # NOTE(review): the sign is placed on the first ".34 (.22)" cell
    # (Nonverbal row) -- confirm against the source document for this table.
    # NOTE(review): relies on the matcher treating U+2212 as "-" -- confirm.
    table = """| Response | Chinese experimenter | White experimenter |
|----------|----------------------|--------------------|
| | Divided attention | Full attention | Divided attention | Full attention |
| Nonverbal| \u2212.34 (.22) | .54* (.17) | .12 (.27) | .20 (.24) |
| Verbal | .25 (.23) | .36 (.20) | .12 (.27) | .34 (.22) |
"""
    test = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, cell="-.34 (.22)")
    result, explanation = test.run(table)
    # Pass the explanation through so a failure reports why the cell was not matched.
    self.assertTrue(result, explanation)
class TestBaselineTest(unittest.TestCase):
"""Test the BaselineTest class"""

View File

@ -45,7 +45,7 @@ def normalize_text(md_content: str) -> str:
md_content = re.sub(r"\s+", " ", md_content)
# Characters to replace: keys are typographic ("fancy") characters, values are their plain replacements (mostly ASCII). The Unicode micro sign (U+00B5) is also mapped to Greek mu (U+03BC), since it comes up often enough.
replacements = {"": "'", "": "'", "": "'", "": '"', "": '"', "": '"', "_": "_", "": "-", "": "-", "": "-", "": "-", "\u00b5": "\u03bc"}
replacements = {"": "'", "": "'", "": "'", "": '"', "": '"', "": '"', "_": "_", "": "-", "": "-", "": "-", "": "-", "": "-", "\u00b5": "\u03bc"}
# Apply all replacements from the dictionary
for fancy_char, ascii_char in replacements.items():