import unittest
from olmocr.bench.tests import (
BaselineTest,
BasePDFTest,
MathTest,
TableTest,
TestChecked,
TestType,
TextOrderTest,
TextPresenceTest,
ValidationError,
normalize_text,
parse_html_tables,
parse_markdown_tables,
)
class TestNormalizeText(unittest.TestCase):
    """Tests for the ``normalize_text`` helper."""

    def test_whitespace_normalization(self):
        """Tabs and newlines are collapsed into single spaces."""
        input_text = "This has\tmultiple spaces\nand\nnewlines"
        expected = "This has multiple spaces and newlines"
        self.assertEqual(normalize_text(input_text), expected)

    def test_character_replacement(self):
        """Typographic quotes and dashes are replaced with ASCII equivalents."""
        input_text = "This has 'fancy' “quotes” and—dashes"
        expected = "This has 'fancy' \"quotes\" and-dashes"
        self.assertEqual(normalize_text(input_text), expected)

    def test_markdown1(self):
        """Single-asterisk emphasis markers are removed."""
        input_text = "this is *bold*"
        expected = "this is bold"
        self.assertEqual(normalize_text(input_text), expected)

    def test_markdown2(self):
        """Underscore and asterisk markers are removed; the extra trailing underscore is kept."""
        input_text = "_italic__ is *bold*"
        expected = "italic_ is bold"
        self.assertEqual(normalize_text(input_text), expected)

    def test_empty_input(self):
        """Empty input yields empty output."""
        self.assertEqual(normalize_text(""), "")

    def test_brs(self):
        """HTML line-break tags normalize the same way as a newline."""
        # Both the bare and the self-closing spelling of the tag are covered.
        for tag in ("<br>", "<br/>"):
            text = f"Hello{tag}everyone"
            self.assertEqual(normalize_text(text), "Hello everyone")
            self.assertEqual(normalize_text(text), normalize_text("Hello\neveryone"))

    def test_two_stars(self):
        """Bold and italic markers are removed and the en dash becomes a hyphen."""
        self.assertEqual(
            normalize_text(
                "**Georges V.** (2007) – *Le Forez du VIe au IVe millénaire av. J.-C. Territoires, identités et stratégies des sociétés humaines du Massif central dans le bassin amont de la Loire (France)*, thèse de doctorat, université de Bourgogne, Dijon, 2 vol., 435 p."
            ),
            "Georges V. (2007) - Le Forez du VIe au IVe millénaire av. J.-C. Territoires, identités et stratégies des sociétés humaines du Massif central dans le bassin amont de la Loire (France), thèse de doctorat, université de Bourgogne, Dijon, 2 vol., 435 p.",
        )
class TestBasePDFTest(unittest.TestCase):
    """Construction and validation checks for ``BasePDFTest``."""

    @staticmethod
    def _build(**overrides):
        """Return a BasePDFTest built from valid baseline arguments plus *overrides*."""
        arguments = {"pdf": "test.pdf", "page": 1, "id": "test_id", "type": TestType.BASELINE.value}
        arguments.update(overrides)
        return BasePDFTest(**arguments)

    def test_valid_initialization(self):
        """Valid arguments are stored and optional fields take their defaults."""
        instance = self._build()
        self.assertEqual(instance.pdf, "test.pdf")
        self.assertEqual(instance.page, 1)
        self.assertEqual(instance.id, "test_id")
        self.assertEqual(instance.type, TestType.BASELINE.value)
        self.assertEqual(instance.max_diffs, 0)
        self.assertIsNone(instance.checked)
        self.assertIsNone(instance.url)

    def test_empty_pdf(self):
        """An empty ``pdf`` is rejected with ValidationError."""
        self.assertRaises(ValidationError, self._build, pdf="")

    def test_empty_id(self):
        """An empty ``id`` is rejected with ValidationError."""
        self.assertRaises(ValidationError, self._build, id="")

    def test_negative_max_diffs(self):
        """A negative ``max_diffs`` is rejected with ValidationError."""
        self.assertRaises(ValidationError, self._build, max_diffs=-1)

    def test_invalid_test_type(self):
        """An unknown ``type`` string is rejected with ValidationError."""
        self.assertRaises(ValidationError, self._build, type="invalid_type")

    def test_run_method_not_implemented(self):
        """Calling ``run`` on the base class raises NotImplementedError."""
        instance = self._build()
        self.assertRaises(NotImplementedError, instance.run, "content")

    def test_checked_enum(self):
        """A ``TestChecked`` member passed as ``checked`` is stored unchanged."""
        instance = self._build(checked=TestChecked.VERIFIED)
        self.assertEqual(instance.checked, TestChecked.VERIFIED)
class TestTextPresenceTest(unittest.TestCase):
    """Checks for ``TextPresenceTest`` in both PRESENT and ABSENT modes."""

    @staticmethod
    def _make(test_type, text, **options):
        """Build a TextPresenceTest of *test_type* looking for *text*."""
        return TextPresenceTest(pdf="test.pdf", page=1, id="test_id", type=test_type.value, text=text, **options)

    def test_valid_present_test(self):
        """A PRESENT test stores its text and defaults to case-sensitive with no window."""
        checker = self._make(TestType.PRESENT, "test text")
        self.assertEqual(checker.text, "test text")
        self.assertTrue(checker.case_sensitive)
        self.assertIsNone(checker.first_n)
        self.assertIsNone(checker.last_n)

    def test_valid_absent_test(self):
        """An ABSENT test stores its text and the ``case_sensitive`` flag it was given."""
        checker = self._make(TestType.ABSENT, "test text", case_sensitive=False)
        self.assertEqual(checker.text, "test text")
        self.assertFalse(checker.case_sensitive)

    def test_empty_text(self):
        """Empty ``text`` is rejected with ValidationError."""
        with self.assertRaises(ValidationError):
            self._make(TestType.PRESENT, "")

    def test_present_text_exact_match(self):
        """PRESENT passes when the text occurs verbatim."""
        checker = self._make(TestType.PRESENT, "target text")
        passed, _ = checker.run("This is some target text in a document")
        self.assertTrue(passed)

    def test_present_text_not_found(self):
        """PRESENT fails and names the missing text in its explanation."""
        checker = self._make(TestType.PRESENT, "missing text")
        passed, reason = checker.run("This document doesn't have the target")
        self.assertFalse(passed)
        self.assertIn("missing text", reason)

    def test_present_text_with_max_diffs(self):
        """PRESENT with ``max_diffs=2`` accepts a document with two extra letters."""
        checker = self._make(TestType.PRESENT, "target text", max_diffs=2)
        passed, _ = checker.run("This is some targett textt in a document")
        self.assertTrue(passed)

    def test_absent_text_found(self):
        """ABSENT fails and names the offending text when it occurs."""
        checker = self._make(TestType.ABSENT, "target text")
        passed, reason = checker.run("This is some target text in a document")
        self.assertFalse(passed)
        self.assertIn("target text", reason)

    def test_absent_text_found_diffs(self):
        """ABSENT with ``max_diffs=2`` fails up to two extra letters and passes at three."""
        checker = self._make(TestType.ABSENT, "target text", max_diffs=2)
        documents = [
            ("This is some target text in a document", False),
            ("This is some targett text in a document", False),
            ("This is some targettt text in a document", False),
            ("This is some targetttt text in a document", True),
        ]
        for document, should_pass in documents:
            passed, _ = checker.run(document)
            verify = self.assertTrue if should_pass else self.assertFalse
            verify(passed)

    def test_absent_text_not_found(self):
        """ABSENT passes when the text does not occur."""
        checker = self._make(TestType.ABSENT, "missing text")
        passed, _ = checker.run("This document doesn't have the target")
        self.assertTrue(passed)

    def test_case_insensitive_present(self):
        """PRESENT with ``case_sensitive=False`` matches text of a different case."""
        checker = self._make(TestType.PRESENT, "TARGET TEXT", case_sensitive=False)
        passed, _ = checker.run("This is some target text in a document")
        self.assertTrue(passed)

    def test_case_insensitive_absent(self):
        """ABSENT with ``case_sensitive=False`` fails on text of a different case."""
        checker = self._make(TestType.ABSENT, "TARGET TEXT", case_sensitive=False)
        passed, _ = checker.run("This is some target text in a document")
        self.assertFalse(passed)

    def test_first_n_limit(self):
        """``first_n`` restricts the search to the start of the document."""
        document = "beginning of text, but not the end"

        inside_window = self._make(TestType.PRESENT, "beginning", first_n=20)
        passed, _ = inside_window.run(document)
        self.assertTrue(passed)

        # "end" sits past the first 20 characters, so it must not be found.
        outside_window = self._make(TestType.PRESENT, "end", first_n=20)
        passed, _ = outside_window.run(document)
        self.assertFalse(passed)

    def test_last_n_limit(self):
        """``last_n`` restricts the search to the end of the document."""
        document = "beginning of text, but not the end"

        inside_window = self._make(TestType.PRESENT, "end", last_n=20)
        passed, _ = inside_window.run(document)
        self.assertTrue(passed)

        # "beginning" sits before the last 20 characters, so it must not be found.
        outside_window = self._make(TestType.PRESENT, "beginning", last_n=20)
        passed, _ = outside_window.run(document)
        self.assertFalse(passed)

    def test_both_first_and_last_n(self):
        """With both windows set, text found only in the middle is not matched."""
        document = "beginning of text, middle part, but not the end"

        at_start = self._make(TestType.PRESENT, "beginning", first_n=15, last_n=10)
        passed, _ = at_start.run(document)
        self.assertTrue(passed)

        # "middle" falls in neither the first 15 nor the last 10 characters.
        in_middle = self._make(TestType.PRESENT, "middle", first_n=15, last_n=10)
        passed, _ = in_middle.run(document)
        self.assertFalse(passed)

    def test_unicode_normalized_forms(self):
        """A precomposed accented letter matches its base-letter-plus-combining-accent form."""
        checker = self._make(TestType.PRESENT, "I like to eat at a caf\u00e9")
        for document in ("I like to eat at a caf\u00e9", "I like to eat at a cafe\u0301"):
            passed, _ = checker.run(document)
            self.assertTrue(passed)
class TestTextOrderTest(unittest.TestCase):
    """Checks for ``TextOrderTest``."""

    @staticmethod
    def _make(before, after, **options):
        """Build a TextOrderTest; ``type`` defaults to ORDER unless given in *options*."""
        options.setdefault("type", TestType.ORDER.value)
        return TextOrderTest(pdf="test.pdf", page=1, id="test_id", before=before, after=after, **options)

    def test_valid_initialization(self):
        """``before`` and ``after`` are stored as given."""
        order_test = self._make("first text", "second text")
        self.assertEqual(order_test.before, "first text")
        self.assertEqual(order_test.after, "second text")

    def test_invalid_test_type(self):
        """A non-ORDER type is rejected with ValidationError."""
        with self.assertRaises(ValidationError):
            self._make("first text", "second text", type=TestType.PRESENT.value)

    def test_empty_before(self):
        """An empty ``before`` is rejected with ValidationError."""
        with self.assertRaises(ValidationError):
            self._make("", "second text")

    def test_empty_after(self):
        """An empty ``after`` is rejected with ValidationError."""
        with self.assertRaises(ValidationError):
            self._make("first text", "")

    def test_correct_order(self):
        """The test passes when ``before`` precedes ``after``."""
        order_test = self._make("first", "second")
        passed, _ = order_test.run("This has first and then second in correct order")
        self.assertTrue(passed)

    def test_incorrect_order(self):
        """The test fails when ``before`` only occurs after ``after``."""
        order_test = self._make("second", "first")
        passed, _ = order_test.run("This has first and then second in correct order")
        self.assertFalse(passed)

    def test_before_not_found(self):
        """The test fails when ``before`` is missing from the document."""
        order_test = self._make("missing", "present")
        passed, _ = order_test.run("This text has present but not the other word")
        self.assertFalse(passed)

    def test_after_not_found(self):
        """The test fails when ``after`` is missing from the document."""
        order_test = self._make("present", "missing")
        passed, _ = order_test.run("This text has present but not the other word")
        self.assertFalse(passed)

    def test_max_diffs(self):
        """With ``max_diffs=1`` each phrase may differ by one character."""
        order_test = self._make("first", "second", max_diffs=1)
        passed, _ = order_test.run("This has firsst and then secand in correct order")
        self.assertTrue(passed)

    def test_multiple_occurrences(self):
        """Any occurrence of ``before`` ahead of any occurrence of ``after`` is enough."""
        same_phrase = self._make("target", "target")
        passed, _ = same_phrase.run("This has target and then target again")
        self.assertTrue(passed)

        # A comes before B, but the first B also comes before the second A,
        # so asking for B-before-A still passes.
        reversed_pair = self._make("B", "A")
        passed, _ = reversed_pair.run("A B A B")
        self.assertTrue(passed)
class TestTableTest(unittest.TestCase):
"""Test the TableTest class"""
def setUp(self):
"""Set up test fixtures"""
self.markdown_table = """
| Header 1 | Header 2 | Header 3 |
| -------- | -------- | -------- |
| Cell A1 | Cell A2 | Cell A3 |
| Cell B1 | Cell B2 | Cell B3 |
"""
self.html_table = """
Header 1 |
Header 2 |
Header 3 |
Cell A1 |
Cell A2 |
Cell A3 |
Cell B1 |
Cell B2 |
Cell B3 |
"""
def test_valid_initialization(self):
    """A TABLE test given only ``cell`` has every relationship field set to ''."""
    table_test = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, cell="target cell")
    self.assertEqual(table_test.cell, "target cell")
    for relation in ("up", "down", "left", "right", "top_heading", "left_heading"):
        self.assertEqual(getattr(table_test, relation), "")
def test_invalid_test_type(self):
    """Building a TableTest with the PRESENT type raises ValidationError."""
    arguments = {"pdf": "test.pdf", "page": 1, "id": "test_id", "type": TestType.PRESENT.value, "cell": "target cell"}
    self.assertRaises(ValidationError, TableTest, **arguments)
def test_parse_markdown_tables(self):
    """``parse_markdown_tables`` finds one 3x3 table in the markdown fixture."""
    # This instance is constructed but not consulted by the assertions below.
    _unused = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, cell="Cell A2")
    parsed = parse_markdown_tables(self.markdown_table)
    self.assertEqual(len(parsed), 1)
    self.assertEqual(parsed[0].data.shape, (3, 3))  # 3 rows, 3 columns
    expected_cells = (((0, 0), "Header 1"), ((1, 1), "Cell A2"), ((2, 2), "Cell B3"))
    for position, content in expected_cells:
        self.assertEqual(parsed[0].data[position], content)
def test_parse_html_tables(self):
    """``parse_html_tables`` finds one 3x3 table in the HTML fixture."""
    # This instance is constructed but not consulted by the assertions below.
    _unused = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, cell="Cell A2")
    parsed = parse_html_tables(self.html_table)
    self.assertEqual(len(parsed), 1)
    self.assertEqual(parsed[0].data.shape, (3, 3))  # 3 rows, 3 columns
    expected_cells = (((0, 0), "Header 1"), ((1, 1), "Cell A2"), ((2, 2), "Cell B3"))
    for position, content in expected_cells:
        self.assertEqual(parsed[0].data[position], content)
def test_match_cell(self):
    """A cell that exists in the markdown fixture is found."""
    arguments = {"pdf": "test.pdf", "page": 1, "id": "test_id", "type": TestType.TABLE.value, "cell": "Cell A2"}
    passed, _ = TableTest(**arguments).run(self.markdown_table)
    self.assertTrue(passed)
def test_cell_not_found(self):
    """A cell absent from the table fails with a 'No cell matching' explanation."""
    arguments = {"pdf": "test.pdf", "page": 1, "id": "test_id", "type": TestType.TABLE.value, "cell": "Missing Cell"}
    passed, reason = TableTest(**arguments).run(self.markdown_table)
    self.assertFalse(passed)
    self.assertIn("No cell matching", reason)
def test_up_relationship(self):
    """Cell A2 passes with ``up='Header 2'`` and fails with an unrelated value."""
    base = {"pdf": "test.pdf", "page": 1, "id": "test_id", "type": TestType.TABLE.value, "cell": "Cell A2"}

    matching = TableTest(up="Header 2", **base)
    passed, _ = matching.run(self.markdown_table)
    self.assertTrue(passed)

    # A wrong neighbour must be rejected with an explanation.
    mismatched = TableTest(up="Wrong Header", **base)
    passed, reason = mismatched.run(self.markdown_table)
    self.assertFalse(passed)
    self.assertIn("doesn't match expected", reason)
def test_down_relationship(self):
    """Cell A2 passes with ``down='Cell B2'`` and fails with an unrelated value."""
    base = {"pdf": "test.pdf", "page": 1, "id": "test_id", "type": TestType.TABLE.value, "cell": "Cell A2"}

    matching = TableTest(down="Cell B2", **base)
    passed, _ = matching.run(self.markdown_table)
    self.assertTrue(passed)

    # A wrong neighbour must be rejected with an explanation.
    mismatched = TableTest(down="Wrong Cell", **base)
    passed, reason = mismatched.run(self.markdown_table)
    self.assertFalse(passed)
    self.assertIn("doesn't match expected", reason)
def test_left_relationship(self):
    """Cell A2 passes with ``left='Cell A1'`` and fails with an unrelated value."""
    base = {"pdf": "test.pdf", "page": 1, "id": "test_id", "type": TestType.TABLE.value, "cell": "Cell A2"}

    matching = TableTest(left="Cell A1", **base)
    passed, _ = matching.run(self.markdown_table)
    self.assertTrue(passed)

    # A wrong neighbour must be rejected with an explanation.
    mismatched = TableTest(left="Wrong Cell", **base)
    passed, reason = mismatched.run(self.markdown_table)
    self.assertFalse(passed)
    self.assertIn("doesn't match expected", reason)
def test_right_relationship(self):
    """Cell A2 passes with ``right='Cell A3'`` and fails with an unrelated value."""
    base = {"pdf": "test.pdf", "page": 1, "id": "test_id", "type": TestType.TABLE.value, "cell": "Cell A2"}

    matching = TableTest(right="Cell A3", **base)
    passed, _ = matching.run(self.markdown_table)
    self.assertTrue(passed)

    # A wrong neighbour must be rejected with an explanation.
    mismatched = TableTest(right="Wrong Cell", **base)
    passed, reason = mismatched.run(self.markdown_table)
    self.assertFalse(passed)
    self.assertIn("doesn't match expected", reason)
def test_top_heading_relationship(self):
    """Cell B2 passes with ``top_heading='Header 2'`` and fails with an unrelated value."""
    base = {"pdf": "test.pdf", "page": 1, "id": "test_id", "type": TestType.TABLE.value, "cell": "Cell B2"}

    matching = TableTest(top_heading="Header 2", **base)
    passed, _ = matching.run(self.markdown_table)
    self.assertTrue(passed)

    # A wrong heading must be rejected with an explanation.
    mismatched = TableTest(top_heading="Wrong Header", **base)
    passed, reason = mismatched.run(self.markdown_table)
    self.assertFalse(passed)
    self.assertIn("doesn't match expected", reason)
def test_left_heading_relationship(self):
    """Cell A3 passes with ``left_heading='Cell A1'`` and fails with an unrelated value."""
    base = {"pdf": "test.pdf", "page": 1, "id": "test_id", "type": TestType.TABLE.value, "cell": "Cell A3"}

    matching = TableTest(left_heading="Cell A1", **base)
    passed, _ = matching.run(self.markdown_table)
    self.assertTrue(passed)

    # A wrong heading must be rejected with an explanation.
    mismatched = TableTest(left_heading="Wrong Cell", **base)
    passed, reason = mismatched.run(self.markdown_table)
    self.assertFalse(passed)
    self.assertIn("doesn't match expected", reason)
def test_multiple_relationships(self):
    """Four neighbours of Cell A2 are checked together; one wrong neighbour fails the test."""
    common = {"pdf": "test.pdf", "page": 1, "id": "test_id", "type": TestType.TABLE.value, "cell": "Cell A2"}
    neighbours = {"up": "Header 2", "down": "Cell B2", "left": "Cell A1", "right": "Cell A3"}

    all_correct = TableTest(**common, **neighbours)
    passed, _ = all_correct.run(self.markdown_table)
    self.assertTrue(passed)

    # Only ``left`` is wrong here; the other three values still match.
    one_wrong = TableTest(**common, **{**neighbours, "left": "Wrong Cell"})
    passed, reason = one_wrong.run(self.markdown_table)
    self.assertFalse(passed)
    self.assertIn("doesn't match expected", reason)
def test_no_tables_found(self):
    """Plain text without a table fails with the exact 'No tables found' message."""
    arguments = {"pdf": "test.pdf", "page": 1, "id": "test_id", "type": TestType.TABLE.value, "cell": "Cell A2"}
    passed, reason = TableTest(**arguments).run("This is plain text with no tables")
    self.assertFalse(passed)
    self.assertEqual(reason, "No tables found in the content")
def test_fuzzy_matching(self):
    """With ``max_diffs=1`` a cell that is one character short still matches."""
    fuzzy = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, cell="Cell A2", max_diffs=1)
    # Drop one letter from the target cell in the fixture.
    with_typo = self.markdown_table.replace("Cell A2", "Cel A2")
    passed, _ = fuzzy.run(with_typo)
    self.assertTrue(passed)
def test_with_stripped_content(self):
    """The fixture is still parsed after leading and trailing whitespace is removed."""
    finder = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, cell="Cell A2")
    passed, reason = finder.run(self.markdown_table.strip())
    self.assertTrue(passed, f"Table test failed with stripped content: {reason}")
def test_table_at_end_of_file(self):
    """A table that ends the content, after a line of text, is detected."""
    finder = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, cell="Cell A2")
    # Text first, then the table with no trailing newline.
    document = "".join(["Some text before the table.\n", self.markdown_table.strip()])
    passed, reason = finder.run(document)
    self.assertTrue(passed, f"Table at end of file not detected: {reason}")
def test_table_at_end_with_no_trailing_newline(self):
    """The fixture is detected after its trailing whitespace is removed."""
    finder = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, cell="Cell A2")
    passed, reason = finder.run(self.markdown_table.rstrip())
    self.assertTrue(passed, f"Table without trailing newline not detected: {reason}")
def test_table_at_end_with_extra_spaces(self):
    """A table whose lines all carry trailing whitespace is detected."""
    finder = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, cell="Cell A2")
    # Append a space to every line, including the empty first and last ones.
    padded = "\n".join(f"{row} " for row in self.markdown_table.split("\n"))
    passed, reason = finder.run(padded)
    self.assertTrue(passed, f"Table with extra spaces not detected: {reason}")
def test_table_at_end_with_mixed_whitespace(self):
    """A table followed by spaces and a tab is detected."""
    finder = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, cell="Cell A2")
    # Text, then the stripped table, then a mix of space and tab characters.
    pieces = ["Some text before the table.\n", self.markdown_table.strip(), " \t "]
    passed, reason = finder.run("".join(pieces))
    self.assertTrue(passed, f"Table with mixed whitespace not detected: {reason}")
def test_malformed_table_at_end(self):
    """A table with some closing pipes missing is still detected."""
    # Three of the four rows below lack their final pipe.
    document = """
Some text before the table.
| Header 1 | Header 2 | Header 3
| -------- | -------- | --------
| Cell A1 | Cell A2 | Cell A3 |
| Cell B1 | Cell B2 | Cell B3"""
    finder = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, cell="Cell A2")
    passed, reason = finder.run(document)
    self.assertTrue(passed, f"Malformed table at end not detected: {reason}")
def test_incomplete_table_at_end(self):
    """A table without its separator row is still detected."""
    # The dashed row between the header and the data is omitted on purpose.
    document = """
Some text before the table.
| Header 1 | Header 2 | Header 3 |
| Cell A1 | Cell A2 | Cell A3 |
| Cell B1 | Cell B2 | Cell B3 |"""
    finder = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, cell="Cell A2")
    passed, reason = finder.run(document)
    self.assertTrue(passed, f"Incomplete table at end not detected: {reason}")
def test_table_with_excessive_blank_lines_at_end(self):
    """A table followed by ten extra newlines is detected."""
    finder = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, cell="Cell A2")
    document = self.markdown_table + "\n" * 10
    passed, reason = finder.run(document)
    self.assertTrue(passed, f"Table with blank lines at end not detected: {reason}")
def test_table_at_end_after_long_text(self):
    """A table placed after a long run of filler text is detected."""
    finder = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, cell="Cell A2")
    # One hundred repetitions of the filler phrase precede the table.
    filler = "Lorem ipsum dolor sit amet, " * 100
    document = "\n".join([filler, self.markdown_table.strip()])
    passed, reason = finder.run(document)
    self.assertTrue(passed, f"Table after long text not detected: {reason}")
def test_valid_table_at_eof_without_newline(self):
    """A well-formed table with nothing after its last pipe is detected."""
    raw_table = """
| Header 1 | Header 2 | Header 3 |
| -------- | -------- | -------- |
| Cell A1 | Cell A2 | Cell A3 |
| Cell B1 | Cell B2 | Cell B3 |"""
    # Stripping removes the leading newline; there is no trailing one to remove.
    document = raw_table.strip()
    finder = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, cell="Cell A2")
    passed, reason = finder.run(document)
    self.assertTrue(passed, f"Valid table at EOF without newline not detected: {reason}")
def test_normalizing(self):
    """A ``top_heading`` written with an embedded newline matches the one-line header."""
    survey = """| Question - – Satisfaction on scale of 10 | Response | Resident Sample | Business Sample |
|----------------------------------------|----------|----------------|-----------------|
| Planning for and managing residential, commercial and industrial development | Rating of 8, 9 or 10 | 13% | 11% |
| | Average rating | 6.4 | 5.7 |
| | Don’t know responses | 11% | 6% |
| Environmental protection, support for green projects (e.g. green grants, building retrofits programs, zero waste) | Rating of 8, 9 or 10 | 35% | 34% |
| | Average rating | 8.0 | 7.5 |
| | Don’t know responses | 8% | 6% |
| Providing and maintaining parks and green spaces | Rating of 8, 9 or 10 | 42% | 41% |
| | Average rating | 7.7 | 7.3 |
| | Don’t know responses | 1% | 1% |"""
    heading_check = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, cell="6%", top_heading="Business\nSample")
    passed, reason = heading_check.run(survey)
    self.assertTrue(passed, reason)
def test_mathematical_minus(self):
    """A cell written with an ASCII hyphen matches table text that uses the minus sign."""
    results = """| Response | Chinese experimenter | White experimenter |
|----------|----------------------|--------------------|
| | Divided attention | Full attention | Divided attention | Full attention |
| Nonverbal| −.34 (.22) | .54* (.17) | .12 (.27) | −.20 (.24) |
| Verbal | −.25 (.23) | .36 (.20) | .12 (.27) | −.34 (.22) |
"""
    minus_check = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, cell="-.34 (.22)")
    passed, reason = minus_check.run(results)
    self.assertTrue(passed, reason)
def test_markdown_marker(self):
    """The first data cell in a column has the column header as its ``up`` neighbour."""
    scorecard = """| CATEGORY | POINTS EARNED |
|------------------------------|------------------|
| Sustainable Sites | 9 |
| Water Efficiency | 3 |
| Energy & Atmosphere | 12 |
| Materials & Resources | 6 |
| Indoor Environmental Quality | 11 |
| Innovation & Design Process | 5 |
| TOTAL | 46 |"""
    header_check = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, cell="9", up="POINTS EARNED")
    passed, reason = header_check.run(scorecard)
    self.assertTrue(passed, reason)
def test_diffs(self):
    """``max_diffs=2`` rejects a heavily abbreviated neighbour and accepts a near match."""
    scorecard = """| CATEGORY | POINTS EARNED |
|------------------------------|------------------|
| Sustainable Sites | 9 |
| Water Efficiency | 3 |
| Energy & Atmosphere | 12 |
| Materials & Resources | 6 |
| Indoor Environmental Quality | 11 |
| Innovation & Design Process | 5 |
| TOTAL | 46 |"""
    base = {"pdf": "test.pdf", "page": 1, "id": "test_id", "type": TestType.TABLE.value, "cell": "9", "max_diffs": 2}

    too_different = TableTest(left="Sustl Sie", **base)
    passed, reason = too_different.run(scorecard)
    self.assertFalse(passed, reason)

    close_enough = TableTest(left="Sustainable Site", **base)
    passed, reason = close_enough.run(scorecard)
    self.assertTrue(passed, reason)
def test_markdown_marker2(self):
    """'20 μM' with ``up='.002 nM'`` is expected to fail."""
    # NOTE(review): two cells in this literal are split across physical lines
    # ("Concentration / level" and "250 / μM") - confirm that is intended.
    concentrations = """| Concentration
level | [CO] | [SO2] | [NOx] |
|------------------------|-----------|-------|----------|
| Control | 0 μM | 0 μM | 0 nM |
| Low | 250
μM | 8 μM | 0.002 nM |
| Medium | 625 μM | 20 μM | 0.005 nM |
| High | 1250 μM | 40 μM | 0.01 nM |"""
    up_check = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, cell="20 μM", up=".002 nM")
    passed, reason = up_check.run(concentrations)
    self.assertFalse(passed, reason)
def test_marker3(self):
    """'2,8' with a two-line ``left_heading`` is expected to fail."""
    # NOTE(review): the row labels in this literal are split across physical
    # lines - confirm that is intended.
    sleep_stats = """| | N | Minimum | Maximum | Gemiddelde | Sd |
|-----------------------------------------------|-------|---------|---------|------------|-----|
| Slaapkwaliteit tijdens
gewone nachten | 2017 | 1,0 | 6,0 | 3,9 | 1,0 |
| Slaapkwaliteit tijdens
consignatiediensten | 19816 | 1,0 | 6,0 | 2,8 | 1,2 |
"""
    heading = "Slaapkwaliteit tijdens\nconsignatiediensten"
    heading_check = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, cell="2,8", left_heading=heading)
    passed, reason = heading_check.run(sleep_stats)
    self.assertFalse(passed, reason)
def test_big_table(self):
    """The first question cell has the second question cell as its ``down`` neighbour."""
    # NOTE(review): this literal carries no HTML or markdown table markup,
    # only one cell per line - confirm it matches the intended fixture.
    survey = """
Base: Resident respondents (n=1,315) and Business respondents (n=397)
Question – Satisfaction on scale of 10 |
Response |
Resident Sample |
Business Sample |
Planning for and managing residential, commercial and industrial development |
Rating of 8, 9 or 10 |
13% |
11% |
Average rating |
6.4 |
5.7 |
Don't know responses |
11% |
6% |
Environmental protection, support for green projects (e.g. green grants, building retrofits programs, zero waste) |
Rating of 8, 9 or 10 |
35% |
34% |
Average rating |
8.0 |
7.5 |
Don't know responses |
8% |
6% |
Providing and maintaining parks and green spaces |
Rating of 8, 9 or 10 |
42% |
41% |
Average rating |
7.7 |
7.3 |
Don't know responses |
1% |
1% |
"""
    # The expected neighbour is written with line breaks inside the phrase.
    arguments = {
        "pdf": "test.pdf",
        "page": 1,
        "id": "test_id",
        "type": TestType.TABLE.value,
        "max_diffs": 5,
        "cell": "Planning for and managing residential, commercial and industrial development",
        "down": "Environmental protection,\nsupport for green projects\n(e.g. green grants,\nbuilding retrofits programs,\nzero waste)",
    }
    passed, reason = TableTest(**arguments).run(survey)
    self.assertTrue(passed, reason)
def test_html_rowspans_colspans(self):
    """Neighbour and heading checks against a sales table, run case by case."""
    # NOTE(review): this literal carries no HTML markup, only one cell per
    # line - confirm it matches the intended fixture.
    sales = """
Product Category |
Product Subcategory |
Quarterly Sales ($000s) |
Annual Total |
Q1 |
Q2 |
Q3 |
Q4 |
Electronics |
Smartphones |
245 |
278 |
312 |
389 |
1,224 |
Laptops |
187 |
192 |
243 |
297 |
919 |
Tablets |
95 |
123 |
135 |
156 |
509 |
Accessories |
64 |
72 |
87 |
105 |
328 |
Home Appliances |
Refrigerators |
132 |
145 |
151 |
162 |
590 |
Washing Machines |
98 |
112 |
127 |
143 |
480 |
Microwaves |
54 |
67 |
72 |
84 |
277 |
Furniture |
Sofas |
112 |
128 |
134 |
142 |
516 |
Tables |
87 |
95 |
103 |
124 |
409 |
Chairs |
76 |
84 |
92 |
110 |
362 |
Quarterly Totals |
1,150 |
1,296 |
1,456 |
1,712 |
5,614 |
"""
    # Each entry: (cell, relationship keyword and value, whether run() should pass).
    cases = [
        ("Refrigerators", {"left": "Home Appliances"}, True),
        ("Washing Machines", {"left": "Home Appliances"}, True),
        ("Microwaves", {"left": "Home Appliances"}, True),
        ("Sofas", {"top_heading": "Product Subcategory"}, True),
        ("135", {"top_heading": "Q3"}, True),
        ("135", {"top_heading": "Quarterly Sales ($000s)"}, True),
        ("1,712", {"top_heading": "Quarterly Sales ($000s)"}, True),
        ("135", {"top_heading": "Q2"}, False),
        ("135", {"top_heading": "Q1"}, False),
        ("135", {"top_heading": "Q4"}, False),
        ("Home Appliances", {"top_heading": "Product Category"}, True),
        ("Washing Machines", {"top_heading": "Product Category"}, False),
        ("Washing Machines", {"top_heading": "Q3"}, False),
        ("Washing Machines", {"top_heading": "Quarterly Sales ($000s)"}, False),
        ("Electronics", {"right": "Laptops"}, True),
        ("Electronics", {"right": "Accessories"}, True),
        # TODO Skipping these for now
        # ("Quarterly Sales ($000s)", {"down": "Q2"}, True),
        # ("Q2", {"up": "Quarterly Sales ($000s)"}, True),
    ]
    for cell, relation, should_pass in cases:
        probe = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, cell=cell, **relation)
        passed, reason = probe.run(sales)
        verify = self.assertTrue if should_pass else self.assertFalse
        verify(passed, reason)
def test_multiple_markdown_tables(self):
    """Check cell lookups against a single document that holds two markdown tables."""
    document = """
# First Table
| Name | Age | Role |
| ---- | --- | ---- |
| John | 28 | Developer |
| Jane | 32 | Designer |
| Bob | 45 | Manager |
Some text between tables...
# Second Table
| Department | Budget | Employees |
| ---------- | ------ | --------- |
| Engineering | 1.2M | 15 |
| Design | 0.8M | 8 |
| Marketing | 1.5M | 12 |
| HR | 0.5M | 5 |
"""
    # One dict of TableTest constraints per check; they run in order and the
    # first failing check aborts the test, exactly like sequential asserts.
    expectations = [
        # Left/right neighbours inside the first table
        {"cell": "John", "right": "28"},
        {"cell": "32", "left": "Jane"},
        # Left/right neighbours inside the second table
        {"cell": "Engineering", "right": "1.2M"},
        {"cell": "12", "left": "1.5M"},
        # Column headings must resolve within the table that owns the cell
        {"cell": "Bob", "top_heading": "Name"},
        {"cell": "HR", "top_heading": "Department"},
    ]
    for constraints in expectations:
        table_test = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, **constraints)
        passed, reason = table_test.run(document)
        self.assertTrue(passed, reason)
def test_multiple_html_tables(self):
    """Check cell lookups against a single document that holds two HTML tables."""
    # NOTE(review): as it appears here the fixture carries no HTML tags (only
    # cell text followed by " |"); presumably the <table> markup was lost in
    # extraction -- confirm against the repository copy of this file.
    document = """
First Table
Country |
Capital |
Population |
USA |
Washington DC |
331M |
France |
Paris |
67M |
Japan |
Tokyo |
126M |
Some text between tables...
Second Table
Company |
Industry |
Revenue |
Employees |
ABC Corp |
Technology |
$5B |
10,000 |
XYZ Inc |
Healthcare |
$2.5B |
8,500 |
Acme Co |
Manufacturing |
$1.8B |
15,000 |
Global LLC |
Finance |
$3.2B |
6,200 |
"""
    # Constraint sets are evaluated in order; the first failure stops the test.
    checks = (
        # First table: horizontal neighbours
        dict(cell="USA", right="Washington DC"),
        dict(cell="126M", left="Tokyo"),
        # Second table: horizontal neighbours
        dict(cell="XYZ Inc", right="Healthcare"),
        dict(cell="15,000", left="$1.8B"),
        # Column headings in each table
        dict(cell="Tokyo", top_heading="Capital"),
        dict(cell="Finance", top_heading="Industry"),
    )
    for check in checks:
        table_test = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, **check)
        ok, why = table_test.run(document)
        self.assertTrue(ok, why)
def test_mixed_markdown_and_html_tables(self):
    """Check cell lookups when one document mixes a markdown table with an HTML table."""
    # NOTE(review): the second half of this fixture shows no HTML tags in this
    # view (only cell text followed by " |"); presumably the markup was lost in
    # extraction -- confirm against the repository copy of this file.
    document = """
# Markdown Table
| Product | Price | Quantity |
| ------- | ----- | -------- |
| Apple | $1.20 | 100 |
| Orange | $0.80 | 150 |
| Banana | $0.60 | 200 |
HTML Table
Month |
Income |
Expenses |
Profit |
January |
$10,000 |
$8,000 |
$2,000 |
February |
$12,000 |
$9,500 |
$2,500 |
March |
$15,000 |
$10,200 |
$4,800 |
"""
    # Checks run in the listed order; the first failure stops the test.
    scenarios = [
        # Neighbour lookup in the markdown table
        {"cell": "Orange", "right": "$0.80"},
        # Neighbour lookup in the HTML table
        {"cell": "February", "right": "$12,000"},
        # Column-heading lookups, one per table
        {"cell": "100", "top_heading": "Quantity"},
        {"cell": "$4,800", "top_heading": "Profit"},
    ]
    for scenario in scenarios:
        checker = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, **scenario)
        outcome, details = checker.run(document)
        self.assertTrue(outcome, details)
def test_br_tags_replacement(self):
    """Test that line-break (br) tags inside a table cell are correctly replaced with newlines"""
    # NOTE(review): no br tags or other HTML markup are visible in this fixture
    # as shown; presumably they were lost in extraction -- confirm against the
    # repository copy of this file.
    markup = """
Header 1 |
Header 2 |
Line 1 Line 2 Line 3 |
Single line |
"""
    # The multi-line cell is expected to match once its lines are joined by spaces.
    wanted_cell = "Line 1 Line 2 Line 3"
    checker = TableTest(
        pdf="test.pdf",
        page=1,
        id="test_id",
        type=TestType.TABLE.value,
        cell=wanted_cell,
    )
    passed, reason = checker.run(markup)
    self.assertTrue(passed, reason)
def test_real_complicated_table(self):
    """Check top-heading lookups against a large table taken from a real document."""
    # NOTE(review): the header rows of this fixture appear empty in this view
    # (bare "|" lines) and no HTML tags are visible; presumably the markup and
    # heading text were lost in extraction -- confirm against the repository copy.
    fixture = """
Table 1 Differences in diagnoses, gender and family status for participants with a suicide attempt and those without a suicide attempt within the 12-month follow-up interval |
|
|
|
|
|
|
|
F0 |
1 |
0.76 |
0 |
0.00 |
0.00 |
1 |
1.00 |
F1 |
17 |
12.88 |
12 |
27.91 |
4.39 |
1 |
0.04 |
F2 |
1 |
0.76 |
0 |
0.00 |
0.00 |
1 |
1.00 |
F3 |
106 |
80.30 |
31 |
72.09 |
0.74 |
1 |
0.39 |
F4 |
42 |
31.82 |
17 |
39.53 |
0.61 |
1 |
0.43 |
F5 |
5 |
3.79 |
5 |
11.63 |
2.44 |
1 |
0.12 |
F6 |
20 |
15.15 |
19 |
44.19 |
14.48 |
1 |
0.00 |
F7 |
0 |
0.00 |
0 |
0.00 |
— |
— |
— |
F8 |
1 |
0.76 |
0 |
0.00 |
0.00 |
1 |
1.00 |
F9 |
2 |
1.52 |
1 |
2.33 |
0.00 |
1 |
1.00 |
|
|
|
|
3.09 |
2 |
0.21 |
Female |
75 |
56.8 |
24 |
55.8 |
|
|
|
Male |
57 |
43.2 |
18 |
41.9 |
|
|
|
Diverse |
0 |
0 |
1 |
2.3 |
|
|
|
|
|
|
|
4.87 |
4 |
0.30 |
Single |
55 |
41.7 |
14 |
32.6 |
|
|
|
Partnership |
25 |
18.9 |
9 |
20.9 |
|
|
|
Married |
27 |
20.5 |
5 |
11.6 |
|
|
|
Divorced |
20 |
15.2 |
11 |
25.6 |
|
|
|
Widowed |
1 |
0.8 |
1 |
2.3 |
|
|
|
"""
    # (cell, top_heading) pairs, checked in order; the first failure stops the test.
    heading_checks = [
        ("4.39", "χ2"),
        ("12.88", "%"),
        # Account for the superscript in the header
        ("12.88", "Participants with no suicide attempt (n = 132)a"),
        # The table caption is also expected to count as a top heading
        (
            "12.88",
            "Table 1 Differences in diagnoses, gender and family status for participants with a suicide attempt and those without a suicide attempt within the 12-month follow-up interval",
        ),
    ]
    for cell_text, heading in heading_checks:
        checker = TableTest(
            pdf="test.pdf",
            page=1,
            id="test_id",
            type=TestType.TABLE.value,
            cell=cell_text,
            top_heading=heading,
        )
        passed, reason = checker.run(fixture)
        self.assertTrue(passed, reason)
class TestBaselineTest(unittest.TestCase):
    """Exercise BaselineTest construction and its content checks."""

    def _make(self, **extra):
        """Build a BaselineTest from the shared fixture fields plus any extra keyword arguments."""
        return BaselineTest(pdf="test.pdf", page=1, id="test_id", type=TestType.BASELINE.value, **extra)

    def test_valid_initialization(self):
        """A max_repeats value given at construction is stored on the instance."""
        baseline = self._make(max_repeats=50)
        self.assertEqual(baseline.max_repeats, 50)

    def test_non_empty_content(self):
        """Ordinary text passes the baseline check."""
        baseline = self._make()
        passed, _reason = baseline.run("This is some normal content")
        self.assertTrue(passed)

    def test_empty_content(self):
        """Whitespace-only content fails and the explanation mentions missing alphanumerics."""
        baseline = self._make()
        passed, reason = baseline.run(" \n\t ")
        self.assertFalse(passed)
        self.assertIn("no alpha numeric characters", reason)

    def test_repeating_content(self):
        """Content that repeats more often than max_repeats allows fails."""
        baseline = self._make(max_repeats=2)
        # "abc" repeated ten times, against a limit of two repeats
        looped_text = "abc" * 10
        passed, reason = baseline.run(looped_text)
        self.assertFalse(passed)
        self.assertIn("repeating", reason)

    def test_content_with_disallowed_characters(self):
        """Content containing Chinese characters fails as disallowed."""
        baseline = self._make()
        passed, reason = baseline.run("This has Chinese characters: 你好")
        self.assertFalse(passed)
        self.assertIn("disallowed characters", reason)

    def test_content_with_emoji(self):
        """Content containing an emoji fails and the explanation names the emoji."""
        baseline = self._make()
        passed, reason = baseline.run("This has emoji: 😊")
        self.assertFalse(passed)
        for fragment in ("disallowed characters", "😊"):
            self.assertIn(fragment, reason)

    def test_content_with_mandarin(self):
        """Chinese characters embedded between ASCII letters are still rejected."""
        baseline = self._make()
        passed, reason = baseline.run("asdfasdfas維基百科/中文asdfw")
        self.assertFalse(passed)
        self.assertIn("disallowed characters", reason)

    def test_valid_content(self):
        """A normal English sentence passes every baseline check."""
        baseline = self._make()
        sentence = "This is some normal content with proper English letters and no suspicious repetition."
        passed, _reason = baseline.run(sentence)
        self.assertTrue(passed)
class TestMathTest(unittest.TestCase):
    """Test the MathTest class.

    Unexpected exceptions are deliberately left uncaught: unittest already
    reports them as errors with the full traceback, whereas a blanket
    ``except Exception`` would also intercept ``AssertionError`` and replace
    the real assertion message with a generic one.
    """

    def test_valid_initialization(self):
        """Test that valid initialization works"""
        test = MathTest(pdf="test.pdf", page=1, id="test_id", type=TestType.MATH.value, math="a + b = c")
        self.assertEqual(test.math, "a + b = c")

    def test_invalid_test_type(self):
        """Test that invalid test type raises ValidationError"""
        with self.assertRaises(ValidationError):
            MathTest(pdf="test.pdf", page=1, id="test_id", type=TestType.PRESENT.value, math="a + b = c")

    def test_empty_math(self):
        """Test that empty math raises ValidationError"""
        with self.assertRaises(ValidationError):
            MathTest(pdf="test.pdf", page=1, id="test_id", type=TestType.MATH.value, math="")

    def test_exact_math_match(self):
        """Test exact match of math equation"""
        test = MathTest(pdf="test.pdf", page=1, id="test_id", type=TestType.MATH.value, math="a + b = c")
        # Test content with exact math match
        content = "Here is an equation: $$a + b = c$$"
        result, explanation = test.run(content)
        # Pass the explanation along so a failure says why the match was rejected
        self.assertTrue(result, explanation)

    def test_rendered_math_match(self):
        """Test rendered match of math equation"""
        test = MathTest(pdf="test.pdf", page=1, id="test_id", type=TestType.MATH.value, math="a + b = c")
        # Test content with different but equivalent math (no spaces around operators)
        content = "Here is an equation: $$a+b=c$$"
        result, explanation = test.run(content)
        self.assertTrue(result, explanation)

    def test_no_math_match(self):
        """Test no match of math equation"""
        test = MathTest(pdf="test.pdf", page=1, id="test_id", type=TestType.MATH.value, math="a + b = c")
        # Test content with no matching math
        content = "Here is an equation: $$x + y = z$$"
        result, explanation = test.run(content)
        self.assertFalse(result, explanation)
        self.assertIn("No match found", explanation)

    def test_different_math_delimiters(self):
        """Test different math delimiters"""
        test = MathTest(pdf="test.pdf", page=1, id="test_id", type=TestType.MATH.value, math="a + b = c")
        # Test different delimiters
        delimiters = [
            "$$a + b = c$$",  # $$...$$
            "$a + b = c$",  # $...$
            "\\(a + b = c\\)",  # \(...\)
            "\\[a + b = c\\]",  # \[...\]
        ]
        for delim in delimiters:
            # subTest labels a failure with the delimiter that triggered it
            with self.subTest(delimiter=delim):
                content = f"Here is an equation: {delim}"
                result, explanation = test.run(content)
                self.assertTrue(result, explanation)
# Run the tests in this module via unittest's command-line runner when the
# file is executed directly as a script (not when it is imported).
if __name__ == "__main__":
    unittest.main()