From d4d87f7c65a12c80eae5cc60573901cbbfdb0f99 Mon Sep 17 00:00:00 2001 From: Jake Poznanski Date: Thu, 3 Apr 2025 20:27:01 +0000 Subject: [PATCH] Force flag for review app, tests fixed for difference comparison in tables --- olmocr/bench/review_app.py | 43 +++++++++++++++++++------------------- olmocr/bench/test_tests.py | 18 ++++++++++++++++ olmocr/bench/tests.py | 21 ++++++++++--------- 3 files changed, 50 insertions(+), 32 deletions(-) diff --git a/olmocr/bench/review_app.py b/olmocr/bench/review_app.py index 9c00fcb..e07e9c6 100644 --- a/olmocr/bench/review_app.py +++ b/olmocr/bench/review_app.py @@ -18,7 +18,7 @@ DATASET_FILE = None CURRENT_PDF = None PDF_TESTS = {} ALL_PDFS = [] - +FORCE = False # New global flag def find_next_unchecked_pdf() -> Optional[str]: """Find the next PDF with at least one unchecked test.""" @@ -31,7 +31,6 @@ def find_next_unchecked_pdf() -> Optional[str]: return pdf_name return None - def calculate_stats() -> dict: """Calculate statistics for all tests in the dataset.""" global PDF_TESTS @@ -59,7 +58,6 @@ def calculate_stats() -> dict: return {"total": total_tests, "null": null_status, "verified": verified_status, "rejected": rejected_status, "completion": completion} - def save_dataset(jsonl_file: str) -> None: """Save the tests to a JSONL file, using temp file for atomic write.""" global PDF_TESTS @@ -77,26 +75,27 @@ def save_dataset(jsonl_file: str) -> None: # Atomic replace shutil.move(temp_file.name, jsonl_file) - @app.route("/pdf/") def serve_pdf(pdf_name): """Serve the PDF file directly.""" pdf_path = os.path.join(DATASET_DIR, "pdfs", pdf_name) return send_file(pdf_path, mimetype="application/pdf") - @app.route("/") def index(): """Main page displaying the current PDF and its tests.""" - global CURRENT_PDF, PDF_TESTS, DATASET_DIR + global CURRENT_PDF, PDF_TESTS, DATASET_DIR, ALL_PDFS, FORCE # If no current PDF is set, find the next one with unchecked tests if CURRENT_PDF is None: CURRENT_PDF = find_next_unchecked_pdf() - # If still no PDF, all tests have been checked + # If still no PDF, either show the "All done" page or force display the first PDF if CURRENT_PDF is None: - return render_template("all_done.html") + if FORCE and ALL_PDFS: + CURRENT_PDF = ALL_PDFS[0] + else: + return render_template("all_done.html") # Get the tests for the current PDF current_tests = PDF_TESTS.get(CURRENT_PDF, []) @@ -117,7 +116,6 @@ def index(): stats=stats, ) - @app.route("/update_test", methods=["POST"]) def update_test(): """API endpoint to update a test.""" @@ -140,7 +138,6 @@ def update_test(): return jsonify({"status": "success"}) - @app.route("/reject_all", methods=["POST"]) def reject_all(): """API endpoint to reject all tests for a PDF.""" @@ -161,24 +158,29 @@ def reject_all(): return jsonify({"status": "error", "message": "PDF not found"}) - @app.route("/next_pdf", methods=["POST"]) def next_pdf(): """Move to the next PDF in the list.""" - global CURRENT_PDF, ALL_PDFS + global CURRENT_PDF, ALL_PDFS, FORCE if CURRENT_PDF in ALL_PDFS: current_index = ALL_PDFS.index(CURRENT_PDF) if current_index < len(ALL_PDFS) - 1: CURRENT_PDF = ALL_PDFS[current_index + 1] else: - CURRENT_PDF = find_next_unchecked_pdf() + # If in force mode, cycle back to the beginning instead of checking for an unchecked PDF + if FORCE and ALL_PDFS: + CURRENT_PDF = ALL_PDFS[0] + else: + CURRENT_PDF = find_next_unchecked_pdf() else: - CURRENT_PDF = find_next_unchecked_pdf() + if FORCE and ALL_PDFS: + CURRENT_PDF = ALL_PDFS[0] + else: + CURRENT_PDF = find_next_unchecked_pdf() return redirect(url_for("index")) - @app.route("/prev_pdf", methods=["POST"]) def prev_pdf(): """Move to the previous PDF in the list.""" @@ -191,7 +193,6 @@ def prev_pdf(): return redirect(url_for("index")) - @app.route("/goto_pdf/", methods=["POST"]) def goto_pdf(index): """Go to a specific PDF by index.""" @@ -202,7 +203,6 @@ def goto_pdf(index): return redirect(url_for("index")) - def load_dataset(dataset_file: str) -> Tuple[Dict[str, List[Dict]], List[str]]: """Load tests from the dataset file and organize them by PDF.""" if not os.path.exists(dataset_file): @@ -228,24 +228,24 @@ def load_dataset(dataset_file: str) -> Tuple[Dict[str, List[Dict]], List[str]]: return pdf_tests, all_pdfs - def create_templates_directory(): """Create templates directory for Flask if it doesn't exist.""" templates_dir = os.path.join(os.path.dirname(__file__), "templates") os.makedirs(templates_dir, exist_ok=True) - def main(): """Main entry point with command-line arguments.""" - global DATASET_DIR, DATASET_FILE, PDF_TESTS, ALL_PDFS, CURRENT_PDF + global DATASET_DIR, DATASET_FILE, PDF_TESTS, ALL_PDFS, CURRENT_PDF, FORCE parser = argparse.ArgumentParser(description="Interactive Test Review App") parser.add_argument("dataset_file", help="Path to the dataset jsonl file") parser.add_argument("--port", type=int, default=5000, help="Port for the Flask app") parser.add_argument("--host", default="127.0.0.1", help="Host for the Flask app") parser.add_argument("--debug", action="store_true", help="Run Flask in debug mode") - + parser.add_argument("--force", action="store_true", help="Force show each file one by one and never do the 'All done' page") + args = parser.parse_args() + FORCE = args.force # Set the global FORCE flag # Validate dataset directory if not os.path.exists(args.dataset_file): @@ -280,6 +280,5 @@ def main(): return 0 - if __name__ == "__main__": sys.exit(main()) diff --git a/olmocr/bench/test_tests.py b/olmocr/bench/test_tests.py index d5f89e3..5b7da6d 100644 --- a/olmocr/bench/test_tests.py +++ b/olmocr/bench/test_tests.py @@ -600,6 +600,24 @@ Some text before the table. result, explanation = test.run(table) self.assertTrue(result, explanation) + def test_diffs(self): + table = """| CATEGORY | POINTS EARNED | +|------------------------------|------------------| +| Sustainable Sites | 9 | +| Water Efficiency | 3 | +| Energy & Atmosphere | 12 | +| Materials & Resources | 6 | +| Indoor Environmental Quality | 11 | +| Innovation & Design Process | 5 | +| TOTAL | 46 |""" + test = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, cell="9", left="Sustl Sie", max_diffs=2) + result, explanation = test.run(table) + self.assertFalse(result, explanation) + + test = TableTest(pdf="test.pdf", page=1, id="test_id", type=TestType.TABLE.value, cell="9", left="Sustainable Site", max_diffs=2) + result, explanation = test.run(table) + self.assertTrue(result, explanation) + def test_markdown_marker2(self): table = """| Concentration level | [CO] | [SO2] | [NOx] | diff --git a/olmocr/bench/tests.py b/olmocr/bench/tests.py index e06f7d1..aea2774 100644 --- a/olmocr/bench/tests.py +++ b/olmocr/bench/tests.py @@ -658,6 +658,7 @@ class TableTest(BasePDFTest): # Threshold for fuzzy matching derived from max_diffs threshold = 1.0 - (self.max_diffs / (len(self.cell) if len(self.cell) > 0 else 1)) + threshold = max(0.5, threshold) # Parse tables based on content_type md_tables = parse_markdown_tables(content) @@ -700,7 +701,7 @@ class TableTest(BasePDFTest): if self.up and row_idx > 0: up_cell = normalize_text(table_array[row_idx - 1, col_idx]) up_similarity = fuzz.ratio(self.up, up_cell) / 100.0 - if up_similarity < threshold: + if up_similarity < max(0.5, 1.0 - (self.max_diffs / (len(self.up) if len(self.up) > 0 else 1))): all_relationships_satisfied = False current_failed_reasons.append(f"Cell above '{up_cell}' doesn't match expected '{self.up}' (similarity: {up_similarity:.2f})") @@ -708,7 +709,7 @@ class TableTest(BasePDFTest): if self.down and row_idx < table_array.shape[0] - 1: down_cell = normalize_text(table_array[row_idx + 1, col_idx]) down_similarity = fuzz.ratio(self.down, down_cell) / 100.0 - if down_similarity < threshold: + if down_similarity < max(0.5, 1.0 - (self.max_diffs / (len(self.down) if len(self.down) > 0 else 1))): all_relationships_satisfied = False current_failed_reasons.append(f"Cell below '{down_cell}' doesn't match expected '{self.down}' (similarity: {down_similarity:.2f})") @@ -716,7 +717,7 @@ class TableTest(BasePDFTest): if self.left and col_idx > 0: left_cell = normalize_text(table_array[row_idx, col_idx - 1]) left_similarity = fuzz.ratio(self.left, left_cell) / 100.0 - if left_similarity < threshold: + if left_similarity < max(0.5, 1.0 - (self.max_diffs / (len(self.left) if len(self.left) > 0 else 1))): all_relationships_satisfied = False current_failed_reasons.append( f"Cell to the left '{left_cell}' doesn't match expected '{self.left}' (similarity: {left_similarity:.2f})" @@ -726,7 +727,7 @@ class TableTest(BasePDFTest): if self.right and col_idx < table_array.shape[1] - 1: right_cell = normalize_text(table_array[row_idx, col_idx + 1]) right_similarity = fuzz.ratio(self.right, right_cell) / 100.0 - if right_similarity < threshold: + if right_similarity < max(0.5, 1.0 - (self.max_diffs / (len(self.right) if len(self.right) > 0 else 1))): all_relationships_satisfied = False current_failed_reasons.append( f"Cell to the right '{right_cell}' doesn't match expected '{self.right}' (similarity: {right_similarity:.2f})" @@ -747,7 +748,7 @@ class TableTest(BasePDFTest): if similarity > best_similarity: best_similarity = similarity best_match = header_text - if best_similarity >= threshold: + if best_similarity >= max(0.5, 1.0 - (self.max_diffs / (len(self.top_heading) if len(self.top_heading) > 0 else 1))): top_heading_found = True break @@ -760,7 +761,7 @@ class TableTest(BasePDFTest): if similarity > best_similarity: best_similarity = similarity best_match = header_text - if best_similarity >= threshold: + if best_similarity >= max(0.5, 1.0 - (self.max_diffs / (len(self.top_heading) if len(self.top_heading) > 0 else 1))): top_heading_found = True break @@ -777,7 +778,7 @@ class TableTest(BasePDFTest): if not best_match: all_relationships_satisfied = False current_failed_reasons.append(f"No top heading found for cell at ({row_idx}, {col_idx})") - elif best_similarity < threshold: + elif best_similarity < max(0.5, 1.0 - (self.max_diffs / (len(self.top_heading) if len(self.top_heading) > 0 else 1))): all_relationships_satisfied = False current_failed_reasons.append( f"Top heading '{best_match}' doesn't match expected '{self.top_heading}' (similarity: {best_similarity:.2f})" @@ -798,7 +799,7 @@ class TableTest(BasePDFTest): if similarity > best_similarity: best_similarity = similarity best_match = header_text - if best_similarity >= threshold: + if best_similarity >= max(0.5, 1.0 - (self.max_diffs / (len(self.left_heading) if len(self.left_heading) > 0 else 1))): left_heading_found = True break @@ -811,7 +812,7 @@ class TableTest(BasePDFTest): if similarity > best_similarity: best_similarity = similarity best_match = header_text - if best_similarity >= threshold: + if best_similarity >= max(0.5, 1.0 - (self.max_diffs / (len(self.left_heading) if len(self.left_heading) > 0 else 1))): left_heading_found = True break @@ -828,7 +829,7 @@ class TableTest(BasePDFTest): if not best_match: all_relationships_satisfied = False current_failed_reasons.append(f"No left heading found for cell at ({row_idx}, {col_idx})") - elif best_similarity < threshold: + elif best_similarity < max(0.5, 1.0 - (self.max_diffs / (len(self.left_heading) if len(self.left_heading) > 0 else 1))): all_relationships_satisfied = False current_failed_reasons.append( f"Left heading '{best_match}' doesn't match expected '{self.left_heading}' (similarity: {best_similarity:.2f})"