diff --git a/olmocr/bench/synth/mine_html_templates.py b/olmocr/bench/synth/mine_html_templates.py
index e23c5d2..2318812 100644
--- a/olmocr/bench/synth/mine_html_templates.py
+++ b/olmocr/bench/synth/mine_html_templates.py
@@ -2,6 +2,7 @@ import argparse
 import asyncio
 import concurrent.futures
 import json
+import logging
 import os
 import random
 import re
@@ -18,7 +19,7 @@ from playwright.async_api import async_playwright
 from syntok.segmenter import process
 from tqdm import tqdm
 
-from olmocr.bench.tests import TableTest, TestType, parse_html_tables
+from olmocr.bench.tests import TableTest, TestType, parse_html_tables, load_single_test
 from olmocr.data.renderpdf import (
     get_png_dimensions_from_base64,
     render_pdf_to_base64png,
@@ -969,7 +970,36 @@ def generate_tests_from_html(html_content: str, pdf_id: str, page_num: int, verb
             test_signatures.add(test_signature)
             unique_tests.append(test)
 
-    return unique_tests
+    # Validate each test against the markdown content
+    validated_tests = []
+    failed_test_count = 0
+    
+    # Get the markdown content for validation
+    validation_markdown = markdown_content
+    
+    for test in unique_tests:
+        try:
+            # Create test object from the dictionary
+            test_obj = load_single_test(test)
+            
+            # Run the test on the markdown content
+            passed, error_msg = test_obj.run(validation_markdown)
+            
+            if passed:
+                validated_tests.append(test)
+            else:
+                failed_test_count += 1
+                if verbose_table_testing:
+                    print(f"Test {test['id']} (type: {test['type']}) failed validation: {error_msg}")
+        except Exception as e:
+            failed_test_count += 1
+            if verbose_table_testing:
+                print(f"Test {test['id']} (type: {test['type']}) errored during validation: {str(e)}")
+    
+    if failed_test_count > 0:
+        print(f"Filtered out {failed_test_count} tests that failed validation against markdown content for {pdf_id}")
+    
+    return validated_tests
 
 
 def process_pdf(pdf_info, args, client, pdf_filter=None):