diff --git a/olmocr/bench/viewer.py b/olmocr/bench/viewer.py index d094aeb..a5d472c 100644 --- a/olmocr/bench/viewer.py +++ b/olmocr/bench/viewer.py @@ -18,6 +18,10 @@ def parse_rules_file(file_path): try: rule = json.loads(line) + # Add checked field if it doesn't exist + if 'checked' not in rule: + rule['checked'] = None + if 'pdf' in rule: pdf_rules[rule['pdf']].append(rule) except json.JSONDecodeError: @@ -25,40 +29,93 @@ def parse_rules_file(file_path): return pdf_rules -def get_rule_html(rule): - """Generate HTML representation for a rule.""" +def get_rule_html(rule, rule_index): + """Generate HTML representation for a rule with interactive elements.""" rule_type = rule.get('type', 'unknown') + rule_id = f"rule-{rule_index}" + # Determine status button class based on 'checked' value + checked_status = rule.get('checked') + if checked_status == "verified": + status_class = "status-verified" + elif checked_status == "rejected": + status_class = "status-rejected" + else: + status_class = "status-unchecked" + + # Create thumbs up/down buttons + status_button = f""" +
Before: "{rule.get('before', '')}"
-After: "{rule.get('after', '')}"
+Before: + {rule.get('before', '')} +
+After: + {rule.get('after', '')} +
Status | Type | Content | Parameters | @@ -225,7 +370,8 @@ def generate_html(pdf_rules, rules_file_path): """ for rule in rules: - html += get_rule_html(rule) + html += get_rule_html(rule, rule_index) + rule_index += 1 html += """ @@ -235,8 +381,71 @@ def generate_html(pdf_rules, rules_file_path): """ - html += """ + # Add JavaScript to manage interactivity + html += f""" + + """ @@ -244,9 +453,9 @@ def generate_html(pdf_rules, rules_file_path): return html def main(): - parser = argparse.ArgumentParser(description='Generate an HTML visualization of PDF rules.') + parser = argparse.ArgumentParser(description='Generate an interactive HTML visualization of PDF rules.') parser.add_argument('rules_file', help='Path to the rules file (JSON lines format)') - parser.add_argument('-o', '--output', help='Output HTML file path', default='pdf_rules_visualization.html') + parser.add_argument('-o', '--output', help='Output HTML file path', default='interactive_pdf_rules.html') args = parser.parse_args() @@ -260,7 +469,7 @@ def main(): with open(args.output, 'w') as f: f.write(html) - print(f"HTML visualization created: {args.output}") + print(f"Interactive HTML visualization created: {args.output}") if __name__ == "__main__": main() \ No newline at end of file
---|