mirror of
https://github.com/allenai/olmocr.git
synced 2025-10-02 20:09:08 +00:00
Working on viewer/editor for rules
This commit is contained in:
parent
ab13ac6054
commit
8061aacd58
@ -18,6 +18,10 @@ def parse_rules_file(file_path):
|
||||
|
||||
try:
|
||||
rule = json.loads(line)
|
||||
# Add checked field if it doesn't exist
|
||||
if 'checked' not in rule:
|
||||
rule['checked'] = None
|
||||
|
||||
if 'pdf' in rule:
|
||||
pdf_rules[rule['pdf']].append(rule)
|
||||
except json.JSONDecodeError:
|
||||
@ -25,40 +29,93 @@ def parse_rules_file(file_path):
|
||||
|
||||
return pdf_rules
|
||||
|
||||
def get_rule_html(rule):
|
||||
"""Generate HTML representation for a rule."""
|
||||
def get_rule_html(rule, rule_index):
|
||||
"""Generate HTML representation for a rule with interactive elements."""
|
||||
rule_type = rule.get('type', 'unknown')
|
||||
rule_id = f"rule-{rule_index}"
|
||||
|
||||
# Determine status button class based on 'checked' value
|
||||
checked_status = rule.get('checked')
|
||||
if checked_status == "verified":
|
||||
status_class = "status-verified"
|
||||
elif checked_status == "rejected":
|
||||
status_class = "status-rejected"
|
||||
else:
|
||||
status_class = "status-unchecked"
|
||||
|
||||
# Create thumbs up/down buttons
|
||||
status_button = f"""
|
||||
<div class="status-control">
|
||||
<button class="status-button thumbs-up {checked_status == 'verified' and 'active' or ''}"
|
||||
data-rule-id="{rule_id}"
|
||||
data-action="verified"
|
||||
onclick="toggleStatus(this)"></button>
|
||||
<button class="status-button thumbs-down {checked_status == 'rejected' and 'active' or ''}"
|
||||
data-rule-id="{rule_id}"
|
||||
data-action="rejected"
|
||||
onclick="toggleStatus(this)"></button>
|
||||
</div>
|
||||
"""
|
||||
|
||||
# Create HTML based on rule type
|
||||
if rule_type == 'present':
|
||||
return f"""
|
||||
<tr class="rule-row present-rule">
|
||||
<tr class="rule-row present-rule" data-rule-id="{rule_id}" data-rule-index="{rule_index}">
|
||||
<td>{status_button}</td>
|
||||
<td><span class="rule-type present">PRESENT</span></td>
|
||||
<td>"{rule.get('text', '')}"</td>
|
||||
<td>
|
||||
<div class="editable-text"
|
||||
contenteditable="true"
|
||||
data-rule-id="{rule_id}"
|
||||
data-field="text"
|
||||
onblur="updateRuleText(this)">{rule.get('text', '')}</div>
|
||||
</td>
|
||||
<td>Threshold: {rule.get('threshold', 'N/A')}</td>
|
||||
</tr>
|
||||
"""
|
||||
elif rule_type == 'absent':
|
||||
return f"""
|
||||
<tr class="rule-row absent-rule">
|
||||
<tr class="rule-row absent-rule" data-rule-id="{rule_id}" data-rule-index="{rule_index}">
|
||||
<td>{status_button}</td>
|
||||
<td><span class="rule-type absent">ABSENT</span></td>
|
||||
<td>"{rule.get('text', '')}"</td>
|
||||
<td>
|
||||
<div class="editable-text"
|
||||
contenteditable="true"
|
||||
data-rule-id="{rule_id}"
|
||||
data-field="text"
|
||||
onblur="updateRuleText(this)">{rule.get('text', '')}</div>
|
||||
</td>
|
||||
<td>Threshold: {rule.get('threshold', 'N/A')}</td>
|
||||
</tr>
|
||||
"""
|
||||
elif rule_type == 'order':
|
||||
return f"""
|
||||
<tr class="rule-row order-rule">
|
||||
<tr class="rule-row order-rule" data-rule-id="{rule_id}" data-rule-index="{rule_index}">
|
||||
<td>{status_button}</td>
|
||||
<td><span class="rule-type order">ORDER</span></td>
|
||||
<td>
|
||||
<p><strong>Before:</strong> "{rule.get('before', '')}"</p>
|
||||
<p><strong>After:</strong> "{rule.get('after', '')}"</p>
|
||||
<p><strong>Before:</strong>
|
||||
<span class="editable-text"
|
||||
contenteditable="true"
|
||||
data-rule-id="{rule_id}"
|
||||
data-field="before"
|
||||
onblur="updateRuleText(this)">{rule.get('before', '')}</span>
|
||||
</p>
|
||||
<p><strong>After:</strong>
|
||||
<span class="editable-text"
|
||||
contenteditable="true"
|
||||
data-rule-id="{rule_id}"
|
||||
data-field="after"
|
||||
onblur="updateRuleText(this)">{rule.get('after', '')}</span>
|
||||
</p>
|
||||
</td>
|
||||
<td>Threshold: {rule.get('threshold', 'N/A')}</td>
|
||||
</tr>
|
||||
"""
|
||||
else:
|
||||
return f"""
|
||||
<tr class="rule-row unknown-rule">
|
||||
<tr class="rule-row unknown-rule" data-rule-id="{rule_id}" data-rule-index="{rule_index}">
|
||||
<td>{status_button}</td>
|
||||
<td><span class="rule-type unknown">UNKNOWN</span></td>
|
||||
<td>Unknown rule type: {rule_type}</td>
|
||||
<td></td>
|
||||
@ -66,17 +123,24 @@ def get_rule_html(rule):
|
||||
"""
|
||||
|
||||
def generate_html(pdf_rules, rules_file_path):
|
||||
"""Generate the HTML page with PDF renderings and rules."""
|
||||
"""Generate the HTML page with PDF renderings and interactive rules."""
|
||||
# Limit to 10 unique PDFs
|
||||
pdf_names = list(pdf_rules.keys())[:10]
|
||||
|
||||
# Prepare rules data for JavaScript
|
||||
all_rules = []
|
||||
for pdf_name in pdf_names:
|
||||
all_rules.extend(pdf_rules[pdf_name])
|
||||
|
||||
rules_json = json.dumps(all_rules)
|
||||
|
||||
html = """
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>PDF Rules Visualizer</title>
|
||||
<title>Interactive PDF Rules Visualizer</title>
|
||||
<style>
|
||||
body {
|
||||
font-family: Arial, sans-serif;
|
||||
@ -86,7 +150,7 @@ def generate_html(pdf_rules, rules_file_path):
|
||||
}
|
||||
|
||||
.container {
|
||||
max-width: 1600px;
|
||||
max-width: 1920px;
|
||||
margin: 0 auto;
|
||||
}
|
||||
|
||||
@ -187,13 +251,93 @@ def generate_html(pdf_rules, rules_file_path):
|
||||
.rule-row:hover {
|
||||
background-color: #f8f9fa;
|
||||
}
|
||||
|
||||
/* New styles for interactive elements */
|
||||
.editable-text {
|
||||
min-height: 20px;
|
||||
padding: 5px;
|
||||
border-radius: 4px;
|
||||
border: 1px solid transparent;
|
||||
transition: border-color 0.2s;
|
||||
}
|
||||
|
||||
.editable-text:hover {
|
||||
border-color: #ccc;
|
||||
background-color: #f8f9fa;
|
||||
}
|
||||
|
||||
.editable-text:focus {
|
||||
outline: none;
|
||||
border-color: #4a6fa5;
|
||||
background-color: #fff;
|
||||
}
|
||||
|
||||
.status-control {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
gap: 8px;
|
||||
}
|
||||
|
||||
.status-button {
|
||||
width: 36px;
|
||||
height: 36px;
|
||||
border-radius: 4px;
|
||||
border: 1px solid #ccc;
|
||||
background-color: #f8f9fa;
|
||||
cursor: pointer;
|
||||
transition: all 0.2s;
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.status-button:hover {
|
||||
border-color: #999;
|
||||
background-color: #e9ecef;
|
||||
}
|
||||
|
||||
.thumbs-up:before {
|
||||
content: "👍";
|
||||
font-size: 18px;
|
||||
opacity: 0.5;
|
||||
}
|
||||
|
||||
.thumbs-down:before {
|
||||
content: "👎";
|
||||
font-size: 18px;
|
||||
opacity: 0.5;
|
||||
}
|
||||
|
||||
.thumbs-up.active {
|
||||
background-color: #28a745;
|
||||
border-color: #28a745;
|
||||
}
|
||||
|
||||
.thumbs-up.active:before {
|
||||
opacity: 1;
|
||||
color: white;
|
||||
}
|
||||
|
||||
.thumbs-down.active {
|
||||
background-color: #dc3545;
|
||||
border-color: #dc3545;
|
||||
}
|
||||
|
||||
.thumbs-down.active:before {
|
||||
opacity: 1;
|
||||
color: white;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<h1>PDF Rules Visualizer</h1>
|
||||
<h1>Interactive PDF Rules Visualizer</h1>
|
||||
"""
|
||||
|
||||
# Global rule index for unique IDs
|
||||
rule_index = 0
|
||||
|
||||
for pdf_name in pdf_names:
|
||||
rules = pdf_rules[pdf_name]
|
||||
|
||||
@ -216,6 +360,7 @@ def generate_html(pdf_rules, rules_file_path):
|
||||
<table class="rules-table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Status</th>
|
||||
<th>Type</th>
|
||||
<th>Content</th>
|
||||
<th>Parameters</th>
|
||||
@ -225,7 +370,8 @@ def generate_html(pdf_rules, rules_file_path):
|
||||
"""
|
||||
|
||||
for rule in rules:
|
||||
html += get_rule_html(rule)
|
||||
html += get_rule_html(rule, rule_index)
|
||||
rule_index += 1
|
||||
|
||||
html += """
|
||||
</tbody>
|
||||
@ -235,8 +381,71 @@ def generate_html(pdf_rules, rules_file_path):
|
||||
</div>
|
||||
"""
|
||||
|
||||
html += """
|
||||
# Add JavaScript to manage interactivity
|
||||
html += f"""
|
||||
</div>
|
||||
|
||||
<script>
|
||||
// Store all rules data
|
||||
let rulesData = {rules_json};
|
||||
|
||||
// Function to toggle status button
|
||||
function toggleStatus(button) {{
|
||||
const ruleId = button.dataset.ruleId;
|
||||
const ruleIndex = parseInt(document.querySelector(`[data-rule-id="${{ruleId}}"]`).dataset.ruleIndex);
|
||||
const currentStatus = button.dataset.status;
|
||||
|
||||
let newStatus;
|
||||
if (currentStatus === 'null') {{
|
||||
newStatus = 'verified';
|
||||
button.classList.remove('status-unchecked');
|
||||
button.classList.add('status-verified');
|
||||
}} else if (currentStatus === 'verified') {{
|
||||
newStatus = 'rejected';
|
||||
button.classList.remove('status-verified');
|
||||
button.classList.add('status-rejected');
|
||||
}} else {{
|
||||
newStatus = null;
|
||||
button.classList.remove('status-rejected');
|
||||
button.classList.add('status-unchecked');
|
||||
}}
|
||||
|
||||
// Update button status
|
||||
button.dataset.status = newStatus === null ? 'null' : newStatus;
|
||||
|
||||
// Update rules data
|
||||
rulesData[ruleIndex].checked = newStatus;
|
||||
|
||||
// Output updated JSONL to console
|
||||
outputJSON();
|
||||
}}
|
||||
|
||||
// Function to update rule text
|
||||
function updateRuleText(element) {{
|
||||
const ruleId = element.dataset.ruleId;
|
||||
const field = element.dataset.field;
|
||||
const ruleIndex = parseInt(document.querySelector(`[data-rule-id="${{ruleId}}"]`).dataset.ruleIndex);
|
||||
const newText = element.innerText.trim();
|
||||
|
||||
// Update rules data
|
||||
rulesData[ruleIndex][field] = newText;
|
||||
|
||||
// Output updated JSONL to console
|
||||
outputJSON();
|
||||
}}
|
||||
|
||||
// Function to output JSONL to console
|
||||
function outputJSON() {{
|
||||
console.clear();
|
||||
console.log("Updated JSONL:");
|
||||
rulesData.forEach(rule => {{
|
||||
console.log(JSON.stringify(rule));
|
||||
}});
|
||||
}}
|
||||
|
||||
// Output initial JSONL when page loads
|
||||
document.addEventListener('DOMContentLoaded', outputJSON);
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
@ -244,9 +453,9 @@ def generate_html(pdf_rules, rules_file_path):
|
||||
return html
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Generate an HTML visualization of PDF rules.')
|
||||
parser = argparse.ArgumentParser(description='Generate an interactive HTML visualization of PDF rules.')
|
||||
parser.add_argument('rules_file', help='Path to the rules file (JSON lines format)')
|
||||
parser.add_argument('-o', '--output', help='Output HTML file path', default='pdf_rules_visualization.html')
|
||||
parser.add_argument('-o', '--output', help='Output HTML file path', default='interactive_pdf_rules.html')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
@ -260,7 +469,7 @@ def main():
|
||||
with open(args.output, 'w') as f:
|
||||
f.write(html)
|
||||
|
||||
print(f"HTML visualization created: {args.output}")
|
||||
print(f"Interactive HTML visualization created: {args.output}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
Loading…
x
Reference in New Issue
Block a user