mirror of
https://github.com/allenai/olmocr.git
synced 2025-10-11 16:22:29 +00:00
Adjusted the dolma viewer so I can more easily vibe check some new model outputs
This commit is contained in:
parent
0516ff035f
commit
3ae0f30f98
@ -130,7 +130,83 @@ def process_document(data, s3_client, template, output_dir):
|
||||
print(f"Error writing HTML file for document ID {id_}: {e}")
|
||||
|
||||
|
||||
def main(jsonl_paths, output_dir, template_path, s3_profile_name):
|
||||
def process_document_for_merge(data, s3_client):
    """Process a single document and return data for merging into a single HTML.

    Args:
        data: One parsed JSONL record. The keys read are ``id``, ``text``,
            ``attributes`` (for ``pdf_page_numbers``), ``metadata``,
            ``source``, ``added`` and ``created``.
        s3_client: Client object handed through to ``get_s3_bytes`` and
            ``generate_presigned_url``.

    Returns:
        A dict with the keys ``id``, ``pages``, ``s3_link``, ``metadata`` and
        ``attributes``; or ``None`` when the record names no source file, the
        PDF bytes cannot be retrieved, or any step of page processing raises.
    """
    import html  # function-scope import: keeps this block self-contained

    id_ = data.get("id")
    text = data.get("text", "")
    # ``or {}`` also covers records where the key is present but null.
    attributes = data.get("attributes") or {}
    pdf_page_numbers = attributes.get("pdf_page_numbers", [])
    metadata = data.get("metadata") or {}

    # Extract additional fields for display
    source = data.get("source", "")
    added = data.get("added", "")
    created = data.get("created", "")
    source_file = metadata.get("Source-File")

    # Without a source file there is nothing to render; bail out before
    # creating a temporary file that would only have to be deleted again.
    if not source_file:
        print(f"Document ID {id_} has no Source-File metadata; skipping")
        return None

    # Generate base64 image of the corresponding PDF page
    local_pdf = tempfile.NamedTemporaryFile("wb+", suffix=".pdf", delete=False)
    try:
        pdf_bytes = get_s3_bytes(s3_client, source_file)
        if pdf_bytes is None:
            print(f"Failed to retrieve PDF from {source_file}")
            return None

        local_pdf.write(pdf_bytes)
        # Flush so the renderer, which reopens the file by name, sees all bytes.
        local_pdf.flush()

        pages = []
        # Each span is a (start_index, end_index, page_num) triple into ``text``.
        for start_index, end_index, page_num in pdf_page_numbers:
            # Escape only the HTML-significant characters (& < > " ') so the
            # text can sit inside an HTML attribute. Curly braces are left
            # alone because LaTeX needs them.
            page_text = html.escape(text[start_index:end_index], quote=True)

            base64_image = render_pdf_to_base64webp(local_pdf.name, page_num)

            pages.append({"page_num": page_num, "text": page_text, "image": base64_image})

    except Exception as e:
        # Best-effort: one bad document must not abort the whole merge.
        print(f"Error processing document ID {id_}: {e}")
        return None
    finally:
        # delete=False above means cleanup is our responsibility.
        local_pdf.close()
        os.unlink(local_pdf.name)

    # Generate pre-signed URL if source_file is an S3 path
    s3_link = None
    if source_file.startswith("s3://"):
        bucket_name, key_name = parse_s3_path(source_file)
        s3_link = generate_presigned_url(s3_client, bucket_name, key_name)

    # Prepare metadata for display
    display_metadata = {
        "id": id_,
        "source": source,
        "added": added,
        "created": created,
        "pdf_pages": metadata.get("pdf-total-pages", ""),
        "tokens_in": metadata.get("total-input-tokens", ""),
        "tokens_out": metadata.get("total-output-tokens", ""),
        "olmocr_version": metadata.get("olmocr-version", ""),
        "source_file": source_file,
    }

    return {
        "id": id_,
        "pages": pages,
        "s3_link": s3_link,
        "metadata": display_metadata,
        "attributes": attributes,
    }
|
||||
|
||||
|
||||
def main(jsonl_paths, output_dir, template_path, s3_profile_name, merge=False):
|
||||
if not os.path.exists(output_dir):
|
||||
os.makedirs(output_dir)
|
||||
|
||||
@ -150,8 +226,9 @@ def main(jsonl_paths, output_dir, template_path, s3_profile_name):
|
||||
return
|
||||
|
||||
# Load the Jinja template
|
||||
template_file_name = "dolmaviewer_merged_template.html" if merge else template_path
|
||||
try:
|
||||
with open(os.path.join(os.path.dirname(__file__), template_path), "r", encoding="utf-8") as template_file:
|
||||
with open(os.path.join(os.path.dirname(__file__), template_file_name), "r", encoding="utf-8") as template_file:
|
||||
template_content = template_file.read()
|
||||
template = Template(template_content)
|
||||
except Exception as e:
|
||||
@ -166,24 +243,69 @@ def main(jsonl_paths, output_dir, template_path, s3_profile_name):
|
||||
print(f"Error initializing S3 client: {e}")
|
||||
return
|
||||
|
||||
# Create ThreadPoolExecutor
|
||||
with ThreadPoolExecutor() as executor:
|
||||
futures = []
|
||||
for line in read_jsonl(expanded_paths):
|
||||
if not line:
|
||||
continue
|
||||
try:
|
||||
data = json.loads(line)
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Invalid JSON line: {e}")
|
||||
continue
|
||||
future = executor.submit(process_document, data, s3_client, template, output_dir)
|
||||
futures.append(future)
|
||||
if merge:
|
||||
# Process all documents from each JSONL file into a single HTML
|
||||
for jsonl_path in expanded_paths:
|
||||
documents = []
|
||||
print(f"Processing {jsonl_path}...")
|
||||
|
||||
# Process this file's documents concurrently; the JSONL files themselves are handled one at a time
|
||||
with ThreadPoolExecutor() as executor:
|
||||
futures = []
|
||||
for line in read_jsonl([jsonl_path]):
|
||||
if not line:
|
||||
continue
|
||||
try:
|
||||
data = json.loads(line)
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Invalid JSON line: {e}")
|
||||
continue
|
||||
future = executor.submit(process_document_for_merge, data, s3_client)
|
||||
futures.append(future)
|
||||
|
||||
# Collect results
|
||||
for future in tqdm(as_completed(futures), total=len(futures), desc=f"Processing documents from {os.path.basename(jsonl_path)}"):
|
||||
result = future.result()
|
||||
if result:
|
||||
documents.append(result)
|
||||
|
||||
if documents:
|
||||
# Generate merged HTML
|
||||
try:
|
||||
html_content = template.render(documents=documents)
|
||||
|
||||
# Create output filename based on JSONL filename
|
||||
jsonl_basename = os.path.basename(jsonl_path)
|
||||
if jsonl_basename.endswith('.jsonl'):
|
||||
output_filename = jsonl_basename[:-6] + '_merged.html'
|
||||
else:
|
||||
output_filename = jsonl_basename + '_merged.html'
|
||||
|
||||
output_path = os.path.join(output_dir, output_filename)
|
||||
with open(output_path, "w", encoding="utf-8") as f:
|
||||
f.write(html_content)
|
||||
print(f"Created merged HTML: {output_path}")
|
||||
except Exception as e:
|
||||
print(f"Error writing merged HTML for {jsonl_path}: {e}")
|
||||
else:
|
||||
# Original behavior: create separate HTML files for each document
|
||||
with ThreadPoolExecutor() as executor:
|
||||
futures = []
|
||||
for line in read_jsonl(expanded_paths):
|
||||
if not line:
|
||||
continue
|
||||
try:
|
||||
data = json.loads(line)
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Invalid JSON line: {e}")
|
||||
continue
|
||||
future = executor.submit(process_document, data, s3_client, template, output_dir)
|
||||
futures.append(future)
|
||||
|
||||
for _ in tqdm(as_completed(futures), total=len(futures), desc="Processing documents"):
|
||||
pass # Progress bar updates automatically
|
||||
for _ in tqdm(as_completed(futures), total=len(futures), desc="Processing documents"):
|
||||
pass # Progress bar updates automatically
|
||||
|
||||
print(f"Output HTML-viewable pages to directory: {args.output_dir}")
|
||||
print(f"Output HTML-viewable pages to directory: {output_dir}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
@ -192,6 +314,7 @@ if __name__ == "__main__":
|
||||
parser.add_argument("--output_dir", default="dolma_previews", help="Directory to save HTML files")
|
||||
parser.add_argument("--template_path", default="dolmaviewer_template.html", help="Path to the Jinja2 template file")
|
||||
parser.add_argument("--s3_profile", default=None, help="S3 profile to use for accessing the source documents to render them in the viewer.")
|
||||
parser.add_argument("--merge", action="store_true", help="Output a single HTML file for each JSONL file with all documents merged")
|
||||
args = parser.parse_args()
|
||||
|
||||
main(args.jsonl_paths, args.output_dir, args.template_path, args.s3_profile)
|
||||
main(args.jsonl_paths, args.output_dir, args.template_path, args.s3_profile, args.merge)
|
||||
|
506
olmocr/viewer/dolmaviewer_merged_template.html
Normal file
506
olmocr/viewer/dolmaviewer_merged_template.html
Normal file
@ -0,0 +1,506 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Merged Documents</title>
|
||||
|
||||
<!-- KaTeX CSS -->
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.css" integrity="sha384-n8MVd4RsNIU0tAv4ct0nTaAbDJwPJzDEaqSD1odI+WdtXRGWt2kTvGFasHpSy3SV" crossorigin="anonymous">
|
||||
|
||||
<style>
|
||||
/* CSS styles */
|
||||
body {
|
||||
font-family: Arial, sans-serif;
|
||||
background-color: #f0f0f0;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
}
|
||||
.container {
|
||||
background-color: #fff;
|
||||
padding: 40px;
|
||||
margin: 20px;
|
||||
width: 60%;
|
||||
box-shadow: 0px 0px 10px rgba(0, 0, 0, 0.1);
|
||||
line-height: 1.8;
|
||||
position: relative;
|
||||
}
|
||||
|
||||
/* Navigation */
|
||||
.nav-container {
|
||||
position: sticky;
|
||||
top: 0;
|
||||
background: white;
|
||||
z-index: 1001;
|
||||
padding: 15px 0;
|
||||
margin-bottom: 20px;
|
||||
border-bottom: 2px solid #dee2e6;
|
||||
}
|
||||
|
||||
.nav-controls {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
gap: 20px;
|
||||
}
|
||||
|
||||
.nav-select {
|
||||
flex: 1;
|
||||
max-width: 400px;
|
||||
}
|
||||
|
||||
.nav-select select {
|
||||
width: 100%;
|
||||
padding: 8px 12px;
|
||||
border: 1px solid #ccc;
|
||||
border-radius: 4px;
|
||||
font-size: 14px;
|
||||
}
|
||||
|
||||
/* Toggle button styles */
|
||||
.toggle-button {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: 10px;
|
||||
padding: 8px 16px;
|
||||
background: #4CAF50;
|
||||
color: white;
|
||||
border: none;
|
||||
border-radius: 4px;
|
||||
cursor: pointer;
|
||||
font-size: 14px;
|
||||
transition: background 0.3s;
|
||||
}
|
||||
|
||||
.toggle-button:hover {
|
||||
background: #45a049;
|
||||
}
|
||||
|
||||
.toggle-button.raw-mode {
|
||||
background: #2196F3;
|
||||
}
|
||||
|
||||
.toggle-button.raw-mode:hover {
|
||||
background: #0b7dda;
|
||||
}
|
||||
|
||||
/* Document separator */
|
||||
.document-separator {
|
||||
margin: 40px 0;
|
||||
padding: 20px 0;
|
||||
border-top: 3px solid #dee2e6;
|
||||
position: relative;
|
||||
}
|
||||
|
||||
.document-separator::before {
|
||||
content: attr(data-doc-number);
|
||||
position: absolute;
|
||||
top: -15px;
|
||||
left: 50%;
|
||||
transform: translateX(-50%);
|
||||
background: white;
|
||||
padding: 0 15px;
|
||||
color: #6c757d;
|
||||
font-weight: bold;
|
||||
font-size: 14px;
|
||||
}
|
||||
|
||||
.document {
|
||||
margin-bottom: 40px;
|
||||
}
|
||||
|
||||
.page-section {
|
||||
display: flex;
|
||||
flex-direction: row;
|
||||
margin-bottom: 20px;
|
||||
transition: background-color 0.3s ease;
|
||||
clear: both;
|
||||
}
|
||||
.page-section:hover {
|
||||
background-color: #f5f5f5;
|
||||
}
|
||||
.page-section .text {
|
||||
flex: 2;
|
||||
padding: 10px;
|
||||
text-align: justify;
|
||||
}
|
||||
.page-section .image {
|
||||
flex: 1;
|
||||
padding: 10px;
|
||||
}
|
||||
.page-section img {
|
||||
max-width: 100%;
|
||||
height: auto;
|
||||
border: 1px solid #ccc;
|
||||
}
|
||||
|
||||
/* Raw text display */
|
||||
.text-content.raw pre {
|
||||
white-space: pre-wrap;
|
||||
word-wrap: break-word;
|
||||
font-family: 'Courier New', monospace;
|
||||
font-size: 14px;
|
||||
line-height: 1.5;
|
||||
background: #f5f5f5;
|
||||
padding: 10px;
|
||||
border-radius: 4px;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
/* Markdown rendered content */
|
||||
.text-content.markdown {
|
||||
font-family: Arial, sans-serif;
|
||||
}
|
||||
|
||||
.text-content.markdown h1 { margin-top: 24px; margin-bottom: 16px; }
|
||||
.text-content.markdown h2 { margin-top: 20px; margin-bottom: 14px; }
|
||||
.text-content.markdown h3 { margin-top: 18px; margin-bottom: 12px; }
|
||||
.text-content.markdown h4 { margin-top: 16px; margin-bottom: 10px; }
|
||||
.text-content.markdown h5 { margin-top: 14px; margin-bottom: 8px; }
|
||||
.text-content.markdown h6 { margin-top: 12px; margin-bottom: 6px; }
|
||||
|
||||
.text-content.markdown p {
|
||||
margin-bottom: 1em;
|
||||
}
|
||||
|
||||
.text-content.markdown ul, .text-content.markdown ol {
|
||||
margin-bottom: 1em;
|
||||
padding-left: 2em;
|
||||
}
|
||||
|
||||
.text-content.markdown blockquote {
|
||||
border-left: 4px solid #ddd;
|
||||
padding-left: 1em;
|
||||
margin: 1em 0;
|
||||
color: #666;
|
||||
}
|
||||
|
||||
.text-content.markdown code {
|
||||
background-color: #f4f4f4;
|
||||
padding: 2px 4px;
|
||||
border-radius: 3px;
|
||||
font-family: 'Courier New', monospace;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
|
||||
.text-content.markdown pre {
|
||||
background-color: #f4f4f4;
|
||||
padding: 10px;
|
||||
border-radius: 4px;
|
||||
overflow-x: auto;
|
||||
margin: 1em 0;
|
||||
}
|
||||
|
||||
.text-content.markdown pre code {
|
||||
background: none;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
|
||||
th, td {
|
||||
border: 1px solid #ddd;
|
||||
padding: 12px 15px;
|
||||
text-align: left;
|
||||
vertical-align: top;
|
||||
font-size: 14px;
|
||||
}
|
||||
|
||||
th {
|
||||
background-color: #f4f4f4;
|
||||
font-weight: bold;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.05em;
|
||||
border-bottom: 2px solid #ccc;
|
||||
}
|
||||
|
||||
tr:nth-child(even) {
|
||||
background-color: #f9f9f9;
|
||||
}
|
||||
|
||||
tr:hover {
|
||||
background-color: #f1f1f1;
|
||||
}
|
||||
|
||||
td img {
|
||||
max-width: 100%;
|
||||
height: auto;
|
||||
display: block;
|
||||
}
|
||||
|
||||
table caption {
|
||||
caption-side: bottom;
|
||||
text-align: right;
|
||||
font-size: 12px;
|
||||
color: #777;
|
||||
padding: 5px 0;
|
||||
}
|
||||
|
||||
/* KaTeX display math centering */
|
||||
.katex-display {
|
||||
margin: 1em 0;
|
||||
}
|
||||
|
||||
/* Metadata styles */
|
||||
.metadata-container {
|
||||
background: #f8f9fa;
|
||||
border: 1px solid #dee2e6;
|
||||
border-radius: 8px;
|
||||
padding: 15px;
|
||||
margin-bottom: 20px;
|
||||
font-size: 13px;
|
||||
color: #495057;
|
||||
}
|
||||
|
||||
.metadata-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
|
||||
gap: 10px;
|
||||
}
|
||||
|
||||
.metadata-item {
|
||||
display: flex;
|
||||
align-items: baseline;
|
||||
}
|
||||
|
||||
.metadata-label {
|
||||
font-weight: 600;
|
||||
color: #6c757d;
|
||||
margin-right: 5px;
|
||||
min-width: fit-content;
|
||||
}
|
||||
|
||||
.metadata-value {
|
||||
color: #212529;
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
.metadata-source-file {
|
||||
grid-column: 1 / -1;
|
||||
margin-top: 5px;
|
||||
padding-top: 10px;
|
||||
border-top: 1px solid #dee2e6;
|
||||
}
|
||||
|
||||
/* Document count */
|
||||
.doc-count {
|
||||
text-align: center;
|
||||
color: #6c757d;
|
||||
font-size: 14px;
|
||||
margin: 10px 0;
|
||||
}
|
||||
|
||||
</style>
|
||||
|
||||
<!-- Marked.js for Markdown parsing -->
|
||||
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
||||
|
||||
<!-- KaTeX JavaScript -->
|
||||
<script src="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.js" integrity="sha384-XjKyOOlGwcjNTAIQHIpgOno0Hl1YQqzUOEleOLALmuqehneUG+vnGctmUb0ZY0l8" crossorigin="anonymous"></script>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<!-- Navigation controls -->
|
||||
<div class="nav-container">
|
||||
<div class="nav-controls">
|
||||
<div class="nav-select">
|
||||
<select id="documentSelect">
|
||||
<option value="">Jump to document...</option>
|
||||
{% for doc in documents %}
|
||||
<option value="doc-{{ loop.index }}">Document {{ loop.index }}: {{ doc.id[:50] }}...</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
</div>
|
||||
<button class="toggle-button" id="toggleView">
|
||||
<span id="toggleText">📝 Markdown View</span>
|
||||
</button>
|
||||
</div>
|
||||
<div class="doc-count">
|
||||
Total documents: {{ documents|length }}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{% for doc in documents %}
|
||||
{% if loop.index > 1 %}
|
||||
<div class="document-separator" data-doc-number="Document {{ loop.index }}"></div>
|
||||
{% endif %}
|
||||
|
||||
<div class="document" id="doc-{{ loop.index }}">
|
||||
<!-- Metadata Section -->
|
||||
<div class="metadata-container">
|
||||
<div class="metadata-grid">
|
||||
{% if doc.metadata.source %}
|
||||
<div class="metadata-item">
|
||||
<span class="metadata-label">Source:</span>
|
||||
<span class="metadata-value">{{ doc.metadata.source }}</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if doc.metadata.olmocr_version %}
|
||||
<div class="metadata-item">
|
||||
<span class="metadata-label">OlmOCR:</span>
|
||||
<span class="metadata-value">v{{ doc.metadata.olmocr_version }}</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if doc.metadata.created %}
|
||||
<div class="metadata-item">
|
||||
<span class="metadata-label">Created:</span>
|
||||
<span class="metadata-value">{{ doc.metadata.created }}</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if doc.metadata.pdf_pages %}
|
||||
<div class="metadata-item">
|
||||
<span class="metadata-label">Pages:</span>
|
||||
<span class="metadata-value">{{ doc.metadata.pdf_pages }}</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if doc.metadata.tokens_in %}
|
||||
<div class="metadata-item">
|
||||
<span class="metadata-label">Tokens In:</span>
|
||||
<span class="metadata-value">{{ doc.metadata.tokens_in }}</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if doc.metadata.tokens_out %}
|
||||
<div class="metadata-item">
|
||||
<span class="metadata-label">Tokens Out:</span>
|
||||
<span class="metadata-value">{{ doc.metadata.tokens_out }}</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if doc.attributes.primary_language %}
|
||||
<div class="metadata-item">
|
||||
<span class="metadata-label">Language:</span>
|
||||
<span class="metadata-value">{{ doc.attributes.primary_language[0] }}</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if doc.attributes.rotation_correction %}
|
||||
<div class="metadata-item">
|
||||
<span class="metadata-label">Rotation:</span>
|
||||
<span class="metadata-value">{{ doc.attributes.rotation_correction[0] }}°</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if doc.metadata.source_file %}
|
||||
<div class="metadata-item metadata-source-file">
|
||||
<span class="metadata-label">File:</span>
|
||||
<span class="metadata-value">{{ doc.metadata.source_file }}</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{# Inside the page loop below, `loop` refers to the pages, so keep a handle on the
   enclosing document loop to build element ids that are unique per document. #}
{% set doc_loop = loop %}
{% for page in doc.pages %}
<div class="page-section" id="doc-{{ doc_loop.index }}-page-{{ page.page_num }}">
    <div class="text">
        {# page.text is expected to arrive already HTML-escaped from the Python side,
           so it is emitted as-is here; the script reads it back via getAttribute. #}
        <div class="text-content markdown" data-raw-text="{{ page.text }}">
            <!-- Content will be rendered by JavaScript -->
        </div>
    </div>
    {% if page.image %}
    <div class="image">
        {% if doc.s3_link %}
        <a href="{{ doc.s3_link }}#page={{ page.page_num }}" target="_blank" rel="noopener">
            <img src="data:image/webp;base64,{{ page.image }}" alt="Page {{ page.page_num }} Image">
        </a>
        {% else %}
        {# No pre-signed link for this document: show the image without a dead "None#page=N" link. #}
        <img src="data:image/webp;base64,{{ page.image }}" alt="Page {{ page.page_num }} Image">
        {% endif %}
    </div>
    {% endif %}
</div>
{% endfor %}
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
|
||||
<script>
|
||||
// Store the current view mode
|
||||
let isMarkdownView = true;
|
||||
|
||||
// Configure marked options
|
||||
marked.setOptions({
|
||||
breaks: true,
|
||||
gfm: true,
|
||||
tables: true,
|
||||
headerIds: false,
|
||||
mangle: false
|
||||
});
|
||||
|
||||
/**
 * Replace LaTeX math spans in `text` with KaTeX-rendered HTML.
 *
 * Recognized delimiters: $$...$$ and \[...\] (display math), $...$ and
 * \(...\) (inline math; $...$ may not span a newline).
 *
 * All delimiters are matched in ONE left-to-right pass. Running one replace
 * per delimiter would re-scan HTML that KaTeX already produced, and that
 * markup embeds the original TeX, so a `$` inside display math (for example
 * `\$` or `\text{$x$}`) would be picked up again and corrupted.
 *
 * @param {string} text Source text possibly containing LaTeX.
 * @returns {string} Text with math spans replaced; a span is left unchanged
 *     if rendering it throws. Returns '' for empty or missing input.
 */
function renderLatexToHtml(text) {
    if (!text) {
        return '';
    }

    // Alternation order matters: at any position, $$ is tried before $.
    const mathPattern = /\$\$([\s\S]+?)\$\$|\\\[([\s\S]+?)\\\]|\$([^\$\n]+?)\$|\\\((.+?)\\\)/g;

    return text.replace(mathPattern, (match, dollarBlock, bracketBlock, dollarInline, parenInline) => {
        // Exactly one capture group is defined per match.
        const blockLatex = dollarBlock !== undefined ? dollarBlock : bracketBlock;
        const isDisplay = blockLatex !== undefined;
        const inlineLatex = dollarInline !== undefined ? dollarInline : parenInline;
        const latex = isDisplay ? blockLatex : inlineLatex;

        try {
            return katex.renderToString(latex, {
                displayMode: isDisplay,
                throwOnError: false
            });
        } catch (e) {
            return match; // Return original if error
        }
    });
}
|
||||
|
||||
/**
 * Re-render every `.text-content` element according to `isMarkdownView`.
 *
 * Markdown view: LaTeX is rendered first, then the result goes through
 * marked. Raw view: the text is shown verbatim inside a <pre>.
 */
function renderMarkdown() {
    document.querySelectorAll('.text-content').forEach(element => {
        // getAttribute returns the entity-decoded attribute value, i.e. the
        // original text with any literal <, >, & characters restored.
        const rawText = element.getAttribute('data-raw-text') || '';

        if (isMarkdownView) {
            element.className = 'text-content markdown';
            // Render LaTeX first, then markdown
            // NOTE(review): the output is not sanitized here, so HTML contained in
            // the document text is inserted as-is. Confirm that is acceptable for
            // the documents being viewed.
            element.innerHTML = marked.parse(renderLatexToHtml(rawText));
        } else {
            element.className = 'text-content raw';
            // Use textContent, not innerHTML: the raw view must display markup
            // characters literally instead of letting the browser interpret them.
            const pre = document.createElement('pre');
            pre.textContent = rawText;
            element.innerHTML = '';
            element.appendChild(pre);
        }
    });
}
|
||||
|
||||
// Toggle button functionality
|
||||
document.getElementById('toggleView').addEventListener('click', function() {
|
||||
isMarkdownView = !isMarkdownView;
|
||||
const button = this;
|
||||
const toggleText = document.getElementById('toggleText');
|
||||
|
||||
if (isMarkdownView) {
|
||||
button.className = 'toggle-button';
|
||||
toggleText.textContent = '📝 Markdown View';
|
||||
} else {
|
||||
button.className = 'toggle-button raw-mode';
|
||||
toggleText.textContent = '📄 Raw Text View';
|
||||
}
|
||||
|
||||
renderMarkdown();
|
||||
});
|
||||
|
||||
// Document navigation
|
||||
document.getElementById('documentSelect').addEventListener('change', function() {
|
||||
if (this.value) {
|
||||
const element = document.getElementById(this.value);
|
||||
if (element) {
|
||||
element.scrollIntoView({ behavior: 'smooth', block: 'start' });
|
||||
// Reset the select after navigation
|
||||
setTimeout(() => {
|
||||
this.value = '';
|
||||
}, 100);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Initial render when page loads
|
||||
document.addEventListener('DOMContentLoaded', function() {
|
||||
renderMarkdown();
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
Loading…
x
Reference in New Issue
Block a user