mirror of
https://github.com/allenai/olmocr.git
synced 2025-10-15 10:12:14 +00:00
Adjusting annotation script
This commit is contained in:
parent
2f74a2a996
commit
cdc7fae4f9
@ -243,6 +243,9 @@ def create_html_output(random_pages, pdf_s3_client, output_path, workspace_path,
|
||||
color: var(--text-color);
|
||||
background-color: var(--bg-color);
|
||||
padding: 2rem;
|
||||
display: flex;
|
||||
flex-direction: row;
|
||||
gap: 2rem;
|
||||
}}
|
||||
|
||||
ul {{
|
||||
@ -250,15 +253,23 @@ def create_html_output(random_pages, pdf_s3_client, output_path, workspace_path,
|
||||
}}
|
||||
|
||||
.container {{
|
||||
max-width: 1200px;
|
||||
margin: 0 auto;
|
||||
flex: 2;
|
||||
max-width: 750px;
|
||||
}}
|
||||
|
||||
header {{
|
||||
margin-bottom: 2rem;
|
||||
border-bottom: 1px solid var(--border-color);
|
||||
padding-bottom: 1rem;
|
||||
|
||||
position: sticky;
|
||||
top: 2rem;
|
||||
flex: 1;
|
||||
min-width: 380px;
|
||||
max-width: 420px;
|
||||
max-height: calc(100vh - 4rem);
|
||||
overflow-y: auto;
|
||||
padding: 1.5rem;
|
||||
background-color: white;
|
||||
border-radius: 0.5rem;
|
||||
box-shadow: var(--card-shadow);
|
||||
align-self: flex-start;
|
||||
font-size: small;
|
||||
}}
|
||||
|
||||
@ -296,7 +307,7 @@ def create_html_output(random_pages, pdf_s3_client, output_path, workspace_path,
|
||||
|
||||
.page-grid {{
|
||||
display: grid;
|
||||
grid-template-columns: repeat(2, 1fr);
|
||||
grid-template-columns: 1fr;
|
||||
gap: 2rem;
|
||||
}}
|
||||
|
||||
@ -494,16 +505,23 @@ def create_html_output(random_pages, pdf_s3_client, output_path, workspace_path,
|
||||
@media (max-width: 768px) {{
|
||||
body {{
|
||||
padding: 1rem;
|
||||
flex-direction: column;
|
||||
}}
|
||||
|
||||
.page-grid {{
|
||||
grid-template-columns: 1fr;
|
||||
header {{
|
||||
position: static;
|
||||
max-width: 100%;
|
||||
margin-left: 0;
|
||||
margin-bottom: 2rem;
|
||||
}}
|
||||
|
||||
.container {{
|
||||
max-width: 100%;
|
||||
}}
|
||||
}}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<header>
|
||||
<h2>Task Instructions</h2>
|
||||
<p>Your task is to review {len(random_pages)} document pages and determine whether they contain any <strong>Personally Identifiable Information (PII)</strong>. Carefully but efficiently inspect each page and select the appropriate response. You do not need to read every word - quickly scan the page and look for any obvious PII. The time expected to complete this task is 10-15 minutes.</p>
|
||||
@ -541,6 +559,7 @@ def create_html_output(random_pages, pdf_s3_client, output_path, workspace_path,
|
||||
<p><strong>Author names, researcher names, citations, or references from published research papers</strong> should NOT be marked as PII. These names are part of the normal publication process and are not considered private or sensitive information for the purposes of this task.
|
||||
Only mark information as PII if it relates to private, sensitive, or personal details about an individual outside the context of the publication.</p>
|
||||
</header>
|
||||
<div class="container">
|
||||
|
||||
<div class="info-bar">
|
||||
<div class="info-item">
|
||||
@ -591,7 +610,7 @@ def create_html_output(random_pages, pdf_s3_client, output_path, workspace_path,
|
||||
html_content += f"""
|
||||
<div class="page-container" data-index="{i}">
|
||||
<div class="page-info">
|
||||
<h2 title="{pdf_path}"><a href="{original_url}" target="_blank">{original_url}</a></h2>
|
||||
<h2 title="{pdf_path}">{original_url}</h2>
|
||||
<p>Page {page_num}</p>
|
||||
<p>{f'<a href="{presigned_url}" target="_blank">View Cached PDF</a>' if presigned_url else pdf_path}</p>
|
||||
<p>
|
||||
@ -623,7 +642,7 @@ def create_html_output(random_pages, pdf_s3_client, output_path, workspace_path,
|
||||
html_content += f"""
|
||||
<div class="page-container" data-index="{i}">
|
||||
<div class="page-info">
|
||||
<h2 title="{pdf_path}"><a href="{original_url}" target="_blank">{original_url}</a></h2>
|
||||
<h2 title="{pdf_path}">{original_url}</h2>
|
||||
<p>Page {page_num}</p>
|
||||
<p>{f'<a href="{presigned_url}" target="_blank">View Cached PDF</a>' if presigned_url else pdf_path}</p>
|
||||
<p>
|
||||
|
Loading…
x
Reference in New Issue
Block a user