Adjusting annotation script

This commit is contained in:
Jake Poznanski 2025-04-08 20:50:00 +00:00
parent 2f74a2a996
commit cdc7fae4f9

View File

@ -243,6 +243,9 @@ def create_html_output(random_pages, pdf_s3_client, output_path, workspace_path,
color: var(--text-color);
background-color: var(--bg-color);
padding: 2rem;
display: flex;
flex-direction: row;
gap: 2rem;
}}
ul {{
@ -250,15 +253,23 @@ def create_html_output(random_pages, pdf_s3_client, output_path, workspace_path,
}}
.container {{
max-width: 1200px;
margin: 0 auto;
flex: 2;
max-width: 750px;
}}
header {{
margin-bottom: 2rem;
border-bottom: 1px solid var(--border-color);
padding-bottom: 1rem;
position: sticky;
top: 2rem;
flex: 1;
min-width: 380px;
max-width: 420px;
max-height: calc(100vh - 4rem);
overflow-y: auto;
padding: 1.5rem;
background-color: white;
border-radius: 0.5rem;
box-shadow: var(--card-shadow);
align-self: flex-start;
font-size: small;
}}
@ -296,7 +307,7 @@ def create_html_output(random_pages, pdf_s3_client, output_path, workspace_path,
.page-grid {{
display: grid;
grid-template-columns: repeat(2, 1fr);
grid-template-columns: 1fr;
gap: 2rem;
}}
@ -494,16 +505,23 @@ def create_html_output(random_pages, pdf_s3_client, output_path, workspace_path,
@media (max-width: 768px) {{
body {{
padding: 1rem;
flex-direction: column;
}}
.page-grid {{
grid-template-columns: 1fr;
header {{
position: static;
max-width: 100%;
margin-left: 0;
margin-bottom: 2rem;
}}
.container {{
max-width: 100%;
}}
}}
</style>
</head>
<body>
<div class="container">
<header>
<h2>Task Instructions</h2>
<p>Your task is to review {len(random_pages)} document pages and determine whether they contain any <strong>Personally Identifiable Information (PII)</strong>. Carefully but efficiently inspect each page and select the appropriate response. You do not need to read every word - quickly scan the page and look for any obvious PII. The time expected to complete this task is 10-15 minutes.</p>
@ -541,6 +559,7 @@ def create_html_output(random_pages, pdf_s3_client, output_path, workspace_path,
<p><strong>Author names, researcher names, citations, or references from published research papers</strong> should NOT be marked as PII. These names are part of the normal publication process and are not considered private or sensitive information for the purposes of this task.
Only mark information as PII if it relates to private, sensitive, or personal details about an individual outside the context of the publication.</p>
</header>
<div class="container">
<div class="info-bar">
<div class="info-item">
@ -591,7 +610,7 @@ def create_html_output(random_pages, pdf_s3_client, output_path, workspace_path,
html_content += f"""
<div class="page-container" data-index="{i}">
<div class="page-info">
<h2 title="{pdf_path}"><a href="{original_url}" target="_blank">{original_url}</a></h2>
<h2 title="{pdf_path}">{original_url}</h2>
<p>Page {page_num}</p>
<p>{f'<a href="{presigned_url}" target="_blank">View Cached PDF</a>' if presigned_url else pdf_path}</p>
<p>
@ -623,7 +642,7 @@ def create_html_output(random_pages, pdf_s3_client, output_path, workspace_path,
html_content += f"""
<div class="page-container" data-index="{i}">
<div class="page-info">
<h2 title="{pdf_path}"><a href="{original_url}" target="_blank">{original_url}</a></h2>
<h2 title="{pdf_path}">{original_url}</h2>
<p>Page {page_num}</p>
<p>{f'<a href="{presigned_url}" target="_blank">View Cached PDF</a>' if presigned_url else pdf_path}</p>
<p>