mirror of
https://github.com/allenai/olmocr.git
synced 2025-10-28 16:41:09 +00:00
Fix pipeline bug with indexing
This commit is contained in:
parent
31becaf7e4
commit
7c7867626f
@ -162,6 +162,10 @@ class DatabaseManager:
|
|||||||
)
|
)
|
||||||
for row in rows
|
for row in rows
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def delete_index_entries_by_inference_s3_path(self, inference_s3_path: str):
|
||||||
|
self.cursor.execute("DELETE FROM page_results WHERE inference_s3_path = ?", (inference_s3_path,))
|
||||||
|
self.conn.commit()
|
||||||
|
|
||||||
def get_last_indexed_round(self) -> int:
|
def get_last_indexed_round(self) -> int:
|
||||||
self.cursor.execute("""
|
self.cursor.execute("""
|
||||||
@ -617,6 +621,7 @@ if __name__ == '__main__':
|
|||||||
try:
|
try:
|
||||||
inference_records = future.result()
|
inference_records = future.result()
|
||||||
|
|
||||||
|
db.delete_index_entries_by_inference_s3_path(s3_path)
|
||||||
db.add_index_entries(inference_records)
|
db.add_index_entries(inference_records)
|
||||||
db.update_processed_file(s3_path, etag=etag)
|
db.update_processed_file(s3_path, etag=etag)
|
||||||
except urllib3.exceptions.SSLError:
|
except urllib3.exceptions.SSLError:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user