mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-08-18 05:37:53 +00:00
fix: process_document behavior when exception is raised (#298)
This commit is contained in:
parent
c7eba1636d
commit
a74d389fa7
@ -6,6 +6,8 @@
|
||||
|
||||
### Fixes
|
||||
|
||||
* Fix `process_document` file cleanup on failure
|
||||
|
||||
## 0.4.16
|
||||
|
||||
### Enhancements
|
||||
|
@ -1,9 +1,12 @@
|
||||
"""Process arbitrary files with the Unstructured library"""
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from unstructured_inference.models.detectron2 import MODEL_TYPES
|
||||
|
||||
from unstructured.ingest.interfaces import BaseIngestDoc as IngestDoc
|
||||
from unstructured.logger import logger
|
||||
|
||||
|
||||
def initialize():
|
||||
"""Download models (avoids subprocesses all doing the same)"""
|
||||
@ -14,7 +17,7 @@ def initialize():
|
||||
MODEL_TYPES[None]["config_path"]
|
||||
|
||||
|
||||
def process_document(doc):
|
||||
def process_document(doc: "IngestDoc") -> Optional[List[Dict[str, Any]]]:
|
||||
"""Process any IngestDoc-like class of document with Unstructured's auto partition logic."""
|
||||
isd_elems_no_filename = None
|
||||
try:
|
||||
@ -28,11 +31,9 @@ def process_document(doc):
|
||||
# the results. Instead, the MainProcess (caller) may work with the aggregate
|
||||
# results across all docs in memory.
|
||||
doc.write_result()
|
||||
|
||||
except Exception:
|
||||
# TODO(crag) save the exception instead of print?
|
||||
logging.error(f"Failed to process {doc}", exc_info=True)
|
||||
else:
|
||||
doc.cleanup_file()
|
||||
logger.error(f"Failed to process {doc}", exc_info=True)
|
||||
finally:
|
||||
doc.cleanup_file()
|
||||
return isd_elems_no_filename
|
||||
|
Loading…
x
Reference in New Issue
Block a user