mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-08-18 05:37:53 +00:00
fix: process_document behavior when exception is raised (#298)
This commit is contained in:
parent
c7eba1636d
commit
a74d389fa7
@ -6,6 +6,8 @@
|
|||||||
|
|
||||||
### Fixes
|
### Fixes
|
||||||
|
|
||||||
|
* Fix `process_document` file cleaning on failure
|
||||||
|
|
||||||
## 0.4.16
|
## 0.4.16
|
||||||
|
|
||||||
### Enhancements
|
### Enhancements
|
||||||
|
@ -1,9 +1,12 @@
|
|||||||
"""Process aribritrary files with the Unstructured library"""
|
"""Process aribritrary files with the Unstructured library"""
|
||||||
|
|
||||||
import logging
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
from unstructured_inference.models.detectron2 import MODEL_TYPES
|
from unstructured_inference.models.detectron2 import MODEL_TYPES
|
||||||
|
|
||||||
|
from unstructured.ingest.interfaces import BaseIngestDoc as IngestDoc
|
||||||
|
from unstructured.logger import logger
|
||||||
|
|
||||||
|
|
||||||
def initialize():
|
def initialize():
|
||||||
"""Download models (avoids subprocesses all doing the same)"""
|
"""Download models (avoids subprocesses all doing the same)"""
|
||||||
@ -14,7 +17,7 @@ def initialize():
|
|||||||
MODEL_TYPES[None]["config_path"]
|
MODEL_TYPES[None]["config_path"]
|
||||||
|
|
||||||
|
|
||||||
def process_document(doc):
|
def process_document(doc: "IngestDoc") -> Optional[List[Dict[str, Any]]]:
|
||||||
"""Process any IngestDoc-like class of document with Unstructured's auto partition logic."""
|
"""Process any IngestDoc-like class of document with Unstructured's auto partition logic."""
|
||||||
isd_elems_no_filename = None
|
isd_elems_no_filename = None
|
||||||
try:
|
try:
|
||||||
@ -28,11 +31,9 @@ def process_document(doc):
|
|||||||
# the results. Instead, the MainProcess (caller) may work with the aggregate
|
# the results. Instead, the MainProcess (caller) may work with the aggregate
|
||||||
# results across all docs in memory.
|
# results across all docs in memory.
|
||||||
doc.write_result()
|
doc.write_result()
|
||||||
|
|
||||||
except Exception:
|
except Exception:
|
||||||
# TODO(crag) save the exception instead of print?
|
# TODO(crag) save the exception instead of print?
|
||||||
logging.error(f"Failed to process {doc}", exc_info=True)
|
logger.error(f"Failed to process {doc}", exc_info=True)
|
||||||
else:
|
|
||||||
doc.cleanup_file()
|
|
||||||
finally:
|
finally:
|
||||||
|
doc.cleanup_file()
|
||||||
return isd_elems_no_filename
|
return isd_elems_no_filename
|
||||||
|
Loading…
x
Reference in New Issue
Block a user