diff --git a/CHANGELOG.md b/CHANGELOG.md index ad5bc6844..ed050e006 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 0.10.29-dev1 +## 0.10.29-dev2 ### Enhancements @@ -10,6 +10,7 @@ ### Fixes * **Ingest session handler not being shared correctly** All ingest docs that leverage the session handler should only need to set it once per process. It was recreating it each time because the right values weren't being set nor available given how dataclasses work in python. +* **Ingest download-only fix** Previously the download-only flag was checked after the doc factory pipeline step, which occurs before the files are actually downloaded by the source node. This check was moved after the source node so that the files are downloaded before the pipeline exits. ## 0.10.28 diff --git a/unstructured/__version__.py b/unstructured/__version__.py index 9322a4ba7..13626425c 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.10.29-dev1" # pragma: no cover +__version__ = "0.10.29-dev2" # pragma: no cover diff --git a/unstructured/ingest/pipeline/pipeline.py b/unstructured/ingest/pipeline/pipeline.py index a8a46ede5..f3459f8ee 100644 --- a/unstructured/ingest/pipeline/pipeline.py +++ b/unstructured/ingest/pipeline/pipeline.py @@ -59,10 +59,10 @@ class Pipeline(DataClassJsonMixin): ) for doc in dict_docs: self.pipeline_context.ingest_docs_map[get_ingest_doc_hash(doc)] = doc + fetched_filenames = self.source_node(iterable=dict_docs) if self.source_node.read_config.download_only: logger.info("stopping pipeline after downloading files") return - fetched_filenames = self.source_node(iterable=dict_docs) if not fetched_filenames: logger.info("No files to run partition over") return