diff --git a/CHANGELOG.md b/CHANGELOG.md index 06c108f98..59b96f7fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 0.14.9-dev7 +## 0.14.9-dev8 ### Enhancements diff --git a/unstructured/__version__.py b/unstructured/__version__.py index 9a5c41d6d..9802f1139 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.14.9-dev7" # pragma: no cover +__version__ = "0.14.9-dev8" # pragma: no cover diff --git a/unstructured/ingest/v2/interfaces/uploader.py b/unstructured/ingest/v2/interfaces/uploader.py index 520628e5a..b8c282983 100644 --- a/unstructured/ingest/v2/interfaces/uploader.py +++ b/unstructured/ingest/v2/interfaces/uploader.py @@ -26,6 +26,7 @@ class UploadContent: @dataclass class Uploader(BaseProcess, BaseConnector, ABC): upload_config: UploaderConfigT + connector_type: str def is_async(self) -> bool: return False diff --git a/unstructured/ingest/v2/pipeline/pipeline.py b/unstructured/ingest/v2/pipeline/pipeline.py index 96e5a5ceb..60edc9b52 100644 --- a/unstructured/ingest/v2/pipeline/pipeline.py +++ b/unstructured/ingest/v2/pipeline/pipeline.py @@ -87,17 +87,8 @@ class Pipeline: # Make sure that if the set destination connector expects a stager, one is also set if not self.uploader_step: return - matching_registry_entry = [ - v - for v in destination_registry.values() - if isinstance(self.uploader_step.process, v.uploader) - ] - if len(matching_registry_entry) > 1: - raise ValueError( - f"More than one entry found in destination registry " - f"for uploader type: {self.uploader_step.process}" - ) - registry_entry = matching_registry_entry[0] + uploader_connector_type = self.uploader_step.process.connector_type + registry_entry = destination_registry[uploader_connector_type] if registry_entry.upload_stager and self.stager_step is None: raise ValueError( f"pipeline with uploader type {self.uploader_step.process.__class__.__name__} " diff --git a/unstructured/ingest/v2/processes/connectors/astra.py b/unstructured/ingest/v2/processes/connectors/astra.py index 59c33d54b..33207207d 100644 --- a/unstructured/ingest/v2/processes/connectors/astra.py +++ b/unstructured/ingest/v2/processes/connectors/astra.py @@ -92,6 +92,7 @@ class AstraUploaderConfig(UploaderConfig): class AstraUploader(Uploader): connection_config: AstraConnectionConfig upload_config: AstraUploaderConfig + connector_type: str = CONNECTOR_TYPE @requires_dependencies(["astrapy"], extras="astra") def get_collection(self) -> "AstraDBCollection": diff --git a/unstructured/ingest/v2/processes/connectors/chroma.py b/unstructured/ingest/v2/processes/connectors/chroma.py index cf10045a7..3a0b865ad 100644 --- a/unstructured/ingest/v2/processes/connectors/chroma.py +++ b/unstructured/ingest/v2/processes/connectors/chroma.py @@ -114,6 +114,7 @@ class ChromaUploaderConfig(UploaderConfig): @dataclass class ChromaUploader(Uploader): + connector_type: str = CONNECTOR_TYPE upload_config: ChromaUploaderConfig connection_config: ChromaConnectionConfig client: Optional["Client"] = field(init=False) diff --git a/unstructured/ingest/v2/processes/connectors/elasticsearch.py b/unstructured/ingest/v2/processes/connectors/elasticsearch.py index 2412984c2..50b74677c 100644 --- a/unstructured/ingest/v2/processes/connectors/elasticsearch.py +++ b/unstructured/ingest/v2/processes/connectors/elasticsearch.py @@ -327,6 +327,7 @@ class ElasticsearchUploaderConfig(UploaderConfig): @dataclass class ElasticsearchUploader(Uploader): + connector_type: str = CONNECTOR_TYPE upload_config: ElasticsearchUploaderConfig connection_config: ElasticsearchConnectionConfig diff --git a/unstructured/ingest/v2/processes/connectors/fsspec/azure.py b/unstructured/ingest/v2/processes/connectors/fsspec/azure.py index fc893ce1f..4ff1d2f9a 100644 --- a/unstructured/ingest/v2/processes/connectors/fsspec/azure.py +++ b/unstructured/ingest/v2/processes/connectors/fsspec/azure.py @@ -114,6 +114,7 @@ class AzureUploaderConfig(FsspecUploaderConfig): @dataclass class AzureUploader(FsspecUploader): + connector_type: str = CONNECTOR_TYPE connection_config: AzureConnectionConfig upload_config: AzureUploaderConfig = field(default=None) diff --git a/unstructured/ingest/v2/processes/connectors/fsspec/box.py b/unstructured/ingest/v2/processes/connectors/fsspec/box.py index b5b973ef4..ce2907c15 100644 --- a/unstructured/ingest/v2/processes/connectors/fsspec/box.py +++ b/unstructured/ingest/v2/processes/connectors/fsspec/box.py @@ -100,7 +100,8 @@ class BoxUploaderConfig(FsspecUploaderConfig): @dataclass -class BoxUpload(FsspecUploader): +class BoxUploader(FsspecUploader): + connector_type: str = CONNECTOR_TYPE connection_config: BoxConnectionConfig upload_config: BoxUploaderConfig = field(default=None) @@ -131,6 +132,8 @@ add_source_entry( add_destination_entry( destination_type=CONNECTOR_TYPE, entry=DestinationRegistryEntry( - uploader=BoxUpload, uploader_config=BoxUploaderConfig, connection_config=BoxConnectionConfig + uploader=BoxUploader, + uploader_config=BoxUploaderConfig, + connection_config=BoxConnectionConfig, ), ) diff --git a/unstructured/ingest/v2/processes/connectors/fsspec/dropbox.py b/unstructured/ingest/v2/processes/connectors/fsspec/dropbox.py index b7c3bd169..78fd6ec54 100644 --- a/unstructured/ingest/v2/processes/connectors/fsspec/dropbox.py +++ b/unstructured/ingest/v2/processes/connectors/fsspec/dropbox.py @@ -99,7 +99,8 @@ class DropboxUploaderConfig(FsspecUploaderConfig): @dataclass -class DropboxUpload(FsspecUploader): +class DropboxUploader(FsspecUploader): + connector_type: str = CONNECTOR_TYPE connection_config: DropboxConnectionConfig upload_config: DropboxUploaderConfig = field(default=None) @@ -130,7 +131,7 @@ add_source_entry( add_destination_entry( destination_type=CONNECTOR_TYPE, entry=DestinationRegistryEntry( - uploader=DropboxUpload, + uploader=DropboxUploader, uploader_config=DropboxUploaderConfig, connection_config=DropboxConnectionConfig, ), diff --git a/unstructured/ingest/v2/processes/connectors/fsspec/fsspec.py b/unstructured/ingest/v2/processes/connectors/fsspec/fsspec.py index ac1ee975d..7d4dbaaba 100644 --- a/unstructured/ingest/v2/processes/connectors/fsspec/fsspec.py +++ b/unstructured/ingest/v2/processes/connectors/fsspec/fsspec.py @@ -304,6 +304,7 @@ FsspecUploaderConfigT = TypeVar("FsspecUploaderConfigT", bound=FsspecUploaderCon @dataclass class FsspecUploader(Uploader): + connector_type: str = CONNECTOR_TYPE upload_config: FsspecUploaderConfigT = field(default=None) @property diff --git a/unstructured/ingest/v2/processes/connectors/fsspec/gcs.py b/unstructured/ingest/v2/processes/connectors/fsspec/gcs.py index 52ff677b7..e3bebbae4 100644 --- a/unstructured/ingest/v2/processes/connectors/fsspec/gcs.py +++ b/unstructured/ingest/v2/processes/connectors/fsspec/gcs.py @@ -111,6 +111,7 @@ class GcsUploaderConfig(FsspecUploaderConfig): @dataclass class GcsUploader(FsspecUploader): + connector_type: str = CONNECTOR_TYPE connection_config: GcsConnectionConfig upload_config: GcsUploaderConfig = field(default=None) diff --git a/unstructured/ingest/v2/processes/connectors/fsspec/s3.py b/unstructured/ingest/v2/processes/connectors/fsspec/s3.py index d52b38f54..ceb99e3c9 100644 --- a/unstructured/ingest/v2/processes/connectors/fsspec/s3.py +++ b/unstructured/ingest/v2/processes/connectors/fsspec/s3.py @@ -125,7 +125,8 @@ class S3UploaderConfig(FsspecUploaderConfig): @dataclass -class S3Upload(FsspecUploader): +class S3Uploader(FsspecUploader): + connector_type: str = CONNECTOR_TYPE connection_config: S3ConnectionConfig upload_config: S3UploaderConfig = field(default=None) @@ -156,7 +157,7 @@ add_source_entry( add_destination_entry( destination_type=CONNECTOR_TYPE, entry=DestinationRegistryEntry( - uploader=S3Upload, + uploader=S3Uploader, uploader_config=S3UploaderConfig, connection_config=S3ConnectionConfig, ), diff --git a/unstructured/ingest/v2/processes/connectors/fsspec/sftp.py b/unstructured/ingest/v2/processes/connectors/fsspec/sftp.py index e77853c99..d4403dd34 100644 --- a/unstructured/ingest/v2/processes/connectors/fsspec/sftp.py +++ b/unstructured/ingest/v2/processes/connectors/fsspec/sftp.py @@ -136,6 +136,7 @@ class SftpUploaderConfig(FsspecUploaderConfig): @dataclass class SftpUploader(FsspecUploader): + connector_type: str = CONNECTOR_TYPE connection_config: SftpConnectionConfig upload_config: SftpUploaderConfig = field(default=None) diff --git a/unstructured/ingest/v2/processes/connectors/local.py b/unstructured/ingest/v2/processes/connectors/local.py index 2ffbb04f7..7715fc1f8 100644 --- a/unstructured/ingest/v2/processes/connectors/local.py +++ b/unstructured/ingest/v2/processes/connectors/local.py @@ -160,6 +160,7 @@ class LocalUploaderConfig(UploaderConfig): @dataclass class LocalUploader(Uploader): + connector_type: str = CONNECTOR_TYPE upload_config: LocalUploaderConfig = field(default_factory=lambda: LocalUploaderConfig()) connection_config: LocalConnectionConfig = field( default_factory=lambda: LocalConnectionConfig() diff --git a/unstructured/ingest/v2/processes/connectors/weaviate.py b/unstructured/ingest/v2/processes/connectors/weaviate.py index 474d2b601..8e8672b3a 100644 --- a/unstructured/ingest/v2/processes/connectors/weaviate.py +++ b/unstructured/ingest/v2/processes/connectors/weaviate.py @@ -154,6 +154,7 @@ class WeaviateUploaderConfig(UploaderConfig): @dataclass class WeaviateUploader(Uploader): + connector_type: str = CONNECTOR_TYPE upload_config: WeaviateUploaderConfig connection_config: WeaviateConnectionConfig client: Optional["Client"] = field(init=False)