def to_dict(self, **kwargs):
    """Serialize this connector to a dict without its ``_index`` handle.

    The object stored in ``_index`` cannot be deep-copied; attempting it
    fails with ``TypeError: cannot pickle '_thread.lock' object``. To avoid
    that, serialization runs on a shallow copy whose ``_index`` has been
    reset to ``None``, so ``self`` keeps its own ``_index`` untouched.
    Because the handle is dropped, client data has to be reinitialized
    after deserialization.

    Any keyword arguments are forwarded unchanged to ``_asdict``.
    """
    # Shallow copy: only the top-level attribute binding is replaced below,
    # so the original instance is not modified.
    detached = copy.copy(self)
    if hasattr(detached, "_index"):
        detached._index = None
    return _asdict(detached, **kwargs)