fix: pinecone serialization issue (#2394)

This fixes the serialization of the Pinecone destination connector.
The presence of the PineconeIndex object breaks serialization with
`TypeError: cannot pickle '_thread.lock' object`. This change clears that
object before serialization.
This commit is contained in:
ryannikolaidis 2024-01-12 16:08:33 -08:00 committed by GitHub
parent 2f2c48acd5
commit 018cd7f71b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 17 additions and 2 deletions

View File

@ -1,4 +1,4 @@
## 0.12.1-dev4
## 0.12.1-dev5
### Enhancements
@ -12,6 +12,7 @@
### Fixes
* **Fix GCS connector converting JSON to string with single quotes.** FSSpec serialization caused conversion of JSON token to string with single quotes. GCS requires token in form of dict so this format is now assured.
* **Fix the serialization of the Pinecone destination connector.** The presence of the `PineconeIndex` object broke serialization with `TypeError: cannot pickle '_thread.lock' object`. The index is now cleared before serialization, so client data must be re-initialized after deserialization.
## 0.12.0

View File

@ -1 +1 @@
__version__ = "0.12.1-dev4" # pragma: no cover
__version__ = "0.12.1-dev5" # pragma: no cover

View File

@ -1,3 +1,4 @@
import copy
import json
import multiprocessing as mp
import typing as t
@ -5,6 +6,7 @@ import uuid
from dataclasses import dataclass
from unstructured.ingest.enhanced_dataclass import enhanced_field
from unstructured.ingest.enhanced_dataclass.core import _asdict
from unstructured.ingest.error import DestinationConnectionError, WriteError
from unstructured.ingest.interfaces import (
AccessConfig,
@ -47,6 +49,18 @@ class PineconeDestinationConnector(IngestDocSessionHandleMixin, BaseDestinationC
connector_config: SimplePineconeConfig
_index: t.Optional["PineconeIndex"] = None
def to_dict(self, **kwargs):
    """Serialize this connector without its live Pinecone index object.

    The ``_index`` object breaks deep copying with
    ``TypeError: cannot pickle '_thread.lock' object``, so serialization
    runs on a shallow copy whose ``_index`` has been cleared. The instance
    this method is called on keeps its index; a deserialized connector has
    to re-initialize its client data.

    Args:
        **kwargs: Forwarded unchanged to ``_asdict``.

    Returns:
        The result of ``_asdict`` for the copy with ``_index`` set to None.
    """
    # Shallow copy so that clearing the index does not affect ``self``.
    serializable = copy.copy(self)
    # ``_index`` is a declared field with a default of None, so it is
    # always present and can be assigned directly.
    serializable._index = None
    return _asdict(serializable, **kwargs)
@property
def pinecone_index(self):
if self._index is None: