From f23f20c1dc1941f9f672c0983a96e23cdaba4387 Mon Sep 17 00:00:00 2001 From: ryannikolaidis <1208590+ryannikolaidis@users.noreply.github.com> Date: Wed, 17 Jan 2024 09:39:32 -0800 Subject: [PATCH] fix: postgres destination connector serialization (#2411) This fixes the serialization of the Postgres destination connector. Presence of the _client object breaks serialization due to TypeError: cannot pickle '_thread.lock' object. This removes that object before serialization. --- CHANGELOG.md | 3 ++- unstructured/__version__.py | 2 +- unstructured/ingest/connector/sql.py | 14 ++++++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5dc331744..252d6de0c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 0.12.1-dev10 +## 0.12.1-dev11 ### Enhancements @@ -21,6 +21,7 @@ * **Pin version of unstructured-client** Set minimum version of unstructured-client to avoid raising a TypeError when passing `api_key_auth` to `UnstructuredClient` * **Fix the serialization of the Pinecone destination connector.** Presence of the PineconeIndex object breaks serialization due to TypeError: cannot pickle '_thread.lock' object. This removes that object before serialization. * **Fix the serialization of the Elasticsearch destination connector.** Presence of the _client object breaks serialization due to TypeError: cannot pickle '_thread.lock' object. This removes that object before serialization. +* **Fix the serialization of the Postgres destination connector.** Presence of the _client object breaks serialization due to TypeError: cannot pickle '_thread.lock' object. This removes that object before serialization. * **Fix documentation and sample code for Chroma.** Was pointing to wrong examples.. 
## 0.12.0 diff --git a/unstructured/__version__.py b/unstructured/__version__.py index 843228d06..43ad13892 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.12.1-dev10" # pragma: no cover +__version__ = "0.12.1-dev11" # pragma: no cover diff --git a/unstructured/ingest/connector/sql.py b/unstructured/ingest/connector/sql.py index f63090537..4c8dafdd4 100644 --- a/unstructured/ingest/connector/sql.py +++ b/unstructured/ingest/connector/sql.py @@ -1,9 +1,11 @@ +import copy import json import typing as t import uuid from dataclasses import dataclass, field from unstructured.ingest.enhanced_dataclass import enhanced_field +from unstructured.ingest.enhanced_dataclass.core import _asdict from unstructured.ingest.error import DestinationConnectionError from unstructured.ingest.interfaces import ( AccessConfig, @@ -68,6 +70,18 @@ class SqlDestinationConnector(BaseDestinationConnector): connector_config: SimpleSqlConfig _client: t.Optional[t.Any] = field(init=False, default=None) + def to_dict(self, **kwargs): + """ + The _client variable in this dataclass breaks deepcopy due to: + TypeError: cannot pickle '_thread.lock' object + When serializing, remove it, meaning client data will need to be reinitialized + when deserialized + """ + self_cp = copy.copy(self) + if hasattr(self_cp, "_client"): + setattr(self_cp, "_client", None) + return _asdict(self_cp, **kwargs) + @property def client(self): if self._client is None: