feat: add attribution for pinecone (#3067)

### Summary

- Updates the `pinecone-client` from v2 to v4 using the [client
migration
guide](https://canyon-quilt-082.notion.site/Pinecone-Python-SDK-v3-0-0-Migration-Guide-056d3897d7634bf7be399676a4757c7b#932ad98a2d33432cac4229e1df34d3d5).
Version bump was required to [add
attribution](https://pinecone-2-partner-integration-guide.mintlify.app/integrations/build-integration/attribute-api-activity)
and will also enable us to support [serverless
indexes](https://docs.pinecone.io/reference/pinecone-clients#initialize)
- Adds `"unstructured=={version}"` as the source tag for the connector

### Testing

Destination connection tests
[pass](https://github.com/Unstructured-IO/unstructured/actions/runs/9180305080/job/25244484432?pr=3067)
with the updates.
This commit is contained in:
Matt Robinson 2024-05-21 16:56:08 -04:00 committed by GitHub
parent b0d8a779da
commit 7832dfc723
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 26 additions and 46 deletions

View File

@ -1,3 +1,13 @@
## 0.14.2-dev0
### Enhancements
### Features
* Add attribution to the `pinecone` connector
### Fixes
## 0.14.1 ## 0.14.1
### Enhancements ### Enhancements
@ -13,7 +23,8 @@
* Use the python client when calling the unstructured api for partitioning or chunking * Use the python client when calling the unstructured api for partitioning or chunking
* Saving the final content is now a dedicated destination connector (local) set as the default if none are provided. Avoids adding new files locally if uploading elsewhere. * Saving the final content is now a dedicated destination connector (local) set as the default if none are provided. Avoids adding new files locally if uploading elsewhere.
* Leverage last modified date when deciding if new files should be downloaded and reprocessed. * Leverage last modified date when deciding if new files should be downloaded and reprocessed.
* **Add support for Python 3.12**. `unstructured` now works with Python 3.12! * Add attribution to the `pinecone` connector
* **Add support for Python 3.12**. `unstructured` now works with Python 3.12!
### Fixes ### Fixes
@ -47,7 +58,7 @@
* **Fix disk-space leak in `partition_odt()`.** Remove temporary file created but not removed when `file` argument is passed to `partition_odt()`. * **Fix disk-space leak in `partition_odt()`.** Remove temporary file created but not removed when `file` argument is passed to `partition_odt()`.
* **AstraDB: option to prevent indexing metadata** * **AstraDB: option to prevent indexing metadata**
* **Fix Missing py.typed** * **Fix Missing py.typed**
## 0.13.7 ## 0.13.7
### Enhancements ### Enhancements

View File

@ -152,7 +152,7 @@ requests==2.32.1
# transformers # transformers
safetensors==0.4.3 safetensors==0.4.3
# via transformers # via transformers
scikit-learn==1.4.2 scikit-learn==1.5.0
# via sentence-transformers # via sentence-transformers
scipy==1.11.3 scipy==1.11.3
# via # via

View File

@ -1,3 +1,3 @@
-c ../deps/constraints.txt -c ../deps/constraints.txt
-c ../base.txt -c ../base.txt
pinecone-client==2.2.4 pinecone-client>=3.7.1

View File

@ -5,43 +5,12 @@
# pip-compile ./ingest/pinecone.in # pip-compile ./ingest/pinecone.in
# #
certifi==2024.2.2 certifi==2024.2.2
# via
# -c ./ingest/../base.txt
# -c ./ingest/../deps/constraints.txt
# requests
charset-normalizer==3.3.2
# via
# -c ./ingest/../base.txt
# requests
dnspython==2.6.1
# via pinecone-client
idna==3.7
# via
# -c ./ingest/../base.txt
# requests
loguru==0.7.2
# via pinecone-client
numpy==1.26.4
# via # via
# -c ./ingest/../base.txt # -c ./ingest/../base.txt
# -c ./ingest/../deps/constraints.txt # -c ./ingest/../deps/constraints.txt
# pinecone-client # pinecone-client
pinecone-client==2.2.4 pinecone-client==4.1.0
# via -r ./ingest/pinecone.in # via -r ./ingest/pinecone.in
python-dateutil==2.9.0.post0
# via
# -c ./ingest/../base.txt
# pinecone-client
pyyaml==6.0.1
# via pinecone-client
requests==2.32.1
# via
# -c ./ingest/../base.txt
# pinecone-client
six==1.16.0
# via
# -c ./ingest/../base.txt
# python-dateutil
tqdm==4.66.4 tqdm==4.66.4
# via # via
# -c ./ingest/../base.txt # -c ./ingest/../base.txt
@ -55,4 +24,3 @@ urllib3==1.26.18
# -c ./ingest/../base.txt # -c ./ingest/../base.txt
# -c ./ingest/../deps/constraints.txt # -c ./ingest/../deps/constraints.txt
# pinecone-client # pinecone-client
# requests

View File

@ -48,7 +48,7 @@ requests==2.32.1
# requests-toolbelt # requests-toolbelt
# simple-salesforce # simple-salesforce
# zeep # zeep
requests-file==2.0.0 requests-file==2.1.0
# via zeep # via zeep
requests-toolbelt==1.0.0 requests-toolbelt==1.0.0
# via zeep # via zeep

View File

@ -1 +1 @@
__version__ = "0.14.1" # pragma: no cover __version__ = "0.14.2-dev0" # pragma: no cover

View File

@ -72,16 +72,17 @@ class PineconeDestinationConnector(IngestDocSessionHandleMixin, BaseDestinationC
@requires_dependencies(["pinecone"], extras="pinecone") @requires_dependencies(["pinecone"], extras="pinecone")
def create_index(self) -> "PineconeIndex": def create_index(self) -> "PineconeIndex":
import pinecone from pinecone import Pinecone
pinecone.init( from unstructured import __version__ as unstructured_version
pc = Pinecone(
api_key=self.connector_config.access_config.api_key, api_key=self.connector_config.access_config.api_key,
environment=self.connector_config.environment, source_tag=f"unstructured=={unstructured_version}",
)
index = pinecone.Index(self.connector_config.index_name)
logger.debug(
f"Connected to index: {pinecone.describe_index(self.connector_config.index_name)}"
) )
index = pc.Index(self.connector_config.index_name)
logger.debug(f"Connected to index: {pc.describe_index(self.connector_config.index_name)}")
return index return index
@DestinationConnectionError.wrap @DestinationConnectionError.wrap