Mirror of https://github.com/Unstructured-IO/unstructured.git, synced 2025-11-09 06:57:26 +00:00
feat/singlestore dest connector (#3320)
### Description

Adds a [SingleStore](https://www.singlestore.com/) database destination connector with an associated ingest test.
parent 0046f58a4f
commit f1a28600d9
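For orientation, the test added in this commit drives the new destination through the ingest CLI. A minimal sketch of an equivalent invocation (not part of the diff; the flag names come from the CLI options and test script below, and the host/credential values and output path are example test-helper defaults):

```bash
# Partition a local document, embed it, and write the elements to SingleStore.
PYTHONPATH=. ./unstructured/ingest/main.py \
  local \
  --input-path example-docs/fake-memo.pdf \
  --output-dir /tmp/singlestore-out \
  --strategy fast \
  --embedding-provider "langchain-huggingface" \
  singlestore \
  --host localhost \
  --port 3306 \
  --user root \
  --password password \
  --database ingest_test \
  --table-name elements
```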
CHANGELOG.md
@@ -1,4 +1,4 @@
-## 0.14.10-dev6
+## 0.14.10-dev7
 
 ### Enhancements
 * **Update unstructured-client dependency** Change unstructured-client dependency pin back to
MANIFEST.in
@@ -53,5 +53,6 @@ include requirements/ingest/salesforce.in
 include requirements/ingest/sftp.in
 include requirements/ingest/sharepoint.in
 include requirements/ingest/slack.in
+include requirements/ingest/singlestore.in
 include requirements/ingest/weaviate.in
 include requirements/ingest/wikipedia.in
Makefile
@@ -197,6 +197,10 @@ install-ingest-airtable:
 install-ingest-sharepoint:
 	python3 -m pip install -r requirements/ingest/sharepoint.txt
 
+.PHONY: install-ingest-singlestore
+install-ingest-singlestore:
+	python3 -m pip install -r requirements/ingest/singlestore.txt
+
 .PHONY: install-ingest-weaviate
 install-ingest-weaviate:
 	python3 -m pip install -r requirements/ingest/weaviate.txt
requirements/ingest/singlestore.in (new file)
@@ -0,0 +1,3 @@
+-c ../deps/constraints.txt
+-c ../base.txt
+singlestoredb
requirements/ingest/singlestore.txt (new file)
@@ -0,0 +1,66 @@
+#
+# This file is autogenerated by pip-compile with Python 3.9
+# by the following command:
+#
+#    pip-compile singlestore.in
+#
+build==1.2.1
+    # via singlestoredb
+certifi==2024.6.2
+    # via
+    #   -c ../base.txt
+    #   -c ../deps/constraints.txt
+    #   requests
+charset-normalizer==3.3.2
+    # via
+    #   -c ../base.txt
+    #   requests
+idna==3.7
+    # via
+    #   -c ../base.txt
+    #   requests
+importlib-metadata==7.1.0
+    # via
+    #   -c ../deps/constraints.txt
+    #   build
+packaging==23.2
+    # via
+    #   -c ../base.txt
+    #   -c ../deps/constraints.txt
+    #   build
+parsimonious==0.10.0
+    # via singlestoredb
+pyjwt==2.8.0
+    # via singlestoredb
+pyproject-hooks==1.1.0
+    # via build
+regex==2024.5.15
+    # via
+    #   -c ../base.txt
+    #   parsimonious
+requests==2.32.3
+    # via
+    #   -c ../base.txt
+    #   singlestoredb
+singlestoredb==1.4.0
+    # via -r singlestore.in
+sqlparams==6.0.1
+    # via singlestoredb
+tomli==2.0.1
+    # via
+    #   build
+    #   singlestoredb
+urllib3==1.26.19
+    # via
+    #   -c ../base.txt
+    #   -c ../deps/constraints.txt
+    #   requests
+wheel==0.43.0
+    # via
+    #   -c ../deps/constraints.txt
+    #   singlestoredb
+zipp==3.19.2
+    # via importlib-metadata
+
+# The following packages are considered to be unsafe in a requirements file:
+# setuptools
scripts/singlestore-test-helpers/docker-compose.yml (new file)
@@ -0,0 +1,21 @@
+services:
+  singlestore:
+    container_name: "singlestore"
+    image: ghcr.io/singlestore-labs/singlestoredb-dev:latest
+    platform: linux/amd64
+    ports:
+      - 3306:3306
+      - 8080:8080
+      - 9000:9000
+    environment:
+      - ROOT_PASSWORD=password
+    volumes:
+      - ./schema.sql:/init.sql
+
+  # Allow docker compose up --wait to exit only when singlestore is healthy
+  wait:
+    image: hello-world:latest
+    container_name: singlestore-waiter
+    depends_on:
+      singlestore:
+        condition: service_healthy
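To stand the test instance up locally, the same command the ingest test below uses should work; `schema.sql` is mounted as `/init.sql`, so the database and `elements` table are created on first boot:

```bash
# Start SingleStore locally and give the container time to become healthy.
docker compose -f scripts/singlestore-test-helpers/docker-compose.yml up -d --wait-timeout 60
```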
scripts/singlestore-test-helpers/schema.sql (new file)
@@ -0,0 +1,49 @@
+CREATE DATABASE ingest_test;
+USE ingest_test;
+
+CREATE TABLE elements (
+    id INT PRIMARY KEY NOT NULL AUTO_INCREMENT,
+    element_id TEXT,
+    text TEXT,
+    embeddings Vector(384),
+    type TEXT,
+    url TEXT,
+    version TEXT,
+    data_source_date_created TIMESTAMP,
+    data_source_date_modified TIMESTAMP,
+    data_source_date_processed TIMESTAMP,
+    data_source_permissions_data TEXT,
+    data_source_url TEXT,
+    data_source_version TEXT,
+    data_source_record_locator JSON,
+    category_depth INTEGER,
+    parent_id TEXT,
+    attached_filename TEXT,
+    filetype TEXT,
+    last_modified TIMESTAMP,
+    file_directory TEXT,
+    filename TEXT,
+    languages TEXT,
+    page_number TEXT,
+    links TEXT,
+    page_name TEXT,
+    link_urls TEXT,
+    link_texts TEXT,
+    sent_from TEXT,
+    sent_to TEXT,
+    subject TEXT,
+    section TEXT,
+    header_footer_type TEXT,
+    emphasized_text_contents TEXT,
+    emphasized_text_tags TEXT,
+    text_as_html TEXT,
+    regex_metadata TEXT,
+    detection_class_prob DECIMAL,
+    is_continuation BOOLEAN,
+    orig_elements TEXT,
+    coordinates_points TEXT,
+    coordinates_system TEXT,
+    coordinates_layout_width DECIMAL,
+    coordinates_layout_height DECIMAL
+);
+
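Note that `Vector(384)` matches the embeddings the ingest test produces via the `langchain-huggingface` provider, whose default model (sentence-transformers/all-MiniLM-L6-v2) emits 384-dimensional vectors. Once rows are loaded, a similarity query might look like the sketch below; this is illustrative only, not part of the commit, and assumes a SingleStore version whose vector type supports the `<*>` dot-product operator:

```sql
-- Hypothetical similarity search over ingested elements (not part of this commit).
-- @query_vec would be a 384-dimensional vector from the same embedding model.
SELECT text, embeddings <*> @query_vec AS score
FROM elements
ORDER BY score DESC
LIMIT 5;
```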
scripts/singlestore-test-helpers/test_outputs.py (new executable file)
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+
+import click
+import singlestoredb as s2
+from singlestoredb.connection import Connection
+
+
+def get_connection(
+    host: str = None, port: int = None, database: str = None, user: str = None, password: str = None
+) -> Connection:
+    conn = s2.connect(
+        host=host,
+        port=port,
+        database=database,
+        user=user,
+        password=password,
+    )
+    return conn
+
+
+def validate(table_name: str, conn: Connection, num_elements: int):
+    with conn.cursor() as cur:
+        stmt = f"select * from {table_name}"
+        count = cur.execute(stmt)
+        assert (
+            count == num_elements
+        ), f"found count ({count}) doesn't match expected value: {num_elements}"
+        print("validation successful")
+
+
+@click.command()
+@click.option("--host", type=str, default="localhost", show_default=True)
+@click.option("--port", type=int, default=3306, show_default=True)
+@click.option("--user", type=str, default="root", show_default=True)
+@click.option("--password", type=str, default="password")
+@click.option("--database", type=str, required=True)
+@click.option("--table-name", type=str, required=True)
+@click.option(
+    "--num-elements", type=int, required=True, help="The expected number of elements to exist"
+)
+def run_validation(
+    host: str,
+    port: int,
+    user: str,
+    database: str,
+    password: str,
+    table_name: str,
+    num_elements: int,
+):
+    print(f"Validating that table {table_name} in database {database} has {num_elements} entries")
+    conn = get_connection(host=host, port=port, database=database, user=user, password=password)
+    validate(table_name=table_name, conn=conn, num_elements=num_elements)
+
+
+if __name__ == "__main__":
+    run_validation()
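The ingest test below invokes this helper after the pipeline finishes; standalone usage would look something like this (values are examples):

```bash
# Assert that ingest_test.elements holds exactly 42 rows.
./scripts/singlestore-test-helpers/test_outputs.py \
  --database ingest_test \
  --table-name elements \
  --num-elements 42
```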
setup.py
@@ -177,6 +177,7 @@ setup(
         "openai": load_requirements("requirements/ingest/embed-openai.in"),
         "bedrock": load_requirements("requirements/ingest/embed-aws-bedrock.in"),
         "databricks-volumes": load_requirements("requirements/ingest/databricks-volumes.in"),
+        "singlestore": load_requirements("requirements/ingest/singlestore.in"),
     },
     package_dir={"unstructured": "unstructured"},
     package_data={"unstructured": ["nlp/*.txt", "py.typed"]},
test_unstructured_ingest/dest/singlestore.sh (new executable file)
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+
+set -e
+
+DEST_PATH=$(dirname "$(realpath "$0")")
+SCRIPT_DIR=$(dirname "$DEST_PATH")
+cd "$SCRIPT_DIR"/.. || exit 1
+OUTPUT_FOLDER_NAME=singlestore-dest
+OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR}
+OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME
+WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME
+CI=${CI:-"false"}
+max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")}
+
+# shellcheck disable=SC1091
+source "$SCRIPT_DIR"/cleanup.sh
+function cleanup {
+  # Index cleanup
+  echo "Stopping Singlestore Docker container"
+  docker compose -f scripts/singlestore-test-helpers/docker-compose.yml down --remove-orphans -v
+
+  # Local file cleanup
+  cleanup_dir "$WORK_DIR"
+  cleanup_dir "$OUTPUT_DIR"
+
+}
+
+trap cleanup EXIT
+
+# Create singlestore instance and create `elements` class
+echo "Creating singlestore instance"
+# shellcheck source=/dev/null
+docker compose -f scripts/singlestore-test-helpers/docker-compose.yml up -d --wait-timeout 60
+
+DATABASE=ingest_test
+USER=root
+HOST=localhost
+PASSWORD=password
+PORT=3306
+TABLE=elements
+
+PYTHONPATH=. ./unstructured/ingest/main.py \
+  local \
+  --num-processes "$max_processes" \
+  --output-dir "$OUTPUT_DIR" \
+  --strategy fast \
+  --verbose \
+  --reprocess \
+  --input-path example-docs/fake-memo.pdf \
+  --work-dir "$WORK_DIR" \
+  --embedding-provider "langchain-huggingface" \
+  singlestore \
+  --host $HOST \
+  --user $USER \
+  --password $PASSWORD \
+  --database $DATABASE \
+  --port $PORT \
+  --table-name $TABLE \
+  --drop-empty-cols
+
+expected_num_elements=$(cat "$WORK_DIR"/embed/* | jq 'length')
+./scripts/singlestore-test-helpers/test_outputs.py \
+  --table-name $TABLE \
+  --database $DATABASE \
+  --num-elements "$expected_num_elements"
test_unstructured_ingest/test-ingest-dest.sh
@@ -35,6 +35,7 @@ all_tests=(
   'sharepoint-embed-cog-index.sh'
   'sqlite.sh'
   'vectara.sh'
+  'singlestore.sh'
   'weaviate.sh'
 )
 
unstructured/__version__.py
@@ -1 +1 @@
-__version__ = "0.14.10-dev6"  # pragma: no cover
+__version__ = "0.14.10-dev7"  # pragma: no cover
unstructured/ingest/connector/astra.py
@@ -14,7 +14,7 @@ from unstructured.ingest.interfaces import (
     WriteConfig,
 )
 from unstructured.ingest.logger import logger
-from unstructured.ingest.utils.data_prep import chunk_generator
+from unstructured.ingest.utils.data_prep import batch_generator
 from unstructured.utils import requires_dependencies
 
 if t.TYPE_CHECKING:
@@ -114,7 +114,7 @@ class AstraDestinationConnector(BaseDestinationConnector):
 
         astra_batch_size = self.write_config.batch_size
 
-        for chunk in chunk_generator(elements_dict, astra_batch_size):
+        for chunk in batch_generator(elements_dict, astra_batch_size):
             self._astra_db_collection.insert_many(chunk)
 
     def normalize_dict(self, element_dict: dict) -> dict:
unstructured/ingest/connector/chroma.py
@@ -12,7 +12,7 @@ from unstructured.ingest.interfaces import (
     WriteConfig,
 )
 from unstructured.ingest.logger import logger
-from unstructured.ingest.utils.data_prep import chunk_generator
+from unstructured.ingest.utils.data_prep import batch_generator
 from unstructured.staging.base import flatten_dict
 from unstructured.utils import requires_dependencies
 
@@ -144,7 +144,7 @@ class ChromaDestinationConnector(BaseDestinationConnector):
 
         chroma_batch_size = self.write_config.batch_size
 
-        for chunk in chunk_generator(elements_dict, chroma_batch_size):
+        for chunk in batch_generator(elements_dict, chroma_batch_size):
             self.upsert_batch(self.prepare_chroma_list(chunk))
 
     def normalize_dict(self, element_dict: dict) -> dict:
unstructured/ingest/connector/kafka.py
@@ -21,7 +21,7 @@ from unstructured.ingest.interfaces import (
     WriteConfig,
 )
 from unstructured.ingest.logger import logger
-from unstructured.ingest.utils.data_prep import chunk_generator
+from unstructured.ingest.utils.data_prep import batch_generator
 from unstructured.utils import requires_dependencies
 
 if t.TYPE_CHECKING:
@@ -270,7 +270,7 @@ class KafkaDestinationConnector(IngestDocSessionHandleMixin, BaseDestinationConnector):
         logger.info(f"Writing {len(dict_list)} documents to Kafka")
         num_uploaded = 0
 
-        for chunk in chunk_generator(dict_list, self.write_config.batch_size):
+        for chunk in batch_generator(dict_list, self.write_config.batch_size):
             num_uploaded += self.upload_msg(chunk)  # noqa: E203
 
         producer = self.kafka_producer
unstructured/ingest/connector/pinecone.py
@@ -17,7 +17,7 @@ from unstructured.ingest.interfaces import (
     WriteConfig,
 )
 from unstructured.ingest.logger import logger
-from unstructured.ingest.utils.data_prep import chunk_generator
+from unstructured.ingest.utils.data_prep import batch_generator
 from unstructured.staging.base import flatten_dict
 from unstructured.utils import requires_dependencies
 
@@ -111,7 +111,7 @@ class PineconeDestinationConnector(IngestDocSessionHandleMixin, BaseDestinationConnector):
 
         logger.info(f"using {self.write_config.num_processes} processes to upload")
         if self.write_config.num_processes == 1:
-            for chunk in chunk_generator(elements_dict, pinecone_batch_size):
+            for chunk in batch_generator(elements_dict, pinecone_batch_size):
                 self.upsert_batch(chunk)  # noqa: E203
 
         else:
@@ -119,7 +119,7 @@ class PineconeDestinationConnector(IngestDocSessionHandleMixin, BaseDestinationConnector):
                 processes=self.write_config.num_processes,
             ) as pool:
                 pool.map(
-                    self.upsert_batch, list(chunk_generator(elements_dict, pinecone_batch_size))
+                    self.upsert_batch, list(batch_generator(elements_dict, pinecone_batch_size))
                 )
 
     def normalize_dict(self, element_dict: dict) -> dict:
unstructured/ingest/connector/qdrant.py
@@ -15,7 +15,7 @@ from unstructured.ingest.interfaces import (
     WriteConfig,
 )
 from unstructured.ingest.logger import logger
-from unstructured.ingest.utils.data_prep import chunk_generator
+from unstructured.ingest.utils.data_prep import batch_generator
 from unstructured.staging.base import flatten_dict
 from unstructured.utils import requires_dependencies
 
@@ -120,14 +120,14 @@ class QdrantDestinationConnector(IngestDocSessionHandleMixin, BaseDestinationConnector):
 
         logger.info(f"using {self.write_config.num_processes} processes to upload")
         if self.write_config.num_processes == 1:
-            for chunk in chunk_generator(elements_dict, qdrant_batch_size):
+            for chunk in batch_generator(elements_dict, qdrant_batch_size):
                 self.upsert_batch(chunk)
 
         else:
             with mp.Pool(
                 processes=self.write_config.num_processes,
             ) as pool:
-                pool.map(self.upsert_batch, list(chunk_generator(elements_dict, qdrant_batch_size)))
+                pool.map(self.upsert_batch, list(batch_generator(elements_dict, qdrant_batch_size)))
 
     def normalize_dict(self, element_dict: dict) -> dict:
         return {
unstructured/ingest/utils/data_prep.py
@@ -2,8 +2,8 @@ import itertools
 import json
 
 
-def chunk_generator(iterable, batch_size=100):
-    """A helper function to break an iterable into chunks of size batch_size."""
+def batch_generator(iterable, batch_size=100):
+    """A helper function to break an iterable into batches of size batch_size."""
     it = iter(iterable)
     chunk = tuple(itertools.islice(it, batch_size))
     while chunk:
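Only the helper's name and docstring change here; the body is untouched. A self-contained sketch of the renamed helper and its behavior (the loop tail below the visible `while chunk:` line is reconstructed, so treat it as illustrative):

```python
import itertools


def batch_generator(iterable, batch_size=100):
    """A helper function to break an iterable into batches of size batch_size."""
    it = iter(iterable)
    chunk = tuple(itertools.islice(it, batch_size))
    while chunk:
        yield chunk
        chunk = tuple(itertools.islice(it, batch_size))


# Five items with batch_size=2 yield two full batches and one partial batch.
assert list(batch_generator(range(5), batch_size=2)) == [(0, 1), (2, 3), (4,)]
```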
unstructured/ingest/v2/cli/cmds/__init__.py
@@ -17,6 +17,7 @@ from .mongodb import mongodb_dest_cmd
 from .onedrive import onedrive_drive_src_cmd
 from .opensearch import opensearch_dest_cmd, opensearch_src_cmd
 from .pinecone import pinecone_dest_cmd
+from .singlestore import singlestore_dest_cmd
 from .weaviate import weaviate_dest_cmd
 
 src_cmds = [
@@ -55,6 +56,7 @@ dest_cmds = [
     pinecone_dest_cmd,
     s3_dest_cmd,
     sftp_dest_cmd,
+    singlestore_dest_cmd,
     weaviate_dest_cmd,
     mongodb_dest_cmd,
 ]
unstructured/ingest/v2/cli/cmds/singlestore.py (new file)
@@ -0,0 +1,96 @@
+from dataclasses import dataclass
+
+import click
+
+from unstructured.ingest.v2.cli.base import DestCmd
+from unstructured.ingest.v2.cli.interfaces import CliConfig
+from unstructured.ingest.v2.processes.connectors.singlestore import CONNECTOR_TYPE
+
+
+@dataclass
+class SingleStoreCliConnectionConfig(CliConfig):
+    @staticmethod
+    def get_cli_options() -> list[click.Option]:
+        options = [
+            click.Option(
+                ["--host"],
+                required=False,
+                type=str,
+                default=None,
+                help="SingleStore host",
+            ),
+            click.Option(
+                ["--port"],
+                required=False,
+                type=int,
+                default=None,
+                help="SingleStore port",
+            ),
+            click.Option(
+                ["--user"],
+                required=False,
+                type=str,
+                default=None,
+                help="SingleStore user",
+            ),
+            click.Option(
+                ["--password"],
+                required=False,
+                type=str,
+                default=None,
+                help="SingleStore password",
+            ),
+            click.Option(
+                ["--database"],
+                required=False,
+                type=str,
+                default=None,
+                help="SingleStore database",
+            ),
+        ]
+        return options
+
+
+@dataclass
+class SingleStoreCliUploaderConfig(CliConfig):
+    @staticmethod
+    def get_cli_options() -> list[click.Option]:
+        options = [
+            click.Option(
+                ["--drop-empty-cols"],
+                required=False,
+                type=bool,
+                is_flag=True,
+                default=False,
+                help="Drop any columns that have no data",
+            ),
+        ]
+        return options
+
+
+@dataclass
+class SingleStoreCliUploadStagerConfig(CliConfig):
+    @staticmethod
+    def get_cli_options() -> list[click.Option]:
+        return [
+            click.Option(
+                ["--table-name"],
+                required=False,
+                type=str,
+                help="SingleStore table to write contents to",
+            ),
+            click.Option(
+                ["--batch-size"],
+                required=False,
+                type=click.IntRange(min=1),
+                help="Batch size when writing to SingleStore",
+            ),
+        ]
+
+
+singlestore_dest_cmd = DestCmd(
+    cmd_name=CONNECTOR_TYPE,
+    connection_config=SingleStoreCliConnectionConfig,
+    uploader_config=SingleStoreCliUploaderConfig,
+    upload_stager_config=SingleStoreCliUploadStagerConfig,
+)
unstructured/ingest/v2/examples/example_singlestore.py (new file)
@@ -0,0 +1,48 @@
+from pathlib import Path
+
+from unstructured.ingest.v2.interfaces import ProcessorConfig
+from unstructured.ingest.v2.logger import logger
+from unstructured.ingest.v2.pipeline.pipeline import Pipeline
+from unstructured.ingest.v2.processes.chunker import ChunkerConfig
+from unstructured.ingest.v2.processes.connectors.local import (
+    LocalConnectionConfig,
+    LocalDownloaderConfig,
+    LocalIndexerConfig,
+)
+from unstructured.ingest.v2.processes.connectors.singlestore import (
+    SingleStoreAccessConfig,
+    SingleStoreConnectionConfig,
+    SingleStoreUploaderConfig,
+    SingleStoreUploadStagerConfig,
+)
+from unstructured.ingest.v2.processes.embedder import EmbedderConfig
+from unstructured.ingest.v2.processes.partitioner import PartitionerConfig
+
+base_path = Path(__file__).parent.parent.parent.parent.parent
+docs_path = base_path / "example-docs"
+work_dir = base_path / "tmp_ingest"
+output_path = work_dir / "output"
+download_path = work_dir / "download"
+
+if __name__ == "__main__":
+    logger.info(f"Writing all content in: {work_dir.resolve()}")
+    Pipeline.from_configs(
+        context=ProcessorConfig(work_dir=str(work_dir.resolve()), tqdm=True, verbose=True),
+        indexer_config=LocalIndexerConfig(
+            input_path=str(docs_path.resolve()) + "/book-war-and-peace-1p.txt"
+        ),
+        downloader_config=LocalDownloaderConfig(download_dir=download_path),
+        source_connection_config=LocalConnectionConfig(),
+        partitioner_config=PartitionerConfig(strategy="fast"),
+        chunker_config=ChunkerConfig(chunking_strategy="by_title"),
+        embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"),
+        destination_connection_config=SingleStoreConnectionConfig(
+            access_config=SingleStoreAccessConfig(password="password"),
+            host="localhost",
+            port=3306,
+            database="ingest_test",
+            user="root",
+        ),
+        stager_config=SingleStoreUploadStagerConfig(),
+        uploader_config=SingleStoreUploaderConfig(table_name="elements"),
+    ).run()
unstructured/ingest/v2/processes/connectors/astra.py
@@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Any, Optional
 from unstructured import __name__ as integration_name
 from unstructured.__version__ import __version__ as integration_version
 from unstructured.ingest.enhanced_dataclass import enhanced_field
-from unstructured.ingest.utils.data_prep import chunk_generator
+from unstructured.ingest.utils.data_prep import batch_generator
 from unstructured.ingest.v2.interfaces import (
     AccessConfig,
     ConnectionConfig,
@@ -138,7 +138,7 @@ class AstraUploader(Uploader):
         astra_batch_size = self.upload_config.batch_size
         collection = self.get_collection()
 
-        for chunk in chunk_generator(elements_dict, astra_batch_size):
+        for chunk in batch_generator(elements_dict, astra_batch_size):
             collection.insert_many(chunk)
 
 
unstructured/ingest/v2/processes/connectors/chroma.py
@@ -9,7 +9,7 @@ from dateutil import parser
 
 from unstructured.ingest.enhanced_dataclass import enhanced_field
 from unstructured.ingest.error import DestinationConnectionError
-from unstructured.ingest.utils.data_prep import chunk_generator
+from unstructured.ingest.utils.data_prep import batch_generator
 from unstructured.ingest.v2.interfaces import (
     AccessConfig,
     ConnectionConfig,
@@ -192,7 +192,7 @@ class ChromaUploader(Uploader):
         collection = self.client.get_or_create_collection(
             name=self.connection_config.collection_name
         )
-        for chunk in chunk_generator(elements_dict, self.upload_config.batch_size):
+        for chunk in batch_generator(elements_dict, self.upload_config.batch_size):
            self.upsert_batch(collection, self.prepare_chroma_list(chunk))
 
 
unstructured/ingest/v2/processes/connectors/mongodb.py
@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Any, Optional
 
 from unstructured.__version__ import __version__ as unstructured_version
 from unstructured.ingest.enhanced_dataclass import enhanced_field
-from unstructured.ingest.utils.data_prep import chunk_generator
+from unstructured.ingest.utils.data_prep import batch_generator
 from unstructured.ingest.v2.interfaces import (
     AccessConfig,
     ConnectionConfig,
@@ -125,7 +125,7 @@ class MongoDBUploader(Uploader):
         )
         db = self.client[self.connection_config.database]
         collection = db[self.connection_config.collection]
-        for chunk in chunk_generator(elements_dict, self.upload_config.batch_size):
+        for chunk in batch_generator(elements_dict, self.upload_config.batch_size):
             collection.insert_many(chunk)
 
 
unstructured/ingest/v2/processes/connectors/pinecone.py
@@ -7,7 +7,7 @@ from typing import TYPE_CHECKING, Any, Optional
 
 from unstructured.ingest.enhanced_dataclass import enhanced_field
 from unstructured.ingest.error import DestinationConnectionError
-from unstructured.ingest.utils.data_prep import chunk_generator
+from unstructured.ingest.utils.data_prep import batch_generator
 from unstructured.ingest.v2.interfaces import (
     AccessConfig,
     ConnectionConfig,
@@ -158,15 +158,15 @@ class PineconeUploader(Uploader):
         pinecone_batch_size = self.upload_config.batch_size
 
         if self.upload_config.num_of_processes == 1:
-            for chunk in chunk_generator(elements_dict, pinecone_batch_size):
-                self.upsert_batch(chunk)  # noqa: E203
+            for batch in batch_generator(elements_dict, pinecone_batch_size):
+                self.upsert_batch(batch)  # noqa: E203
 
         else:
             with mp.Pool(
                 processes=self.upload_config.num_of_processes,
             ) as pool:
                 pool.map(
-                    self.upsert_batch, list(chunk_generator(elements_dict, pinecone_batch_size))
+                    self.upsert_batch, list(batch_generator(elements_dict, pinecone_batch_size))
                 )
 
 
unstructured/ingest/v2/processes/connectors/singlestore.py (new file)
@@ -0,0 +1,164 @@
+import json
+from dataclasses import dataclass
+from datetime import date, datetime
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, Optional
+
+import numpy as np
+import pandas as pd
+from dateutil import parser
+
+from unstructured.ingest.enhanced_dataclass import enhanced_field
+from unstructured.ingest.utils.data_prep import batch_generator
+from unstructured.ingest.utils.table import convert_to_pandas_dataframe
+from unstructured.ingest.v2.interfaces import (
+    AccessConfig,
+    ConnectionConfig,
+    FileData,
+    UploadContent,
+    Uploader,
+    UploaderConfig,
+    UploadStager,
+    UploadStagerConfig,
+)
+from unstructured.ingest.v2.logger import logger
+from unstructured.ingest.v2.processes.connector_registry import (
+    DestinationRegistryEntry,
+    add_destination_entry,
+)
+from unstructured.utils import requires_dependencies
+
+if TYPE_CHECKING:
+    from singlestoredb.connection import Connection
+
+CONNECTOR_TYPE = "singlestore"
+
+
+@dataclass
+class SingleStoreAccessConfig(AccessConfig):
+    password: Optional[str] = None
+
+
+@dataclass
+class SingleStoreConnectionConfig(ConnectionConfig):
+    host: Optional[str] = None
+    port: Optional[int] = None
+    user: Optional[str] = None
+    database: Optional[str] = None
+    access_config: SingleStoreAccessConfig = enhanced_field(sensitive=True)
+
+    @requires_dependencies(["singlestoredb"], extras="singlestore")
+    def get_connection(self) -> "Connection":
+        import singlestoredb as s2
+
+        conn = s2.connect(
+            host=self.host,
+            port=self.port,
+            database=self.database,
+            user=self.user,
+            password=self.access_config.password,
+        )
+        return conn
+
+
+@dataclass
+class SingleStoreUploadStagerConfig(UploadStagerConfig):
+    drop_empty_cols: bool = False
+
+
+@dataclass
+class SingleStoreUploadStager(UploadStager):
+    upload_stager_config: SingleStoreUploadStagerConfig
+
+    @staticmethod
+    def parse_date_string(date_string: str) -> date:
+        try:
+            timestamp = float(date_string)
+            return datetime.fromtimestamp(timestamp)
+        except Exception as e:
+            logger.debug(f"date {date_string} string not a timestamp: {e}")
+        return parser.parse(date_string)
+
+    def run(
+        self,
+        elements_filepath: Path,
+        file_data: FileData,
+        output_dir: Path,
+        output_filename: str,
+        **kwargs: Any,
+    ) -> Path:
+        with open(elements_filepath) as elements_file:
+            elements_contents = json.load(elements_file)
+        output_path = Path(output_dir) / Path(f"{output_filename}.csv")
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+
+        df = convert_to_pandas_dataframe(
+            elements_dict=elements_contents,
+            drop_empty_cols=self.upload_stager_config.drop_empty_cols,
+        )
+        datetime_columns = [
+            "data_source_date_created",
+            "data_source_date_modified",
+            "data_source_date_processed",
+        ]
+        for column in filter(lambda x: x in df.columns, datetime_columns):
+            df[column] = df[column].apply(self.parse_date_string)
+        if "data_source_record_locator" in df.columns:
+            df["data_source_record_locator"] = df["data_source_record_locator"].apply(
+                lambda x: json.dumps(x) if x else None
+            )
+
+        with output_path.open("w") as output_file:
+            df.to_csv(output_file, index=False)
+        return output_path
+
+
+@dataclass
+class SingleStoreUploaderConfig(UploaderConfig):
+    table_name: str
+    batch_size: int = 100
+
+
+@dataclass
+class SingleStoreUploader(Uploader):
+    connection_config: SingleStoreConnectionConfig
+    upload_config: SingleStoreUploaderConfig
+    connector_type: str = CONNECTOR_TYPE
+
+    def upload_csv(self, content: UploadContent) -> None:
+        df = pd.read_csv(content.path)
+        logger.debug(
+            f"uploading {len(df)} entries to {self.connection_config.database} "
+            f"db in table {self.upload_config.table_name}"
+        )
+        stmt = "INSERT INTO {} ({}) VALUES ({})".format(
+            self.upload_config.table_name,
+            ", ".join(df.columns),
+            ", ".join(["%s"] * len(df.columns)),
+        )
+        logger.debug(f"sql statement: {stmt}")
+        df.replace({np.nan: None}, inplace=True)
+        data_as_tuples = list(df.itertuples(index=False, name=None))
+        with self.connection_config.get_connection() as conn:
+            with conn.cursor() as cur:
+                for chunk in batch_generator(
+                    data_as_tuples, batch_size=self.upload_config.batch_size
+                ):
+                    cur.executemany(stmt, chunk)
+                conn.commit()
+
+    def run(self, contents: list[UploadContent], **kwargs: Any) -> None:
+        for content in contents:
+            self.upload_csv(content=content)
+
+
+add_destination_entry(
+    destination_type=CONNECTOR_TYPE,
+    entry=DestinationRegistryEntry(
+        connection_config=SingleStoreConnectionConfig,
+        uploader=SingleStoreUploader,
+        uploader_config=SingleStoreUploaderConfig,
+        upload_stager=SingleStoreUploadStager,
+        upload_stager_config=SingleStoreUploadStagerConfig,
+    ),
+)
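The uploader derives its parameterized INSERT statement directly from the staged CSV's columns. Reproducing that construction for a hypothetical three-column frame (illustrative, not part of the commit):

```python
# Mirrors the statement construction in SingleStoreUploader.upload_csv
# for an example table and column set.
columns = ["element_id", "text", "type"]
stmt = "INSERT INTO {} ({}) VALUES ({})".format(
    "elements",
    ", ".join(columns),
    ", ".join(["%s"] * len(columns)),
)
assert stmt == "INSERT INTO elements (element_id, text, type) VALUES (%s, %s, %s)"
```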
unstructured/ingest/v2/processes/connectors/weaviate.py
@@ -21,6 +21,7 @@ from unstructured.ingest.v2.logger import logger
 from unstructured.ingest.v2.processes.connector_registry import (
     DestinationRegistryEntry,
 )
+from unstructured.utils import requires_dependencies
 
 if TYPE_CHECKING:
     from weaviate import Client
@@ -153,17 +154,19 @@ class WeaviateUploaderConfig(UploaderConfig):
 
 @dataclass
 class WeaviateUploader(Uploader):
-    connector_type: str = CONNECTOR_TYPE
     upload_config: WeaviateUploaderConfig
     connection_config: WeaviateConnectionConfig
     client: Optional["Client"] = field(init=False)
+    connector_type: str = CONNECTOR_TYPE
 
+    @requires_dependencies(["weaviate"], extras="weaviate")
     def __post_init__(self):
         from weaviate import Client
 
         auth = self._resolve_auth_method()
         self.client = Client(url=self.connection_config.host_url, auth_client_secret=auth)
 
+    @requires_dependencies(["weaviate"], extras="weaviate")
     def _resolve_auth_method(self):
         access_configs = self.connection_config.access_config
         connection_config = self.connection_config