chore: rename astra to astradb (#3458)

DataStax wanted all references to be astradb instead of astra. As per
@erichare

We'll also have to do the same in unstructured-ingest :)
This commit is contained in:
David Potter 2024-08-05 20:41:02 +00:00 committed by GitHub
parent 7e887442c4
commit 59ec64235b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
31 changed files with 123 additions and 112 deletions

View File

@ -1,3 +1,14 @@
## 0.15.2-dev0
### Enhancements
### Features
### Fixes
* **Renames Astra to Astra DB** Conforms with DataStax internal naming conventions.
## 0.15.1
### Enhancements

View File

@ -17,7 +17,7 @@ include requirements/huggingface.in
# Ingest extras
include requirements/ingest/airtable.in
include requirements/ingest/astra.in
include requirements/ingest/astradb.in
include requirements/ingest/azure-cognitive-search.in
include requirements/ingest/azure.in
include requirements/ingest/biomed.in

View File

@ -253,9 +253,9 @@ install-ingest-mongodb:
install-ingest-databricks-volumes:
python3 -m pip install -r requirements/ingest/databricks-volumes.txt
.PHONY: install-ingest-astra
install-ingest-astra:
python3 -m pip install -r requirements/ingest/astra.txt
.PHONY: install-ingest-astradb
install-ingest-astradb:
python3 -m pip install -r requirements/ingest/astradb.txt
.PHONY: install-ingest-clarifai
install-ingest-clarifai:

View File

@ -2,7 +2,7 @@
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile ./ingest/astra.in
# pip-compile ./ingest/astradb.in
#
anyio==3.7.1
# via
@ -10,7 +10,7 @@ anyio==3.7.1
# -c ./ingest/../deps/constraints.txt
# httpx
astrapy==1.4.0
# via -r ./ingest/astra.in
# via -r ./ingest/astradb.in
bson==0.5.10
# via astrapy
cassandra-driver==3.29.1

View File

@ -129,7 +129,7 @@ setup(
"xlsx": xlsx_reqs,
# Extra requirements for data connectors
"airtable": load_requirements("requirements/ingest/airtable.in"),
"astra": load_requirements("requirements/ingest/astra.in"),
"astradb": load_requirements("requirements/ingest/astradb.in"),
"azure": load_requirements("requirements/ingest/azure.in"),
"azure-cognitive-search": load_requirements(
"requirements/ingest/azure-cognitive-search.in",

View File

@ -5,7 +5,7 @@ set -e
SRC_PATH=$(dirname "$(realpath "$0")")
SCRIPT_DIR=$(dirname "$SRC_PATH")
cd "$SCRIPT_DIR"/.. || exit 1
OUTPUT_FOLDER_NAME=astra-dest
OUTPUT_FOLDER_NAME=astradb-dest
OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME
WORK_DIR=$SCRIPT_DIR/workdir/$OUTPUT_FOLDER_NAME
max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")}
@ -21,7 +21,7 @@ if [ -z "$ASTRA_DB_API_ENDPOINT" ]; then
fi
RANDOM_SUFFIX=$((RANDOM % 100000 + 1))
COLLECTION_NAME="astra_test_output_$RANDOM_SUFFIX"
COLLECTION_NAME="astradb_test_output_$RANDOM_SUFFIX"
EMBEDDING_DIMENSION=384
# shellcheck disable=SC1091
@ -31,7 +31,7 @@ function cleanup() {
cleanup_dir "$OUTPUT_DIR"
cleanup_dir "$WORK_DIR"
python "$SCRIPT_DIR"/python/test-ingest-astra-output.py \
python "$SCRIPT_DIR"/python/test-ingest-astradb-output.py \
--token "$ASTRA_DB_APPLICATION_TOKEN" \
--api-endpoint "$ASTRA_DB_API_ENDPOINT" \
--collection-name "$COLLECTION_NAME" down
@ -51,14 +51,14 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
--chunk-max-characters 1500 \
--chunk-multipage-sections \
--embedding-provider "langchain-huggingface" \
astra \
astradb \
--token "$ASTRA_DB_APPLICATION_TOKEN" \
--api-endpoint "$ASTRA_DB_API_ENDPOINT" \
--collection-name "$COLLECTION_NAME" \
--embedding-dimension "$EMBEDDING_DIMENSION" \
--requested-indexing-policy '{"deny": ["metadata"]}'
python "$SCRIPT_DIR"/python/test-ingest-astra-output.py \
python "$SCRIPT_DIR"/python/test-ingest-astradb-output.py \
--token "$ASTRA_DB_APPLICATION_TOKEN" \
--api-endpoint "$ASTRA_DB_API_ENDPOINT" \
--collection-name "$COLLECTION_NAME" check

View File

@ -36,7 +36,7 @@ trap cleanup EXIT
# NOTE(robinson) - per pymongo docs, pymongo ships with its own version of the bson library,
# which is incompatible with the bson installed from pypi. bson is installed as part of the
# astra dependencies.
# astradb dependencies.
# ref: https://pymongo.readthedocs.io/en/stable/installation.html
pip uninstall -y bson pymongo
make install-ingest-mongodb

View File

@ -10,7 +10,7 @@ def get_client(token, api_endpoint, collection_name) -> AstraDB:
return astra_db, astra_db_collection
@click.group(name="astra-ingest")
@click.group(name="astradb-ingest")
@click.option("--token", type=str)
@click.option("--api-endpoint", type=str)
@click.option("--collection-name", type=str, default="collection_test")

View File

@ -5,7 +5,7 @@ set -e
SRC_PATH=$(dirname "$(realpath "$0")")
SCRIPT_DIR=$(dirname "$SRC_PATH")
cd "$SCRIPT_DIR"/.. || exit 1
OUTPUT_FOLDER_NAME=astra
OUTPUT_FOLDER_NAME=astradb
OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME
WORK_DIR=$SCRIPT_DIR/workdir/$OUTPUT_FOLDER_NAME
DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME
@ -23,7 +23,7 @@ fi
COLLECTION_NAME="ingest_test_src"
PYTHONPATH=. ./unstructured/ingest/main.py \
astra \
astradb \
--token "$ASTRA_DB_APPLICATION_TOKEN" \
--api-endpoint "$ASTRA_DB_API_ENDPOINT" \
--collection-name "$COLLECTION_NAME" \

View File

@ -22,7 +22,7 @@ fi
# NOTE(robinson) - per pymongo docs, pymongo ships with its own version of the bson library,
# which is incompatible with the bson installed from pypi. bson is installed as part of the
# astra dependencies.
# astradb dependencies.
# ref: https://pymongo.readthedocs.io/en/stable/installation.html
pip uninstall -y bson pymongo
make install-ingest-mongodb

View File

@ -15,7 +15,7 @@ cd "$SCRIPT_DIR"/.. || exit 1
export OMP_THREAD_LIMIT=1
all_tests=(
'astra.sh'
'astradb.sh'
'azure.sh'
'azure-cognitive-search.sh'
'box.sh'

View File

@ -19,7 +19,7 @@ export OMP_THREAD_LIMIT=1
all_tests=(
's3.sh'
's3-minio.sh'
'astra.sh'
'astradb.sh'
'azure.sh'
'biomed-api.sh'
'biomed-path.sh'

View File

@ -1 +1 @@
__version__ = "0.15.1" # pragma: no cover
__version__ = "0.15.2-dev0" # pragma: no cover

View File

@ -7,8 +7,8 @@ from unstructured.ingest.cli.base.src import BaseSrcCmd
from unstructured.ingest.cli.cmds.fsspec.sftp import get_base_src_cmd as sftp_base_src_cmd
from .airtable import get_base_src_cmd as airtable_base_src_cmd
from .astra import get_base_dest_cmd as astra_base_dest_cmd
from .astra import get_base_src_cmd as astra_base_src_cmd
from .astradb import get_base_dest_cmd as astradb_base_dest_cmd
from .astradb import get_base_src_cmd as astradb_base_src_cmd
from .azure_cognitive_search import get_base_dest_cmd as azure_cognitive_search_base_dest_cmd
from .biomed import get_base_src_cmd as biomed_base_src_cmd
from .chroma import get_base_dest_cmd as chroma_base_dest_cmd
@ -63,7 +63,7 @@ if t.TYPE_CHECKING:
base_src_cmd_fns: t.List[t.Callable[[], BaseSrcCmd]] = [
airtable_base_src_cmd,
astra_base_src_cmd,
astradb_base_src_cmd,
azure_base_src_cmd,
biomed_base_src_cmd,
box_base_src_cmd,
@ -106,7 +106,7 @@ if src_duplicates:
)
base_dest_cmd_fns: t.List[t.Callable[[], "BaseDestCmd"]] = [
astra_base_dest_cmd,
astradb_base_dest_cmd,
azure_base_dest_cmd,
box_base_dest_cmd,
chroma_base_dest_cmd,

View File

@ -4,11 +4,11 @@ from dataclasses import dataclass
import click
from unstructured.ingest.cli.interfaces import CliConfig, Dict
from unstructured.ingest.connector.astra import AstraWriteConfig, SimpleAstraConfig
from unstructured.ingest.connector.astradb import AstraDBWriteConfig, SimpleAstraDBConfig
@dataclass
class AstraCliConfig(SimpleAstraConfig, CliConfig):
class AstraDBCliConfig(SimpleAstraDBConfig, CliConfig):
@staticmethod
def get_cli_options() -> t.List[click.Option]:
options = [
@ -48,7 +48,7 @@ class AstraCliConfig(SimpleAstraConfig, CliConfig):
@dataclass
class AstraCliWriteConfig(AstraWriteConfig, CliConfig):
class AstraDBCliWriteConfig(AstraDBWriteConfig, CliConfig):
@staticmethod
def get_cli_options() -> t.List[click.Option]:
options = [
@ -81,8 +81,8 @@ def get_base_src_cmd():
from unstructured.ingest.cli.base.src import BaseSrcCmd
cmd_cls = BaseSrcCmd(
cmd_name="astra",
cli_config=AstraCliConfig,
cmd_name="astradb",
cli_config=AstraDBCliConfig,
)
return cmd_cls
@ -91,9 +91,9 @@ def get_base_dest_cmd():
from unstructured.ingest.cli.base.dest import BaseDestCmd
cmd_cls = BaseDestCmd(
cmd_name="astra",
cli_config=AstraCliConfig,
additional_cli_options=[AstraCliWriteConfig],
write_config=AstraWriteConfig,
cmd_name="astradb",
cli_config=AstraDBCliConfig,
additional_cli_options=[AstraDBCliWriteConfig],
write_config=AstraDBWriteConfig,
)
return cmd_cls

View File

@ -31,23 +31,23 @@ NON_INDEXED_FIELDS = ["metadata._node_content", "content"]
@dataclass
class AstraAccessConfig(AccessConfig):
class AstraDBAccessConfig(AccessConfig):
token: str = enhanced_field(sensitive=True)
api_endpoint: str = enhanced_field(sensitive=True)
@dataclass
class SimpleAstraConfig(BaseConnectorConfig):
access_config: AstraAccessConfig
class SimpleAstraDBConfig(BaseConnectorConfig):
access_config: AstraDBAccessConfig
collection_name: str
namespace: t.Optional[str] = None
@dataclass
class AstraIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc):
connector_config: SimpleAstraConfig
class AstraDBIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc):
connector_config: SimpleAstraDBConfig
metadata: t.Dict[str, str] = field(default_factory=dict)
registry_name: str = "astra"
registry_name: str = "astradb"
@property
def filename(self):
@ -76,7 +76,7 @@ class AstraIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc):
)
@SourceConnectionError.wrap
@requires_dependencies(["astrapy"], extras="astra")
@requires_dependencies(["astrapy"], extras="astradb")
@BaseSingleIngestDoc.skip_if_file_exists
def get_file(self):
self.filename.parent.mkdir(parents=True, exist_ok=True)
@ -90,19 +90,19 @@ class AstraIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc):
@dataclass
class AstraSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
connector_config: SimpleAstraConfig
class AstraDBSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
connector_config: SimpleAstraDBConfig
_astra_db: t.Optional["AstraDB"] = field(init=False, default=None)
_astra_db_collection: t.Optional["AstraDBCollection"] = field(init=False, default=None)
@property
@requires_dependencies(["astrapy"], extras="astra")
@requires_dependencies(["astrapy"], extras="astradb")
def astra_db_collection(self) -> "AstraDBCollection":
if self._astra_db_collection is None:
from astrapy.db import AstraDB
# Build the Astra DB object.
# caller_name/version for AstraDB tracking
# caller_name/version for Astra DB tracking
self._astra_db = AstraDB(
api_endpoint=self.connector_config.access_config.api_endpoint,
token=self.connector_config.access_config.token,
@ -117,12 +117,12 @@ class AstraSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
)
return self._astra_db_collection # type: ignore
@requires_dependencies(["astrapy"], extras="astra")
@requires_dependencies(["astrapy"], extras="astradb")
@SourceConnectionError.wrap # type: ignore
def initialize(self):
_ = self.astra_db_collection
@requires_dependencies(["astrapy"], extras="astra")
@requires_dependencies(["astrapy"], extras="astradb")
def check_connection(self):
try:
_ = self.astra_db_collection
@ -130,14 +130,14 @@ class AstraSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
logger.error(f"Failed to validate connection {e}", exc_info=True)
raise SourceConnectionError(f"failed to validate connection: {e}")
@requires_dependencies(["astrapy"], extras="astra")
@requires_dependencies(["astrapy"], extras="astradb")
def get_ingest_docs(self): # type: ignore
# Perform the find operation
astra_docs = list(self.astra_db_collection.paginated_find())
doc_list = []
for record in astra_docs:
doc = AstraIngestDoc(
doc = AstraDBIngestDoc(
connector_config=self.connector_config,
processor_config=self.processor_config,
read_config=self.read_config,
@ -152,16 +152,16 @@ class AstraSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
@dataclass
class AstraWriteConfig(WriteConfig):
class AstraDBWriteConfig(WriteConfig):
embedding_dimension: int
requested_indexing_policy: t.Optional[t.Dict[str, t.Any]] = None
batch_size: int = 20
@dataclass
class AstraDestinationConnector(BaseDestinationConnector):
write_config: AstraWriteConfig
connector_config: SimpleAstraConfig
class AstraDBDestinationConnector(BaseDestinationConnector):
write_config: AstraDBWriteConfig
connector_config: SimpleAstraDBConfig
_astra_db: t.Optional["AstraDB"] = field(init=False, default=None)
_astra_db_collection: t.Optional["AstraDBCollection"] = field(init=False, default=None)
@ -180,7 +180,7 @@ class AstraDestinationConnector(BaseDestinationConnector):
return _asdict(self_cp, **kwargs)
@property
@requires_dependencies(["astrapy"], extras="astra")
@requires_dependencies(["astrapy"], extras="astradb")
def astra_db_collection(self) -> "AstraDBCollection":
if self._astra_db_collection is None:
from astrapy.db import AstraDB
@ -188,11 +188,11 @@ class AstraDestinationConnector(BaseDestinationConnector):
collection_name = self.connector_config.collection_name
embedding_dimension = self.write_config.embedding_dimension
# If the user has requested an indexing policy, pass it to the AstraDB
# If the user has requested an indexing policy, pass it to the Astra DB
requested_indexing_policy = self.write_config.requested_indexing_policy
options = {"indexing": requested_indexing_policy} if requested_indexing_policy else None
# caller_name/version for AstraDB tracking
# caller_name/version for Astra DB tracking
self._astra_db = AstraDB(
api_endpoint=self.connector_config.access_config.api_endpoint,
token=self.connector_config.access_config.token,
@ -209,12 +209,12 @@ class AstraDestinationConnector(BaseDestinationConnector):
)
return self._astra_db_collection
@requires_dependencies(["astrapy"], extras="astra")
@requires_dependencies(["astrapy"], extras="astradb")
@DestinationConnectionError.wrap
def initialize(self):
_ = self.astra_db_collection
@requires_dependencies(["astrapy"], extras="astra")
@requires_dependencies(["astrapy"], extras="astradb")
def check_connection(self):
try:
_ = self.astra_db_collection
@ -223,7 +223,7 @@ class AstraDestinationConnector(BaseDestinationConnector):
raise DestinationConnectionError(f"failed to validate connection: {e}")
def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None:
logger.info(f"Inserting / updating {len(elements_dict)} documents to Astra.")
logger.info(f"Inserting / updating {len(elements_dict)} documents to Astra DB.")
astra_batch_size = self.write_config.batch_size

View File

@ -2,7 +2,7 @@ import json
from typing import Dict, Type, cast
from unstructured.ingest.connector.airtable import AirtableIngestDoc
from unstructured.ingest.connector.astra import AstraIngestDoc
from unstructured.ingest.connector.astradb import AstraDBIngestDoc
from unstructured.ingest.connector.biomed import BiomedIngestDoc
from unstructured.ingest.connector.confluence import ConfluenceIngestDoc
from unstructured.ingest.connector.delta_table import DeltaTableIngestDoc
@ -46,7 +46,7 @@ from unstructured.ingest.interfaces import BaseIngestDoc
INGEST_DOC_NAME_TO_CLASS: Dict[str, Type[EnhancedDataClassJsonMixin]] = {
"airtable": AirtableIngestDoc,
"astra": AstraIngestDoc,
"astradb": AstraDBIngestDoc,
"azure": AzureBlobStorageIngestDoc,
"biomed": BiomedIngestDoc,
"box": BoxIngestDoc,

View File

@ -2,7 +2,7 @@ import typing as t
from typing import Type
from .airtable import AirtableRunner
from .astra import AstraRunner
from .astradb import AstraDBRunner
from .base_runner import Runner
from .biomed import BiomedRunner
from .confluence import ConfluenceRunner
@ -36,7 +36,7 @@ from .wikipedia import WikipediaRunner
runner_map: t.Dict[str, Type[Runner]] = {
"airtable": AirtableRunner,
"astra": AstraRunner,
"astradb": AstraDBRunner,
"azure": AzureRunner,
"biomed": BiomedRunner,
"box": BoxRunner,

View File

@ -8,27 +8,27 @@ from unstructured.ingest.runner.base_runner import Runner
from unstructured.ingest.runner.utils import update_download_dir_hash
if t.TYPE_CHECKING:
from unstructured.ingest.connector.astra import SimpleAstraConfig
from unstructured.ingest.connector.astradb import SimpleAstraDBConfig
@dataclass
class AstraRunner(Runner):
connector_config: "SimpleAstraConfig"
class AstraDBRunner(Runner):
connector_config: "SimpleAstraDBConfig"
def update_read_config(self):
hashed_dir_name = hashlib.sha256(
str(self.connector_config.access_config.api_endpoint).encode("utf-8"),
)
self.read_config.download_dir = update_download_dir_hash(
connector_name="astra",
connector_name="astradb",
read_config=self.read_config,
hashed_dir_name=hashed_dir_name,
logger=logger,
)
def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]:
from unstructured.ingest.connector.astra import (
AstraSourceConnector,
from unstructured.ingest.connector.astradb import (
AstraDBSourceConnector,
)
return AstraSourceConnector
return AstraDBSourceConnector

View File

@ -1,6 +1,6 @@
import typing as t
from .astra import AstraWriter
from .astradb import AstraDBWriter
from .azure_cognitive_search import AzureCognitiveSearchWriter
from .base_writer import Writer
from .chroma import ChromaWriter
@ -23,7 +23,7 @@ from .vectara import VectaraWriter
from .weaviate import WeaviateWriter
writer_map: t.Dict[str, t.Type[Writer]] = {
"astra": AstraWriter,
"astradb": AstraDBWriter,
"azure": AzureWriter,
"azure_cognitive_search": AzureCognitiveSearchWriter,
"box": BoxWriter,

View File

@ -6,17 +6,17 @@ from unstructured.ingest.interfaces import BaseDestinationConnector
from unstructured.ingest.runner.writers.base_writer import Writer
if t.TYPE_CHECKING:
from unstructured.ingest.connector.astra import AstraWriteConfig, SimpleAstraConfig
from unstructured.ingest.connector.astradb import AstraDBWriteConfig, SimpleAstraDBConfig
@dataclass
class AstraWriter(Writer, EnhancedDataClassJsonMixin):
write_config: "AstraWriteConfig"
connector_config: "SimpleAstraConfig"
class AstraDBWriter(Writer, EnhancedDataClassJsonMixin):
write_config: "AstraDBWriteConfig"
connector_config: "SimpleAstraDBConfig"
def get_connector_cls(self) -> t.Type[BaseDestinationConnector]:
from unstructured.ingest.connector.astra import (
AstraDestinationConnector,
from unstructured.ingest.connector.astradb import (
AstraDBDestinationConnector,
)
return AstraDestinationConnector
return AstraDBDestinationConnector

View File

@ -2,7 +2,7 @@ from collections import Counter
import click
from .astra import astra_dest_cmd
from .astradb import astradb_dest_cmd
from .azure_cognitive_search import azure_cognitive_search_dest_cmd
from .chroma import chroma_dest_cmd
from .databricks_volumes import databricks_volumes_dest_cmd
@ -51,7 +51,7 @@ if duplicate_src_names:
)
dest_cmds = [
astra_dest_cmd,
astradb_dest_cmd,
azure_cognitive_search_dest_cmd,
azure_dest_cmd,
box_dest_cmd,

View File

@ -5,11 +5,11 @@ import click
from unstructured.ingest.v2.cli.base import DestCmd
from unstructured.ingest.v2.cli.interfaces import CliConfig
from unstructured.ingest.v2.cli.utils import Dict
from unstructured.ingest.v2.processes.connectors.astra import CONNECTOR_TYPE
from unstructured.ingest.v2.processes.connectors.astradb import CONNECTOR_TYPE
@dataclass
class AstraCliConnectionConfig(CliConfig):
class AstraDBCliConnectionConfig(CliConfig):
@staticmethod
def get_cli_options() -> list[click.Option]:
options = [
@ -18,7 +18,7 @@ class AstraCliConnectionConfig(CliConfig):
required=True,
type=str,
help="Astra DB Token with access to the database.",
envvar="ASTRA_DB_TOKEN",
envvar="ASTRA_DB_APPLICATION_TOKEN",
show_envvar=True,
),
click.Option(
@ -26,7 +26,7 @@ class AstraCliConnectionConfig(CliConfig):
required=True,
type=str,
help="The API endpoint for the Astra DB.",
envvar="ASTRA_DB_ENDPOINT",
envvar="ASTRA_DB_API_ENDPOINT",
show_envvar=True,
),
]
@ -34,7 +34,7 @@ class AstraCliConnectionConfig(CliConfig):
@dataclass
class AstraCliUploaderConfig(CliConfig):
class AstraDBCliUploaderConfig(CliConfig):
@staticmethod
def get_cli_options() -> list[click.Option]:
options = [
@ -78,8 +78,8 @@ class AstraCliUploaderConfig(CliConfig):
return options
astra_dest_cmd = DestCmd(
astradb_dest_cmd = DestCmd(
cmd_name=CONNECTOR_TYPE,
connection_config=AstraCliConnectionConfig,
uploader_config=AstraCliUploaderConfig,
connection_config=AstraDBCliConnectionConfig,
uploader_config=AstraDBCliUploaderConfig,
)

View File

@ -6,8 +6,8 @@ from unstructured.ingest.v2.processes.connector_registry import (
add_source_entry,
)
from .astra import CONNECTOR_TYPE as ASTRA_CONNECTOR_TYPE
from .astra import astra_destination_entry
from .astradb import CONNECTOR_TYPE as ASTRADB_CONNECTOR_TYPE
from .astradb import astradb_destination_entry
from .chroma import CONNECTOR_TYPE as CHROMA_CONNECTOR_TYPE
from .chroma import chroma_destination_entry
from .databricks_volumes import CONNECTOR_TYPE as DATABRICKS_VOLUMES_CONNECTOR_TYPE
@ -37,7 +37,7 @@ from .sql import sql_destination_entry
from .weaviate import CONNECTOR_TYPE as WEAVIATE_CONNECTOR_TYPE
from .weaviate import weaviate_destination_entry
add_destination_entry(destination_type=ASTRA_CONNECTOR_TYPE, entry=astra_destination_entry)
add_destination_entry(destination_type=ASTRADB_CONNECTOR_TYPE, entry=astradb_destination_entry)
add_destination_entry(destination_type=CHROMA_CONNECTOR_TYPE, entry=chroma_destination_entry)

View File

@ -26,30 +26,30 @@ from unstructured.utils import requires_dependencies
if TYPE_CHECKING:
from astrapy.db import AstraDBCollection
CONNECTOR_TYPE = "astra"
CONNECTOR_TYPE = "astradb"
@dataclass
class AstraAccessConfig(AccessConfig):
class AstraDBAccessConfig(AccessConfig):
token: str
api_endpoint: str
@dataclass
class AstraConnectionConfig(ConnectionConfig):
class AstraDBConnectionConfig(ConnectionConfig):
connection_type: str = CONNECTOR_TYPE
access_config: AstraAccessConfig = enhanced_field(sensitive=True)
access_config: AstraDBAccessConfig = enhanced_field(sensitive=True)
@dataclass
class AstraUploadStagerConfig(UploadStagerConfig):
class AstraDBUploadStagerConfig(UploadStagerConfig):
pass
@dataclass
class AstraUploadStager(UploadStager):
upload_stager_config: AstraUploadStagerConfig = field(
default_factory=lambda: AstraUploadStagerConfig()
class AstraDBUploadStager(UploadStager):
upload_stager_config: AstraDBUploadStagerConfig = field(
default_factory=lambda: AstraDBUploadStagerConfig()
)
def conform_dict(self, element_dict: dict) -> dict:
@ -79,7 +79,7 @@ class AstraUploadStager(UploadStager):
@dataclass
class AstraUploaderConfig(UploaderConfig):
class AstraDBUploaderConfig(UploaderConfig):
collection_name: str
embedding_dimension: int
namespace: Optional[str] = None
@ -88,12 +88,12 @@ class AstraUploaderConfig(UploaderConfig):
@dataclass
class AstraUploader(Uploader):
connection_config: AstraConnectionConfig
upload_config: AstraUploaderConfig
class AstraDBUploader(Uploader):
connection_config: AstraDBConnectionConfig
upload_config: AstraDBUploaderConfig
connector_type: str = CONNECTOR_TYPE
@requires_dependencies(["astrapy"], extras="astra")
@requires_dependencies(["astrapy"], extras="astradb")
def get_collection(self) -> "AstraDBCollection":
from astrapy.db import AstraDB
@ -102,11 +102,11 @@ class AstraUploader(Uploader):
embedding_dimension = self.upload_config.embedding_dimension
requested_indexing_policy = self.upload_config.requested_indexing_policy
# If the user has requested an indexing policy, pass it to the AstraDB
# If the user has requested an indexing policy, pass it to the Astra DB
options = {"indexing": requested_indexing_policy} if requested_indexing_policy else None
# Build the Astra DB object.
# caller_name/version for AstraDB tracking
# caller_name/version for Astra DB tracking
astra_db = AstraDB(
api_endpoint=self.connection_config.access_config.api_endpoint,
token=self.connection_config.access_config.token,
@ -142,10 +142,10 @@ class AstraUploader(Uploader):
collection.insert_many(chunk)
astra_destination_entry = DestinationRegistryEntry(
connection_config=AstraConnectionConfig,
upload_stager_config=AstraUploadStagerConfig,
upload_stager=AstraUploadStager,
uploader_config=AstraUploaderConfig,
uploader=AstraUploader,
astradb_destination_entry = DestinationRegistryEntry(
connection_config=AstraDBConnectionConfig,
upload_stager_config=AstraDBUploadStagerConfig,
upload_stager=AstraDBUploadStager,
uploader_config=AstraDBUploaderConfig,
uploader=AstraDBUploader,
)