mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-08-15 04:08:49 +00:00

- Adds a destination connector to upload processed output into a PostgreSQL/SQLite database instance. - Users are responsible for providing their own instances. This PR includes a couple of configuration examples. - Defines the scripts required to set up a PostgreSQL instance with the unstructured elements schema. - Validates postgres/pgvector embedding storage and retrieval --------- Co-authored-by: potter-potter <david.potter@gmail.com>
67 lines
1.7 KiB
Python
67 lines
1.7 KiB
Python
import typing as t
|
|
from dataclasses import dataclass
|
|
|
|
import click
|
|
|
|
from unstructured.ingest.cli.interfaces import CliConfig
|
|
from unstructured.ingest.connector.sql import SimpleSqlConfig
|
|
from unstructured.ingest.interfaces import WriteConfig
|
|
|
|
# Database backend identifiers accepted by the ``--db-type`` CLI option.
SQL_DRIVERS = {"postgresql", "sqlite"}
|
|
|
|
|
|
@dataclass
class SqlCliConfig(SimpleSqlConfig, CliConfig):
    """CLI-facing configuration for the SQL connector.

    Combines the connection fields inherited from ``SimpleSqlConfig`` with the
    ``CliConfig`` interface so the fields can be supplied as command-line
    options.
    """

    @staticmethod
    def get_cli_options() -> t.List[click.Option]:
        """Return the click options that describe a SQL database connection.

        Returns:
            A list of ``click.Option`` objects: a required ``--db-type`` and
            the optional ``--username``, ``--password``, ``--host``, ``--port``
            and ``--database`` options (each defaulting to ``None``).
        """
        options = [
            click.Option(
                ["--db-type"],
                required=True,
                # SQL_DRIVERS is a set; click expects a list/tuple of choices,
                # and set iteration order is not stable across interpreter
                # runs. Sorting yields a deterministic sequence so help text
                # and error messages always list the choices in the same order.
                type=click.Choice(sorted(SQL_DRIVERS)),
                help="Type of the database backend",
            ),
            click.Option(
                ["--username"],
                default=None,
                type=str,
                help="DB username",
            ),
            click.Option(
                ["--password"],
                default=None,
                type=str,
                help="DB password",
            ),
            click.Option(
                ["--host"],
                default=None,
                type=str,
                help="DB host",
            ),
            click.Option(
                ["--port"],
                default=None,
                type=int,
                help="DB host connection port",
            ),
            click.Option(
                ["--database"],
                default=None,
                type=str,
                help="Database name. For sqlite databases, this is the path to the .db file.",
            ),
        ]
        return options
|
|
|
|
|
|
def get_base_dest_cmd():
    """Build the ``sql`` destination command object.

    Returns:
        A ``BaseDestCmd`` instance created with ``cmd_name="sql"``,
        ``cli_config=SqlCliConfig`` and ``write_config=WriteConfig``.
    """
    # Imported at call time rather than at module load.
    # NOTE(review): presumably this avoids a circular import — confirm.
    from unstructured.ingest.cli.base.dest import BaseDestCmd

    return BaseDestCmd(cmd_name="sql", cli_config=SqlCliConfig, write_config=WriteConfig)
|