mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-06-27 02:30:08 +00:00

- Adds a destination connector to upload processed output into a PostgreSQL/SQLite database instance.
- Users are responsible for providing their own database instances. This PR includes a couple of configuration examples.
- Defines the scripts required to set up a PostgreSQL instance with the unstructured elements schema.
- Validates postgres/pgvector embedding storage and retrieval.
---------
Co-authored-by: potter-potter <david.potter@gmail.com>
45 lines
1.0 KiB
SQL
45 lines
1.0 KiB
SQL
-- Enable the `vector` extension, which supplies the `vector` column type used
-- by the `elements.embeddings` column defined in this script.
-- IF NOT EXISTS keeps the script re-runnable: without it a second run aborts
-- with "extension already exists".
CREATE EXTENSION IF NOT EXISTS vector;
|
|
|
|
-- Table holding one row per processed element.
--
-- Only `id` is constrained (primary key); every other column is nullable and
-- carries no default. The `vector` type used by `embeddings` comes from the
-- `vector` extension, which must be enabled before this statement runs.
--
-- IF NOT EXISTS makes the script safe to re-run. Note that it does NOT verify
-- that a pre-existing `elements` table matches this definition.
CREATE TABLE IF NOT EXISTS elements (
    -- Identity and content
    id UUID PRIMARY KEY,
    element_id VARCHAR,
    text TEXT,
    embeddings vector(384),  -- fixed at 384 dimensions; other sizes are rejected
    type VARCHAR,

    -- Layout / coordinate metadata
    -- NOTE(review): `points` is stored as free text, presumably a serialized
    -- coordinate list -- confirm the format with the writer.
    system VARCHAR,
    layout_width DECIMAL,
    layout_height DECIMAL,
    points TEXT,

    -- Source / provenance metadata
    -- NOTE(review): `permissions_data` and `record_locator` are TEXT,
    -- presumably serialized structures -- confirm with the writer.
    url TEXT,
    version VARCHAR,
    date_created TIMESTAMPTZ,
    date_modified TIMESTAMPTZ,
    date_processed TIMESTAMPTZ,
    permissions_data TEXT,
    record_locator TEXT,

    -- Document structure and file metadata
    category_depth INTEGER,
    parent_id VARCHAR,
    attached_filename VARCHAR,
    filetype VARCHAR,
    last_modified TIMESTAMPTZ,
    file_directory VARCHAR,
    filename VARCHAR,
    languages VARCHAR[],
    page_number VARCHAR,  -- stored as text, not as a numeric type
    links TEXT,
    page_name VARCHAR,
    link_urls VARCHAR[],
    link_texts VARCHAR[],

    -- Message-style metadata
    sent_from VARCHAR[],
    sent_to VARCHAR[],
    subject VARCHAR,

    -- Section / formatting metadata
    section VARCHAR,
    header_footer_type VARCHAR,
    emphasized_text_contents VARCHAR[],
    emphasized_text_tags VARCHAR[],
    text_as_html TEXT,
    regex_metadata TEXT,
    detection_class_prob DECIMAL
);
|
|
|