mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-06-27 02:30:08 +00:00

- Adds a destination connector to upload processed output into a PostgreSQL/SQLite database instance. - Users are responsible for providing their own instances. This PR includes a couple of configuration examples. - Defines the scripts required to set up a PostgreSQL instance with the unstructured elements schema. - Validates PostgreSQL/pgvector embedding storage and retrieval --------- Co-authored-by: potter-potter <david.potter@gmail.com>
42 lines
893 B
SQL
42 lines
893 B
SQL
-- Schema for the `elements` table that receives processed output written by
-- the SQL destination connector.
--
-- Notes:
--   * `id` is the primary key; every other column is nullable (no NOT NULL
--     constraints are declared).
--   * All date/time columns are declared as TEXT, not as a date/timestamp type.
--   * NOTE(review): the type names used here (TEXT / REAL / INTEGER / DECIMAL,
--     with `embeddings` stored as TEXT) look like the SQLite variant of the
--     schema -- confirm before running against another engine.
--   * NOTE(review): the section headings below are inferred from the column
--     names only; verify against the connector that populates the table.
CREATE TABLE elements (
    -- Identity
    id TEXT PRIMARY KEY,
    element_id TEXT,

    -- Content
    text TEXT,
    embeddings TEXT,
    type TEXT,

    -- Coordinates / layout
    system TEXT,
    layout_width REAL,
    layout_height REAL,
    points TEXT,

    -- Source record
    url TEXT,
    version TEXT,
    date_created TEXT,
    date_modified TEXT,
    date_processed TEXT,
    permissions_data TEXT,
    record_locator TEXT,

    -- Document structure
    category_depth INTEGER,
    parent_id TEXT,

    -- File
    attached_filename TEXT,
    filetype TEXT,
    last_modified TEXT,
    file_directory TEXT,
    filename TEXT,
    languages TEXT,
    -- NOTE(review): declared TEXT although `category_depth` is INTEGER;
    -- confirm this is intentional.
    page_number TEXT,

    -- Links
    links TEXT,
    page_name TEXT,
    link_urls TEXT,
    link_texts TEXT,

    -- Message fields
    sent_from TEXT,
    sent_to TEXT,
    subject TEXT,

    -- Section / formatting
    section TEXT,
    header_footer_type TEXT,
    emphasized_text_contents TEXT,
    emphasized_text_tags TEXT,
    text_as_html TEXT,
    regex_metadata TEXT,

    -- Detection
    detection_class_prob DECIMAL
);