mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-06-27 02:30:08 +00:00

**Summary**

Step 2 in prep for pluggable auto-partitioners: remove the `regex_metadata` field from `ElementMetadata`.

**Additional Context**

- "regex-metadata" was an experimental feature that didn't pan out.
- It's implemented by one of the post-partitioning metadata decorators, so get rid of it as part of the cleanup before consolidating those decorators.
49 lines
1.2 KiB
SQL
49 lines
1.2 KiB
SQL
-- Create the `ingest_test` database and make it the default database, so
-- that unqualified object names in later statements resolve inside it.
-- NOTE(review): CREATE DATABASE has no IF NOT EXISTS guard, so re-running the
-- script against a server that already has `ingest_test` will fail; confirm
-- this script is only ever run against a fresh instance.
CREATE DATABASE ingest_test;

USE ingest_test;

-- Define the `elements` table: an auto-incrementing integer primary key plus
-- one column per element / metadata field. Only `id` carries an explicit
-- constraint; every other column is declared with a bare type.
--
-- NOTE(review): the dialect looks MySQL-compatible (AUTO_INCREMENT, JSON, and
-- a VECTOR type); confirm the target engine before porting this DDL.
CREATE TABLE elements (
    -- Surrogate key.
    -- NOTE(review): some distributed MySQL-compatible engines recommend BIGINT
    -- for AUTO_INCREMENT keys; verify INT is wide enough for the target engine.
    id INT PRIMARY KEY NOT NULL AUTO_INCREMENT,

    -- Element identity and content.
    element_id TEXT,
    text TEXT,
    -- Fixed-length vector column; 384 presumably matches the output dimension
    -- of the embedding model used by the writer. TODO confirm.
    embeddings VECTOR(384),
    type TEXT,
    url TEXT,
    version TEXT,

    -- data_source_* fields.
    data_source_date_created TIMESTAMP,
    data_source_date_modified TIMESTAMP,
    data_source_date_processed TIMESTAMP,
    data_source_permissions_data TEXT,
    data_source_url TEXT,
    data_source_version TEXT,
    data_source_record_locator JSON,

    -- Hierarchy and file-level fields.
    category_depth INTEGER,
    parent_id TEXT,
    attached_filename TEXT,
    filetype TEXT,
    last_modified TIMESTAMP,
    file_directory TEXT,
    filename TEXT,
    languages TEXT,

    -- Page and link fields.
    -- NOTE(review): page_number is stored as TEXT rather than a numeric type;
    -- confirm this is intentional.
    page_number TEXT,
    links TEXT,
    page_name TEXT,
    link_urls TEXT,
    link_texts TEXT,

    -- Message-style fields.
    sent_from TEXT,
    sent_to TEXT,
    subject TEXT,

    -- Section / formatting fields.
    section TEXT,
    header_footer_type TEXT,
    emphasized_text_contents TEXT,
    emphasized_text_tags TEXT,
    text_as_html TEXT,

    -- NOTE(review): in MySQL-family engines a bare DECIMAL means DECIMAL(10, 0),
    -- so fractional values written to this column are rounded to whole numbers.
    -- If it is meant to hold a probability, declare an explicit scale.
    detection_class_prob DECIMAL,
    is_continuation BOOLEAN,
    orig_elements TEXT,

    -- coordinates_* fields.
    coordinates_points TEXT,
    coordinates_system TEXT,
    -- NOTE(review): same bare-DECIMAL caveat as detection_class_prob; fractional
    -- layout sizes would be rounded. Confirm whether integer storage is intended.
    coordinates_layout_width DECIMAL,
    coordinates_layout_height DECIMAL
);