mirror of
				https://github.com/Unstructured-IO/unstructured.git
				synced 2025-10-30 17:38:13 +00:00 
			
		
		
		
	 950e5d68f9
			
		
	
	
		950e5d68f9
		
			
		
	
	
	
	
		
			
			- Adds a destination connector to upload processed output into a PostgreSQL/SQLite database instance. - Users are responsible for providing their own instances. This PR includes a couple of configuration examples. - Defines the scripts required to set up a PostgreSQL instance with the unstructured elements schema. - Validates PostgreSQL/pgvector embedding storage and retrieval. --------- Co-authored-by: potter-potter <david.potter@gmail.com>
		
			
				
	
	
		
			42 lines
		
	
	
		
			893 B
		
	
	
	
		
			SQL
		
	
	
	
	
	
			
		
		
	
	
			42 lines
		
	
	
		
			893 B
		
	
	
	
		
			SQL
		
	
	
	
	
	
-- Creates the `elements` table: one row per element, keyed by `id`.
--
-- Every column other than `id` is declared without NOT NULL, DEFAULT, UNIQUE,
-- CHECK or FOREIGN KEY constraints, so any of them may be left unset on insert.
--
-- IF NOT EXISTS makes this script safe to re-run: when an `elements` table is
-- already present the statement is a no-op. It does NOT reconcile an existing
-- table's columns with the definition below.
--
-- NOTE(review): `embeddings` is plain TEXT and the numeric columns use REAL,
-- which suggests this is the SQLite flavour of the schema -- confirm.
CREATE TABLE IF NOT EXISTS elements (
    id TEXT PRIMARY KEY,
    element_id TEXT,
    text TEXT,
    embeddings TEXT,
    type TEXT,
    system TEXT,
    layout_width REAL,
    layout_height REAL,
    points TEXT,
    url TEXT,
    version TEXT,
    -- NOTE(review): the date_* and last_modified columns are stored as TEXT,
    -- not a date/timestamp type; presumably writers supply a string
    -- representation -- confirm the expected format with the writer.
    date_created TEXT,
    date_modified TEXT,
    date_processed TEXT,
    permissions_data TEXT,
    record_locator TEXT,
    category_depth INTEGER,
    -- No FOREIGN KEY is declared on parent_id, so the database does not
    -- enforce that it refers to another row.
    parent_id TEXT,
    attached_filename TEXT,
    filetype TEXT,
    last_modified TEXT,
    file_directory TEXT,
    filename TEXT,
    languages TEXT,
    -- NOTE(review): page_number is declared TEXT even though the name suggests
    -- a number; left unchanged to stay compatible with existing writers.
    page_number TEXT,
    links TEXT,
    page_name TEXT,
    link_urls TEXT,
    link_texts TEXT,
    sent_from TEXT,
    sent_to TEXT,
    subject TEXT,
    section TEXT,
    header_footer_type TEXT,
    emphasized_text_contents TEXT,
    emphasized_text_tags TEXT,
    text_as_html TEXT,
    regex_metadata TEXT,
    -- DECIMAL is declared without precision/scale, so the engine's default
    -- numeric handling applies.
    detection_class_prob DECIMAL
);