Steve Canny 086b8d6f8a
rfctr(part): prepare for pluggable auto-partitioners 2 (#3657)
**Summary**
Step 2 in prep for pluggable auto-partitioners: remove the `regex_metadata`
field from `ElementMetadata`.

**Additional Context**
- "regex-metadata" was an experimental feature that didn't pan out.
- It's implemented by one of the post-partitioning metadata decorators,
so get rid of it as part of the cleanup before consolidating those
decorators.
2024-09-24 17:33:25 +00:00

145 lines
3.7 KiB
JSON

{
"properties": {
"element_id": {
"type": "keyword"
},
"text": {
"type": "text",
"analyzer": "english"
},
"type": {
"type": "text"
},
"embeddings": {
"type": "dense_vector",
"dims": 384,
"index": true,
"similarity": "cosine"
},
"metadata": {
"type": "object",
"properties": {
"category_depth": {
"type": "integer"
},
"parent_id": {
"type": "keyword"
},
"attached_to_filename": {
"type": "keyword"
},
"filetype": {
"type": "keyword"
},
"last_modified": {
"type": "date"
},
"file_directory": {
"type": "keyword"
},
"filename": {
"type": "keyword"
},
"data_source": {
"type": "object",
"properties": {
"url": {
"type": "text",
"analyzer": "standard"
},
"version": {
"type": "keyword"
},
"date_created": {
"type": "date"
},
"date_modified": {
"type": "date"
},
"date_processed": {
"type": "date"
},
"record_locator": {
"type": "keyword"
},
"permissions_data": {
"type": "object"
}
}
},
"coordinates": {
"type": "object",
"properties": {
"system": {
"type": "keyword"
},
"layout_width": {
"type": "float"
},
"layout_height": {
"type": "float"
},
"points": {
"type": "float"
}
}
},
"languages": {
"type": "keyword"
},
"page_number": {
"type": "integer"
},
"page_name": {
"type": "keyword"
},
"url": {
"type": "text",
"analyzer": "standard"
},
"links": {
"type": "object"
},
"link_urls": {
"type": "text"
},
"link_texts": {
"type": "text"
},
"sent_from": {
"type": "text",
"analyzer": "standard"
},
"sent_to": {
"type": "text",
"analyzer": "standard"
},
"subject": {
"type": "text",
"analyzer": "standard"
},
"section": {
"type": "text",
"analyzer": "standard"
},
"header_footer_type": {
"type": "keyword"
},
"emphasized_text_contents": {
"type": "text"
},
"emphasized_text_tags": {
"type": "keyword"
},
"text_as_html": {
"type": "text",
"analyzer": "standard"
},
"detection_class_prob": {
"type": "float"
}
}
}
}
}