unstructured/test_unstructured_ingest/files/azure_cognitive_index_schema.json
Steve Canny 086b8d6f8a
rfctr(part): prepare for pluggable auto-partitioners 2 (#3657)
**Summary**
Step 2 in prep for pluggable auto-partitioners: remove the `regex_metadata`
field from `ElementMetadata`.

**Additional Context**
- `regex_metadata` was an experimental feature that didn't pan out.
- It is implemented by one of the post-partitioning metadata decorators,
so remove it as part of the cleanup before consolidating those
decorators.
2024-09-24 17:33:25 +00:00

202 lines
4.4 KiB
JSON

{
"@odata.context": "https://utic-test-ingest-fixtures.search.windows.net/$metadata#indexes/$entity",
"@odata.etag": "\"0x8DBB93E09C8F4BD\"",
"fields": [
{
"name": "id",
"type": "Edm.String",
"key": true
},
{
"name": "element_id",
"type": "Edm.String"
},
{
"name": "text",
"type": "Edm.String"
},
{
"name": "embeddings",
"type": "Collection(Edm.Single)",
"dimensions": 384,
"vectorSearchConfiguration": "embeddings-config"
},
{
"name": "type",
"type": "Edm.String"
},
{
"name": "metadata",
"type": "Edm.ComplexType",
"fields": [
{
"name": "category_depth",
"type": "Edm.Int32"
},
{
"name": "parent_id",
"type": "Edm.String"
},
{
"name": "attached_to_filename",
"type": "Edm.String"
},
{
"name": "filetype",
"type": "Edm.String"
},
{
"name": "last_modified",
"type": "Edm.DateTimeOffset"
},
{
"name": "is_continuation",
"type": "Edm.Boolean"
},
{
"name": "file_directory",
"type": "Edm.String"
},
{
"name": "filename",
"type": "Edm.String"
},
{
"name": "data_source",
"type": "Edm.ComplexType",
"fields": [
{
"name": "url",
"type": "Edm.String"
},
{
"name": "version",
"type": "Edm.String"
},
{
"name": "date_created",
"type": "Edm.DateTimeOffset"
},
{
"name": "date_modified",
"type": "Edm.DateTimeOffset"
},
{
"name": "date_processed",
"type": "Edm.DateTimeOffset"
},
{
"name": "permissions_data",
"type": "Edm.String"
},
{
"name": "record_locator",
"type": "Edm.String"
}
]
},
{
"name": "coordinates",
"type": "Edm.ComplexType",
"fields": [
{
"name": "system",
"type": "Edm.String"
},
{
"name": "layout_width",
"type": "Edm.Double"
},
{
"name": "layout_height",
"type": "Edm.Double"
},
{
"name": "points",
"type": "Edm.String"
}
]
},
{
"name": "languages",
"type": "Collection(Edm.String)"
},
{
"name": "page_number",
"type": "Edm.String"
},
{
"name": "links",
"type": "Collection(Edm.String)"
},
{
"name": "page_name",
"type": "Edm.String"
},
{
"name": "url",
"type": "Edm.String"
},
{
"name": "link_urls",
"type": "Collection(Edm.String)"
},
{
"name": "link_texts",
"type": "Collection(Edm.String)"
},
{
"name": "sent_from",
"type": "Collection(Edm.String)"
},
{
"name": "sent_to",
"type": "Collection(Edm.String)"
},
{
"name": "subject",
"type": "Edm.String"
},
{
"name": "section",
"type": "Edm.String"
},
{
"name": "header_footer_type",
"type": "Edm.String"
},
{
"name": "emphasized_text_contents",
"type": "Collection(Edm.String)"
},
{
"name": "emphasized_text_tags",
"type": "Collection(Edm.String)"
},
{
"name": "text_as_html",
"type": "Edm.String"
},
{
"name": "detection_class_prob",
"type": "Edm.Double"
}
]
}
],
"vectorSearch": {
"algorithmConfigurations": [
{
"name": "embeddings-config",
"kind": "hnsw",
"hnswParameters": {
"metric": "cosine",
"m": 4,
"efConstruction": 400,
"efSearch": 500
}
}
]
}
}