mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-12-30 00:38:10 +00:00
### Description Migrate over the sharepoint connector to v2 and in the process refactor the majority of the connector. It now pulls in much more content from the SDK on index time, including permissions data if the parameters are passed in. HTML content generated from the SitePage is isolated to the html content in the `CanvasContent1` and `LayoutWebpartsContent` returned by the SDK. Some TODOs were left in there for future iterations. Currently only document and site page content is being pulled in from sharepoint, but sharepoint has more types of content than just that, such as lists. A note was left in there to support other sharepoint types. --------- Co-authored-by: ryannikolaidis <1208590+ryannikolaidis@users.noreply.github.com> Co-authored-by: rbiseck3 <rbiseck3@users.noreply.github.com> Co-authored-by: vangheem <vangheem@gmail.com> Co-authored-by: Ahmet Melek <ahmetmeleq@gmail.com> Co-authored-by: Ahmet Melek <39141206+ahmetmeleq@users.noreply.github.com>
134 lines
3.8 KiB
JSON
134 lines
3.8 KiB
JSON
[
|
|
{
|
|
"type": "NarrativeText",
|
|
"element_id": "fc01503614e0f12f585427cccf81cf86",
|
|
"text": "This is a test document to use for unit tests.",
|
|
"metadata": {
|
|
"filename": "fake-text.txt",
|
|
"languages": [
|
|
"eng"
|
|
],
|
|
"filetype": "text/plain",
|
|
"data_source": {
|
|
"url": "https://unstructuredio.sharepoint.com/Shared%20Documents/fake-text.txt",
|
|
"version": "1.0",
|
|
"record_locator": {
|
|
"server_path": "/Shared Documents/fake-text.txt",
|
|
"site_url": "https://unstructuredio.sharepoint.com"
|
|
},
|
|
"date_created": "1686891895.0",
|
|
"date_modified": "1686891895.0"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"type": "Address",
|
|
"element_id": "d06d10c0722ac08a2488076a48e858d5",
|
|
"text": "Doylestown, PA 18901",
|
|
"metadata": {
|
|
"filename": "fake-text.txt",
|
|
"languages": [
|
|
"eng"
|
|
],
|
|
"filetype": "text/plain",
|
|
"data_source": {
|
|
"url": "https://unstructuredio.sharepoint.com/Shared%20Documents/fake-text.txt",
|
|
"version": "1.0",
|
|
"record_locator": {
|
|
"server_path": "/Shared Documents/fake-text.txt",
|
|
"site_url": "https://unstructuredio.sharepoint.com"
|
|
},
|
|
"date_created": "1686891895.0",
|
|
"date_modified": "1686891895.0"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"type": "Title",
|
|
"element_id": "a190164de573571375ecf759a5027a3a",
|
|
"text": "Important points:",
|
|
"metadata": {
|
|
"filename": "fake-text.txt",
|
|
"languages": [
|
|
"eng"
|
|
],
|
|
"filetype": "text/plain",
|
|
"data_source": {
|
|
"url": "https://unstructuredio.sharepoint.com/Shared%20Documents/fake-text.txt",
|
|
"version": "1.0",
|
|
"record_locator": {
|
|
"server_path": "/Shared Documents/fake-text.txt",
|
|
"site_url": "https://unstructuredio.sharepoint.com"
|
|
},
|
|
"date_created": "1686891895.0",
|
|
"date_modified": "1686891895.0"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"type": "ListItem",
|
|
"element_id": "64c58eb106608ad05424e47bbcdef7dc",
|
|
"text": "Hamburgers are delicious",
|
|
"metadata": {
|
|
"filename": "fake-text.txt",
|
|
"languages": [
|
|
"eng"
|
|
],
|
|
"filetype": "text/plain",
|
|
"data_source": {
|
|
"url": "https://unstructuredio.sharepoint.com/Shared%20Documents/fake-text.txt",
|
|
"version": "1.0",
|
|
"record_locator": {
|
|
"server_path": "/Shared Documents/fake-text.txt",
|
|
"site_url": "https://unstructuredio.sharepoint.com"
|
|
},
|
|
"date_created": "1686891895.0",
|
|
"date_modified": "1686891895.0"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"type": "ListItem",
|
|
"element_id": "9210d7882755a60fc82272a0e93df94f",
|
|
"text": "Dogs are the best",
|
|
"metadata": {
|
|
"filename": "fake-text.txt",
|
|
"languages": [
|
|
"eng"
|
|
],
|
|
"filetype": "text/plain",
|
|
"data_source": {
|
|
"url": "https://unstructuredio.sharepoint.com/Shared%20Documents/fake-text.txt",
|
|
"version": "1.0",
|
|
"record_locator": {
|
|
"server_path": "/Shared Documents/fake-text.txt",
|
|
"site_url": "https://unstructuredio.sharepoint.com"
|
|
},
|
|
"date_created": "1686891895.0",
|
|
"date_modified": "1686891895.0"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"type": "ListItem",
|
|
"element_id": "970dd4ac687529e2d2126e0a51cb7c27",
|
|
"text": "I love fuzzy blankets",
|
|
"metadata": {
|
|
"filename": "fake-text.txt",
|
|
"languages": [
|
|
"eng"
|
|
],
|
|
"filetype": "text/plain",
|
|
"data_source": {
|
|
"url": "https://unstructuredio.sharepoint.com/Shared%20Documents/fake-text.txt",
|
|
"version": "1.0",
|
|
"record_locator": {
|
|
"server_path": "/Shared Documents/fake-text.txt",
|
|
"site_url": "https://unstructuredio.sharepoint.com"
|
|
},
|
|
"date_created": "1686891895.0",
|
|
"date_modified": "1686891895.0"
|
|
}
|
|
}
|
|
}
|
|
] |