#!/usr/bin/env bash
# Processes documents from a Microsoft SharePoint site
# through Unstructured's library in 2 processes.
# Structured outputs are stored in sharepoint-ingest-output/
# NOTE: this script is not ready-to-run!
# You must enter a MS SharePoint app client-id, client-secret and SharePoint site URL
# before running.
# To get the credentials for your SharePoint app, follow these steps:
# https://github.com/vgrem/Office365-REST-Python-Client/wiki/How-to-connect-to-SharePoint-Online-and-and-SharePoint-2013-2016-2019-on-premises--with-app-principal
# To optionally set up your application and obtain the permissions-related variables
# (--permissions-application-id, --permissions-client-cred, --permissions-tenant), follow these steps:
# https://tsmatz.wordpress.com/2016/10/07/application-permission-with-v2-endpoint-and-microsoft-graph

# Resolve the absolute directory that contains this script so the example can
# be launched from any working directory. Output of the inner `cd` is silenced
# so that only the `pwd` result is captured.
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)

# Move three directory levels up from the script so that the relative
# ./unstructured/ingest/main.py path and PYTHONPATH=. below resolve; abort if
# that directory cannot be entered.
cd "$SCRIPT_DIR"/../../.. || exit 1

# Run the ingest CLI's `sharepoint` subcommand. Every "<...>" value is a
# placeholder that must be replaced with real credentials/URLs before running.
# PYTHONPATH is set for this single command only.
# NOTE(review): --files-only is described as a flag but is given a string
# value here; confirm against the CLI definition whether the value should be
# dropped.
PYTHONPATH=. ./unstructured/ingest/main.py \
  sharepoint \
  --client-id "<Microsoft Sharepoint app client-id>" \
  --client-cred "<Microsoft Sharepoint app client-secret>" \
  --site "<e.g https://contoso.sharepoint.com or https://contoso.admin.sharepoint.com to process all sites within tenant>" \
  --permissions-application-id "<Microsoft Graph API application id to process per-file access permissions>" \
  --permissions-client-cred "<Microsoft Graph API application credentials to process per-file access permissions>" \
  --permissions-tenant "<e.g https://contoso.onmicrosoft.com to process per-file access permissions>" \
  --files-only "Flag to process only files within the site(s)" \
  --output-dir sharepoint-ingest-output \
  --num-processes 2 \
  --path "Shared Documents" \
  --verbose