#!/usr/bin/env bash

# Processes Notion pages and databases through Unstructured's library
# in 2 processes.

# Structured outputs are stored in notion-ingest-output/
# (a relative path, so it is created under the directory this script cd's into).

# NOTE: this script is not ready to run as-is!
# You must replace the "<...>" placeholders below with a Notion API key
# and the ids of the pages and databases to process.

# To get the credentials for your Notion workspace, follow these steps:
# https://developers.notion.com/docs/create-a-notion-integration

# Resolve the directory that contains this script, so the script behaves the
# same regardless of the caller's working directory. Abort if that fails:
# with an empty SCRIPT_DIR the cd below would silently land in "/".
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) || exit 1

# main.py is addressed relative to the directory three levels above this script.
cd "$SCRIPT_DIR"/../../.. || exit 1

# PYTHONPATH=. adds that directory to Python's module search path
# (presumably so main.py imports the in-tree package -- confirm if relocating).
PYTHONPATH=. ./unstructured/ingest/main.py \
  notion \
  --api-key "<Notion api key>" \
  --output-dir notion-ingest-output \
  --page-ids "<Comma delimited list of page ids to process>" \
  --database-ids "<Comma delimited list of database ids to process>" \
  --num-processes 2 \
  --verbose