#!/usr/bin/env bash

# Processes several files in a nested folder structure from gs://utic-test-ingest-fixtures-public/
# through Unstructured's library in 2 processes.

# Structured outputs are stored in gcs-output/

# Resolve the directory containing this script so it can be launched from any
# working directory. Output of the inner `cd` is discarded; only `pwd` is captured.
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)

# Move three levels up from the script's directory (presumably the repository
# root -- confirm against the repo layout); abort if that directory is missing.
cd "$SCRIPT_DIR/../../.." || exit 1

# PYTHONPATH=. puts the current directory on Python's import path for this one
# command. main.py is invoked directly, so it must be executable.
PYTHONPATH=. ./unstructured/ingest/main.py \
  gcs \
  --remote-url gs://utic-test-ingest-fixtures-public/ \
  --output-dir gcs-output \
  --num-processes 2 \
  --recursive \
  --verbose