mirror of
				https://github.com/Unstructured-IO/unstructured.git
				synced 2025-10-31 18:14:51 +00:00 
			
		
		
		
	 bd8a74d686
			
		
	
	
		bd8a74d686
		
			
		
	
	
	
	
		
			
			Given the tendency for shell scripts to easily enter into a few levels of indentation and long line lengths, update the default to 2 spaces.
		
			
				
	
	
		
			32 lines
		
	
	
		
			1.2 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			32 lines
		
	
	
		
			1.2 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/env bash
 | |
| 
 | |
| # Processes several files in a nested folder structure from box://utic-test-ingest-fixtures/
 | |
| # through Unstructured's library in 2 processes.
 | |
| 
 | |
| # Structured outputs are stored in box-output/
 | |
| 
 | |
| # Setting up your App/Credential to access Box folders/files:
 | |
| # First of all, this does not work with a free Box account.
 | |
| # Make sure the App service email is a collaborator for your folder (co-owner or editor)
 | |
| # Make sure you have the 'write all files' application scope
 | |
| # You may also need to enable 'Make API calls using the as-user header'
 | |
| # REAUTHORIZE app after making any of the above changes
 | |
| 
 | |
| # box-app-config is the path to a JSON file, available in the App Settings section of your Box App
 | |
| # More info to set up the app:
 | |
| # https://developer.box.com/guides/authentication/jwt/jwt-setup/
 | |
| # and set up the app config.json file here:
 | |
| # https://developer.box.com/guides/authentication/jwt/with-sdk/
 | |
| 
 | |
| SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
 | |
| cd "$SCRIPT_DIR"/../../.. || exit 1
 | |
| 
 | |
| PYTHONPATH=. ./unstructured/ingest/main.py \
 | |
|   box \
 | |
|   --box-app-config "$BOX_APP_CONFIG_PATH" \
 | |
|   --remote-url box://utic-test-ingest-fixtures \
 | |
|   --output-dir box-output \
 | |
|   --num-processes 2 \
 | |
|   --recursive \
 | |
|   --verbose
 |