| 
									
										
										
										
											2023-11-01 15:23:44 -04:00
										 |  |  | #!/usr/bin/env bash
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-29 08:41:19 -05:00
										 |  |  | set -u -o pipefail | 
					
						
							| 
									
										
										
										
											2023-11-01 15:23:44 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-12-11 20:04:15 -05:00
										 |  |  | SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) | 
					
						
							| 
									
										
										
										
											2023-11-29 08:41:19 -05:00
										 |  |  | SKIPPED_FILES_LOG=$SCRIPT_DIR/skipped-files.txt | 
					
						
							|  |  |  | # If the file already exists, reset it | 
					
						
							|  |  |  | if [ -f "$SKIPPED_FILES_LOG" ]; then | 
					
						
							| 
									
										
										
										
											2023-12-18 23:48:21 -08:00
										 |  |  |   rm "$SKIPPED_FILES_LOG" | 
					
						
							| 
									
										
										
										
											2023-11-29 08:41:19 -05:00
										 |  |  | fi | 
					
						
							| 
									
										
										
										
											2023-12-05 15:55:19 -05:00
										 |  |  | touch "$SKIPPED_FILES_LOG" | 
					
						
							| 
									
										
										
										
											2023-11-01 15:23:44 -04:00
										 |  |  | cd "$SCRIPT_DIR"/.. || exit 1 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # NOTE(crag): sets number of tesseract threads to 1 which may help with more reproducible outputs | 
					
						
							|  |  |  | export OMP_THREAD_LIMIT=1 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | all_tests=( | 
					
						
							| 
									
										
										
										
											2023-12-18 23:48:21 -08:00
										 |  |  |   'azure.sh' | 
					
						
							|  |  |  |   'azure-cognitive-search.sh' | 
					
						
							|  |  |  |   'box.sh' | 
					
						
							| 
									
										
										
										
											2023-12-19 08:58:23 -08:00
										 |  |  |   'chroma.sh' | 
					
						
							| 
									
										
										
										
											2023-12-18 23:48:21 -08:00
										 |  |  |   'delta-table.sh' | 
					
						
							|  |  |  |   'dropbox.sh' | 
					
						
							| 
									
										
										
										
											2023-12-20 01:26:58 +00:00
										 |  |  |   'elasticsearch.sh' | 
					
						
							| 
									
										
										
										
											2023-12-18 23:48:21 -08:00
										 |  |  |   'gcs.sh' | 
					
						
							|  |  |  |   'mongodb.sh' | 
					
						
							| 
									
										
										
										
											2024-01-04 13:33:16 -06:00
										 |  |  |   'pgvector.sh' | 
					
						
							| 
									
										
										
										
											2023-12-18 23:48:21 -08:00
										 |  |  |   'pinecone.sh' | 
					
						
							| 
									
										
										
										
											2024-01-02 14:08:20 -08:00
										 |  |  |   'qdrant.sh' | 
					
						
							| 
									
										
										
										
											2023-12-18 23:48:21 -08:00
										 |  |  |   's3.sh' | 
					
						
							|  |  |  |   'sharepoint-embed-cog-index.sh' | 
					
						
							| 
									
										
										
										
											2024-01-04 13:33:16 -06:00
										 |  |  |   'sqlite.sh' | 
					
						
							|  |  |  |   'weaviate.sh' | 
					
						
							| 
									
										
										
										
											2023-11-01 15:23:44 -04:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | full_python_matrix_tests=( | 
					
						
							| 
									
										
										
										
											2023-12-18 23:48:21 -08:00
										 |  |  |   'azure.sh' | 
					
						
							|  |  |  |   'gcs.sh' | 
					
						
							|  |  |  |   's3.sh' | 
					
						
							| 
									
										
										
										
											2023-11-01 15:23:44 -04:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | CURRENT_TEST="none" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | function print_last_run() { | 
					
						
							| 
									
										
										
										
											2023-12-18 23:48:21 -08:00
										 |  |  |   if [ "$CURRENT_TEST" != "none" ]; then | 
					
						
							|  |  |  |     echo "Last ran script: $CURRENT_TEST" | 
					
						
							|  |  |  |   fi | 
					
						
							|  |  |  |   echo "######## SKIPPED TESTS: ########" | 
					
						
							|  |  |  |   cat "$SKIPPED_FILES_LOG" | 
					
						
							| 
									
										
										
										
											2023-11-01 15:23:44 -04:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | trap print_last_run EXIT | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | python_version=$(python --version 2>&1) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | tests_to_ignore=( | 
					
						
							| 
									
										
										
										
											2023-12-18 23:48:21 -08:00
										 |  |  |   'notion.sh' | 
					
						
							|  |  |  |   'dropbox.sh' | 
					
						
							|  |  |  |   'sharepoint.sh' | 
					
						
							| 
									
										
										
										
											2023-11-01 15:23:44 -04:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | for test in "${all_tests[@]}"; do | 
					
						
							| 
									
										
										
										
											2023-12-18 23:48:21 -08:00
										 |  |  |   CURRENT_TEST="$test" | 
					
						
							|  |  |  |   # IF: python_version is not 3.10 (wildcarded to match any subminor version) AND the current test is not in full_python_matrix_tests | 
					
						
							|  |  |  |   # Note: to test we expand the full_python_matrix_tests array to a string and then regex match the current test | 
					
						
							|  |  |  |   if [[ "$python_version" != "Python 3.10"* ]] && [[ ! "${full_python_matrix_tests[*]}" =~ $test ]]; then | 
					
						
							|  |  |  |     echo "--------- SKIPPING SCRIPT $test ---------" | 
					
						
							|  |  |  |     continue | 
					
						
							|  |  |  |   fi | 
					
						
							|  |  |  |   echo "--------- RUNNING SCRIPT $test ---------" | 
					
						
							|  |  |  |   echo "Running ./test_unstructured_ingest/$test" | 
					
						
							|  |  |  |   ./test_unstructured_ingest/dest/"$test" | 
					
						
							|  |  |  |   rc=$? | 
					
						
							|  |  |  |   if [[ $rc -eq 8 ]]; then | 
					
						
							|  |  |  |     echo "$test (skipped due to missing env var)" | tee -a "$SKIPPED_FILES_LOG" | 
					
						
							|  |  |  |   elif [[ "${tests_to_ignore[*]}" =~ $test ]]; then | 
					
						
							|  |  |  |     echo "$test (skipped checking error code: $rc)" | tee -a "$SKIPPED_FILES_LOG" | 
					
						
							|  |  |  |     continue | 
					
						
							|  |  |  |   elif [[ $rc -ne 0 ]]; then | 
					
						
							|  |  |  |     exit $rc | 
					
						
							|  |  |  |   fi | 
					
						
							|  |  |  |   echo "--------- FINISHED SCRIPT $test ---------" | 
					
						
							| 
									
										
										
										
											2023-11-01 15:23:44 -04:00
										 |  |  | done |