From 8b93217a332770d3dfde2280197a537079bdb405 Mon Sep 17 00:00:00 2001 From: David Potter Date: Sat, 7 Oct 2023 19:34:32 -0700 Subject: [PATCH] build(test): exclude version metadata from google drive test (#1682) --- .../117qrVqiCoR5EjYMsDHGdy3UMkEtKr9Q8-test-drive-doc.docx.json | 2 -- .../1SpQuE7jHz9nMt5hfQXsiok1SgIdRYX5o-fake.docx.json | 1 - .../google-drive/1cTKXAreuj-wYmL38nFnqKvz3X8UKcaMC-foo.txt.json | 1 - test_unstructured_ingest/test-ingest-google-drive.sh | 2 +- 4 files changed, 1 insertion(+), 5 deletions(-) diff --git a/test_unstructured_ingest/expected-structured-output/google-drive/117qrVqiCoR5EjYMsDHGdy3UMkEtKr9Q8-test-drive-doc.docx.json b/test_unstructured_ingest/expected-structured-output/google-drive/117qrVqiCoR5EjYMsDHGdy3UMkEtKr9Q8-test-drive-doc.docx.json index 96a24d2b9..e67b0cfcd 100644 --- a/test_unstructured_ingest/expected-structured-output/google-drive/117qrVqiCoR5EjYMsDHGdy3UMkEtKr9Q8-test-drive-doc.docx.json +++ b/test_unstructured_ingest/expected-structured-output/google-drive/117qrVqiCoR5EjYMsDHGdy3UMkEtKr9Q8-test-drive-doc.docx.json @@ -5,7 +5,6 @@ "metadata": { "data_source": { "url": "https://drive.google.com/uc?id=117qrVqiCoR5EjYMsDHGdy3UMkEtKr9Q8&export=download", - "version": "15", "record_locator": { "drive_id": "1OQZ66OHBE30rNsNa7dweGLfRmXvkT_jr", "file_id": "117qrVqiCoR5EjYMsDHGdy3UMkEtKr9Q8" @@ -29,7 +28,6 @@ "metadata": { "data_source": { "url": "https://drive.google.com/uc?id=117qrVqiCoR5EjYMsDHGdy3UMkEtKr9Q8&export=download", - "version": "15", "record_locator": { "drive_id": "1OQZ66OHBE30rNsNa7dweGLfRmXvkT_jr", "file_id": "117qrVqiCoR5EjYMsDHGdy3UMkEtKr9Q8" diff --git a/test_unstructured_ingest/expected-structured-output/google-drive/1SpQuE7jHz9nMt5hfQXsiok1SgIdRYX5o-fake.docx.json b/test_unstructured_ingest/expected-structured-output/google-drive/1SpQuE7jHz9nMt5hfQXsiok1SgIdRYX5o-fake.docx.json index 96ec57344..1173dbd23 100644 --- 
a/test_unstructured_ingest/expected-structured-output/google-drive/1SpQuE7jHz9nMt5hfQXsiok1SgIdRYX5o-fake.docx.json +++ b/test_unstructured_ingest/expected-structured-output/google-drive/1SpQuE7jHz9nMt5hfQXsiok1SgIdRYX5o-fake.docx.json @@ -5,7 +5,6 @@ "metadata": { "data_source": { "url": "https://drive.google.com/uc?id=1SpQuE7jHz9nMt5hfQXsiok1SgIdRYX5o&export=download", - "version": "17", "record_locator": { "drive_id": "1OQZ66OHBE30rNsNa7dweGLfRmXvkT_jr", "file_id": "1SpQuE7jHz9nMt5hfQXsiok1SgIdRYX5o" diff --git a/test_unstructured_ingest/expected-structured-output/google-drive/1cTKXAreuj-wYmL38nFnqKvz3X8UKcaMC-foo.txt.json b/test_unstructured_ingest/expected-structured-output/google-drive/1cTKXAreuj-wYmL38nFnqKvz3X8UKcaMC-foo.txt.json index 2b5a21e4c..16b92a8de 100644 --- a/test_unstructured_ingest/expected-structured-output/google-drive/1cTKXAreuj-wYmL38nFnqKvz3X8UKcaMC-foo.txt.json +++ b/test_unstructured_ingest/expected-structured-output/google-drive/1cTKXAreuj-wYmL38nFnqKvz3X8UKcaMC-foo.txt.json @@ -5,7 +5,6 @@ "metadata": { "data_source": { "url": "https://drive.google.com/uc?id=1cTKXAreuj-wYmL38nFnqKvz3X8UKcaMC&export=download", - "version": "9", "record_locator": { "drive_id": "1OQZ66OHBE30rNsNa7dweGLfRmXvkT_jr", "file_id": "1cTKXAreuj-wYmL38nFnqKvz3X8UKcaMC" diff --git a/test_unstructured_ingest/test-ingest-google-drive.sh b/test_unstructured_ingest/test-ingest-google-drive.sh index 18560a171..09db555ec 100755 --- a/test_unstructured_ingest/test-ingest-google-drive.sh +++ b/test_unstructured_ingest/test-ingest-google-drive.sh @@ -35,7 +35,7 @@ echo "$GCP_INGEST_SERVICE_KEY" >"$GCP_INGEST_SERVICE_KEY_FILE" PYTHONPATH=. 
unstructured/ingest/main.py \ google-drive \ --download-dir "$DOWNLOAD_DIR" \ - --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth,metadata.data_source.version \ --num-processes "$max_processes" \ --strategy hi_res \ --preserve-downloads \