From 5d387030ebefcf4b5e520dbb6fab6e857e6c20ad Mon Sep 17 00:00:00 2001 From: Roman Isecke <136338424+rbiseck3@users.noreply.github.com> Date: Thu, 18 Jul 2024 12:09:19 -0400 Subject: [PATCH] bugfix: google drive connector metadata safeguards (#3407) ### Description At times, the google drive response doesn't have some of the metadata we're grabbing to populate the `FileData` metadata. This is fine, but without the added safeguards, this can cause a `KeyError`. --- .../ingest/v2/processes/connectors/google_drive.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/unstructured/ingest/v2/processes/connectors/google_drive.py b/unstructured/ingest/v2/processes/connectors/google_drive.py index 4b0cff841..8d61671cf 100644 --- a/unstructured/ingest/v2/processes/connectors/google_drive.py +++ b/unstructured/ingest/v2/processes/connectors/google_drive.py @@ -129,15 +129,15 @@ class GoogleDriveIndexer(Indexer): def map_file_data(f: dict) -> FileData: file_id = f["id"] filename = f.pop("name") - url = f.pop("webContentLink") + url = f.pop("webContentLink", None) version = f.pop("version", None) permissions = f.pop("permissions", None) - date_created_str = f.pop("createdTime") - date_created_dt = parser.parse(date_created_str) - date_modified_str = f.pop("modifiedTime") + date_created_str = f.pop("createdTime", None) + date_created_dt = parser.parse(date_created_str) if date_created_str else None + date_modified_str = f.pop("modifiedTime", None) parent_path = f.pop("parent_path", None) parent_root_path = f.pop("parent_root_path", None) - date_modified_dt = parser.parse(date_modified_str) + date_modified_dt = parser.parse(date_modified_str) if date_modified_str else None if ( parent_path and isinstance(parent_path, str)