2024-09-25 10:49:44 +05:30 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								---
							 
						 
					
						
							
								
									
										
										
										
											2025-06-20 19:46:38 +05:30 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								title: GCS Datalake Connector | OpenMetadata Cloud Storage Integration
							 
						 
					
						
							
								
									
										
										
										
											2024-09-25 10:49:44 +05:30 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								slug: /connectors/database/gcs-datalake
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								---
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{% connectorDetailsHeader
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								name="GCS Datalake"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								stage="PROD"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								platform="OpenMetadata"
							 
						 
					
						
							
								
									
										
										
										
											2025-03-03 14:39:14 +05:30 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								availableFeatures=["Metadata", "Data Profiler", "Data Quality", "Sample Data"]
							 
						 
					
						
							
								
									
										
										
										
											2024-09-25 10:49:44 +05:30 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								unavailableFeatures=["Query Usage", "Lineage", "Column-level Lineage", "Owners", "dbt", "Tags", "Stored Procedures"]
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/ %}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								In this section, we provide guides and references to use the GCS Datalake connector.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Configure and schedule GCS Datalake metadata and profiler workflows from the OpenMetadata UI:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								-  [Requirements](#requirements)
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								-  [Metadata Ingestion](#metadata-ingestion)
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								-  [Data Profiler](/how-to-guides/data-quality-observability/profiler/workflow)
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								-  [Data Quality](/how-to-guides/data-quality-observability/quality)
						 
					
						
							
								
									
										
										
										
											2025-04-07 09:49:37 +05:30 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								-  [Troubleshooting](/connectors/database/gcs-datalake/troubleshooting)
						 
					
						
							
								
									
										
										
										
											2024-09-25 10:49:44 +05:30 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-06-27 12:22:38 +05:30 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								{% partial file="/v1.9/connectors/ingestion-modes-tiles.md" variables={yamlPath: "/connectors/database/gcs-datalake/yaml"} /%}
							 
						 
					
						
							
								
									
										
										
										
											2024-09-25 10:49:44 +05:30 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								## Requirements
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{% note %}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								The GCS Datalake connector supports extracting metadata from file types `JSON`, `CSV`, `TSV` & `Parquet`.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{% /note %}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								## Metadata Ingestion
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{% partial 
							 
						 
					
						
							
								
									
										
										
										
											2025-06-27 12:22:38 +05:30 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  file="/v1.9/connectors/metadata-ingestion-ui.md" 
							 
						 
					
						
							
								
									
										
										
										
											2024-09-25 10:49:44 +05:30 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								  variables={
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    connector: "Datalake", 
							 
						 
					
						
							
								
									
										
										
										
											2025-06-27 12:22:38 +05:30 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    selectServicePath: "/images/v1.9/connectors/datalake/select-service.png",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    addNewServicePath: "/images/v1.9/connectors/datalake/add-new-service.png",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    serviceConnectionPath: "/images/v1.9/connectors/datalake/service-connection.png",
							 
						 
					
						
							
								
									
										
										
										
											2024-09-25 10:49:44 +05:30 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/%}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{% stepsContainer %}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{% extraContent parentTagName="stepsContainer" %}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								#### Connection Details for GCS
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								-  **Bucket Name**: A bucket name in DataLake is a unique identifier used to organize and store data objects. 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  It's similar to a folder name, but it's used for object storage rather than file storage.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								-  **Prefix**: The prefix of a data source in datalake refers to the first part of the data path that identifies the source or origin of the data. It's used to organize and categorize data within the datalake, and can help users easily locate and access the data they need. 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								**GCS Credentials**
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								We support two ways of authenticating to GCS:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								1.  Passing the raw credential values provided by GCP. This requires us to provide the following information, all of which is available in the GCP service account key:
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   1.  Credentials type, e.g. `service_account`.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   2.  Project ID
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   3.  Private Key ID
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   4.  Private Key
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   5.  Client Email
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   6.  Client ID
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   7.  Auth URI, [https://accounts.google.com/o/oauth2/auth](https://accounts.google.com/o/oauth2/auth) by default
							 
						 
					
						
							
								
									
										
										
										
											2025-06-20 19:46:24 +05:30 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   8.  Token URI, **https://oauth2.googleapis.com/token** by default
							 
						 
					
						
							
								
									
										
										
										
											2024-09-25 10:49:44 +05:30 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								   9.  Authentication Provider X509 Certificate URL, [https://www.googleapis.com/oauth2/v1/certs](https://www.googleapis.com/oauth2/v1/certs) by default
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   10.  Client X509 Certificate URL
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-06-27 12:22:38 +05:30 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								{% partial file="/v1.9/connectors/database/advanced-configuration.md" /%}
							 
						 
					
						
							
								
									
										
										
										
											2024-09-25 10:49:44 +05:30 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{% /extraContent %}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-06-27 12:22:38 +05:30 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								{% partial file="/v1.9/connectors/test-connection.md" /%}
							 
						 
					
						
							
								
									
										
										
										
											2024-09-25 10:49:44 +05:30 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-06-27 12:22:38 +05:30 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								{% partial file="/v1.9/connectors/database/configure-ingestion.md" /%}
							 
						 
					
						
							
								
									
										
										
										
											2024-09-25 10:49:44 +05:30 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-06-27 12:22:38 +05:30 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								{% partial file="/v1.9/connectors/ingestion-schedule-and-deploy.md" /%}
							 
						 
					
						
							
								
									
										
										
										
											2024-09-25 10:49:44 +05:30 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{% /stepsContainer %}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-06-27 12:22:38 +05:30 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								{% partial file="/v1.9/connectors/database/related.md" /%}