mirror of
				https://github.com/open-metadata/OpenMetadata.git
				synced 2025-10-31 02:29:03 +00:00 
			
		
		
		
	Simplify bigquery usage (#5728)
This commit is contained in:
		
							parent
							
								
									7734adb3db
								
							
						
					
					
						commit
						e3223f6a20
					
				| @ -73,6 +73,12 @@ | |||||||
|       "type": "string", |       "type": "string", | ||||||
|       "default": "us" |       "default": "us" | ||||||
|     }, |     }, | ||||||
|  |     "usageLocation": { | ||||||
|  |       "title": "Usage Location", | ||||||
|  |       "description": "Location used to query INFORMATION_SCHEMA.JOBS_BY_PROJECT to fetch usage data. You can pass multi-regions, such as `us` or `eu`, or your specific region. Australia and Asia multi-regions are not yet in GA.", | ||||||
|  |       "type": "string", | ||||||
|  |       "default": "us" | ||||||
|  |     }, | ||||||
|     "connectionOptions": { |     "connectionOptions": { | ||||||
|       "title": "Connection Options", |       "title": "Connection Options", | ||||||
|       "$ref": "../connectionBasicType.json#/definitions/connectionOptions" |       "$ref": "../connectionBasicType.json#/definitions/connectionOptions" | ||||||
|  | |||||||
| @ -8,16 +8,12 @@ | |||||||
| #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
| #  See the License for the specific language governing permissions and | #  See the License for the specific language governing permissions and | ||||||
| #  limitations under the License. | #  limitations under the License. | ||||||
| 
 | """ | ||||||
| import collections | Handle big query usage extraction | ||||||
| 
 | """ | ||||||
| # This import verifies that the dependencies are available. |  | ||||||
| import os |  | ||||||
| from datetime import datetime | from datetime import datetime | ||||||
| from typing import Any, Dict, Iterable, Optional |  | ||||||
| 
 | 
 | ||||||
| from google import auth | from google import auth | ||||||
| from google.cloud import logging |  | ||||||
| 
 | 
 | ||||||
| from metadata.generated.schema.entity.services.connections.database.bigQueryConnection import ( | from metadata.generated.schema.entity.services.connections.database.bigQueryConnection import ( | ||||||
|     BigQueryConnection, |     BigQueryConnection, | ||||||
| @ -31,11 +27,11 @@ from metadata.generated.schema.entity.services.databaseService import ( | |||||||
| from metadata.generated.schema.metadataIngestion.workflow import ( | from metadata.generated.schema.metadataIngestion.workflow import ( | ||||||
|     Source as WorkflowSource, |     Source as WorkflowSource, | ||||||
| ) | ) | ||||||
| from metadata.generated.schema.type.tableQuery import TableQueries, TableQuery |  | ||||||
| from metadata.ingestion.api.source import InvalidSourceException | from metadata.ingestion.api.source import InvalidSourceException | ||||||
| from metadata.ingestion.source.database.usage_source import UsageSource | from metadata.ingestion.source.database.usage_source import UsageSource | ||||||
| from metadata.utils.credentials import set_google_credentials | from metadata.utils.credentials import set_google_credentials | ||||||
| from metadata.utils.logger import ingestion_logger | from metadata.utils.logger import ingestion_logger | ||||||
|  | from metadata.utils.sql_queries import BIGQUERY_USAGE_STATEMENT | ||||||
| 
 | 
 | ||||||
| logger = ingestion_logger() | logger = ingestion_logger() | ||||||
| 
 | 
 | ||||||
| @ -46,14 +42,11 @@ class BigqueryUsageSource(UsageSource): | |||||||
| 
 | 
 | ||||||
|     def __init__(self, config: WorkflowSource, metadata_config: OpenMetadataConnection): |     def __init__(self, config: WorkflowSource, metadata_config: OpenMetadataConnection): | ||||||
|         super().__init__(config, metadata_config) |         super().__init__(config, metadata_config) | ||||||
|         self.temp_credentials = None |  | ||||||
| 
 | 
 | ||||||
|         self.project_id = self.set_project_id() |         self.project_id = self.set_project_id() | ||||||
|  |         self.database = self.project_id | ||||||
| 
 | 
 | ||||||
|         self.logger_name = "cloudaudit.googleapis.com%2Fdata_access" |         self.sql_stmt = BIGQUERY_USAGE_STATEMENT | ||||||
|         self.logging_client = logging.Client() |  | ||||||
|         self.usage_logger = self.logging_client.logger(self.logger_name) |  | ||||||
|         logger.debug("Listing entries for logger {}:".format(self.usage_logger.name)) |  | ||||||
| 
 | 
 | ||||||
|     @classmethod |     @classmethod | ||||||
|     def create(cls, config_dict, metadata_config: OpenMetadataConnection): |     def create(cls, config_dict, metadata_config: OpenMetadataConnection): | ||||||
| @ -70,61 +63,17 @@ class BigqueryUsageSource(UsageSource): | |||||||
| 
 | 
 | ||||||
|         return cls(config, metadata_config) |         return cls(config, metadata_config) | ||||||
| 
 | 
 | ||||||
|  |     def get_sql_statement(self, start_time: datetime, end_time: datetime) -> str: | ||||||
|  |         """ | ||||||
|  |         returns sql statement to fetch query logs | ||||||
|  |         """ | ||||||
|  |         return self.sql_stmt.format( | ||||||
|  |             start_time=start_time, | ||||||
|  |             end_time=end_time, | ||||||
|  |             region=self.connection.usageLocation, | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|     @staticmethod |     @staticmethod | ||||||
|     def set_project_id(): |     def set_project_id(): | ||||||
|         _, project_id = auth.default() |         _, project_id = auth.default() | ||||||
|         return project_id |         return project_id | ||||||
| 
 |  | ||||||
|     def get_table_query(self, entries: Iterable): |  | ||||||
|         query_list = [] |  | ||||||
|         for entry in entries: |  | ||||||
|             timestamp = entry.timestamp.isoformat() |  | ||||||
|             timestamp = datetime.strptime(timestamp[0:10], "%Y-%m-%d") |  | ||||||
|             if timestamp >= self.start and timestamp <= self.end: |  | ||||||
|                 if ("query" in str(entry.payload)) and type( |  | ||||||
|                     entry.payload |  | ||||||
|                 ) == collections.OrderedDict: |  | ||||||
|                     payload = list(entry.payload.items())[-1][1] |  | ||||||
|                     if "jobChange" in payload: |  | ||||||
|                         logger.debug(f"\nEntries: {payload}") |  | ||||||
|                         if "queryConfig" in payload["jobChange"]["job"]["jobConfig"]: |  | ||||||
|                             queryConfig = payload["jobChange"]["job"]["jobConfig"][ |  | ||||||
|                                 "queryConfig" |  | ||||||
|                             ] |  | ||||||
|                         else: |  | ||||||
|                             continue |  | ||||||
|                         jobStats = payload["jobChange"]["job"]["jobStats"] |  | ||||||
|                         statementType = "" |  | ||||||
|                         if hasattr(queryConfig, "statementType"): |  | ||||||
|                             statementType = queryConfig["statementType"] |  | ||||||
|                         database = self.project_id |  | ||||||
|                         analysis_date = str( |  | ||||||
|                             datetime.strptime( |  | ||||||
|                                 jobStats["startTime"][0:19], "%Y-%m-%dT%H:%M:%S" |  | ||||||
|                             ).strftime("%Y-%m-%d %H:%M:%S") |  | ||||||
|                         ) |  | ||||||
|                         logger.debug(f"Query :{statementType}:{queryConfig['query']}") |  | ||||||
|                         tq = TableQuery( |  | ||||||
|                             query=queryConfig["query"], |  | ||||||
|                             userName=entry.resource.labels["project_id"], |  | ||||||
|                             startTime=str(jobStats["startTime"]), |  | ||||||
|                             endTime=str(jobStats["endTime"]), |  | ||||||
|                             analysisDate=analysis_date, |  | ||||||
|                             aborted=0, |  | ||||||
|                             databaseName=str(database), |  | ||||||
|                             serviceName=self.config.serviceName, |  | ||||||
|                             databaseSchema=None, |  | ||||||
|                         ) |  | ||||||
|                         query_list.append(tq) |  | ||||||
|         return query_list |  | ||||||
| 
 |  | ||||||
|     def _get_raw_extract_iter(self) -> Optional[Iterable[Dict[str, Any]]]: |  | ||||||
|         entries = self.usage_logger.list_entries() |  | ||||||
|         yield TableQueries( |  | ||||||
|             queries=self.get_table_query(entries), |  | ||||||
|         ) |  | ||||||
| 
 |  | ||||||
|     def close(self): |  | ||||||
|         super().close() |  | ||||||
|         if self.temp_credentials: |  | ||||||
|             os.unlink(self.temp_credentials) |  | ||||||
|  | |||||||
| @ -340,3 +340,18 @@ SNOWFLAKE_GET_COMMENTS = """ | |||||||
|     WHERE TABLE_SCHEMA = '{schema_name}' |     WHERE TABLE_SCHEMA = '{schema_name}' | ||||||
|       AND TABLE_NAME = '{table_name}' |       AND TABLE_NAME = '{table_name}' | ||||||
| """ | """ | ||||||
|  | 
 | ||||||
|  | BIGQUERY_USAGE_STATEMENT = """ | ||||||
|  |  SELECT | ||||||
|  |    project_id as database_name, | ||||||
|  |    user_email as user_name, | ||||||
|  |    statement_type as query_type, | ||||||
|  |    start_time, | ||||||
|  |    end_time, | ||||||
|  |    query as query_text, | ||||||
|  |    null as schema_name | ||||||
|  | FROM `region-{region}`.INFORMATION_SCHEMA.JOBS_BY_PROJECT | ||||||
|  | WHERE creation_time BETWEEN "{start_time}" AND "{end_time}" | ||||||
|  |  AND job_type = "QUERY" | ||||||
|  |  AND state = "DONE" | ||||||
|  | """ | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Pere Miquel Brull
						Pere Miquel Brull