diff --git a/ingestion/src/metadata/examples/workflows/powerbi.yaml b/ingestion/src/metadata/examples/workflows/powerbi.yaml index cf0749a755c..633543c6979 100644 --- a/ingestion/src/metadata/examples/workflows/powerbi.yaml +++ b/ingestion/src/metadata/examples/workflows/powerbi.yaml @@ -1,6 +1,6 @@ source: type: powerbi - serviceName: local_power11 + serviceName: local_powerbi serviceConnection: config: clientId: client_id @@ -8,6 +8,7 @@ source: tenantId: tenant_id scope: - https://analysis.windows.net/powerbi/api/.default + pagination_entity_per_page: 100 type: PowerBI sourceConfig: config: diff --git a/ingestion/src/metadata/ingestion/source/dashboard/powerbi/client.py b/ingestion/src/metadata/ingestion/source/dashboard/powerbi/client.py index 5bc5a3cb374..5bf9551c490 100644 --- a/ingestion/src/metadata/ingestion/source/dashboard/powerbi/client.py +++ b/ingestion/src/metadata/ingestion/source/dashboard/powerbi/client.py @@ -98,7 +98,7 @@ class PowerBiApiClient: dict """ try: - entities_per_page = 1000 + entities_per_page = min(100, self.config.pagination_entity_per_page) params_data = {"$top": "1"} response = self.client.get("/myorg/admin/groups", data=params_data) count = response.get("@odata.count") diff --git a/ingestion/src/metadata/ingestion/source/dashboard/powerbi/metadata.py b/ingestion/src/metadata/ingestion/source/dashboard/powerbi/metadata.py index 3e28e2ec9ad..ea886fcb3ef 100644 --- a/ingestion/src/metadata/ingestion/source/dashboard/powerbi/metadata.py +++ b/ingestion/src/metadata/ingestion/source/dashboard/powerbi/metadata.py @@ -56,7 +56,10 @@ class PowerbiSource(DashboardServiceSource): ): super().__init__(config, metadata_config) - self.workspace_data = {} + self.pagination_entity_per_page = min( + 100, self.service_connection.pagination_entity_per_page + ) + self.workspace_data = [] def prepare(self): # fetch all the workspace ids @@ -65,20 +68,34 @@ class PowerbiSource(DashboardServiceSource): workspace_id_list = [workspace.get("id") for workspace in workspaces] # Start the scan of the available workspaces for dashboard metadata - workspace_scan = self.client.initiate_workspace_scan(workspace_id_list) - workspace_scan_id = workspace_scan.get("id") + workspace_paginated_list = [ + workspace_id_list[i : i + self.pagination_entity_per_page] + for i in range( + 0, len(workspace_id_list), self.pagination_entity_per_page + ) + ] + count = 1 + for workspace_ids_chunk in workspace_paginated_list: + logger.info( + f"Scanning {count}/{len(workspace_paginated_list)} set of workspaces" + ) + workspace_scan = self.client.initiate_workspace_scan( + workspace_ids_chunk + ) + workspace_scan_id = workspace_scan.get("id") - # Keep polling the scan status endpoint to check if scan is succeeded - workspace_scan_status = self.client.wait_for_scan_complete( - scan_id=workspace_scan_id - ) - if workspace_scan_status: - response = self.client.fetch_workspace_scan_result( + # Keep polling the scan status endpoint to check if scan is succeeded + workspace_scan_status = self.client.wait_for_scan_complete( scan_id=workspace_scan_id ) - self.workspace_data = response.get("workspaces") - else: - logger.error("Error in fetching dashboards and charts") + if workspace_scan_status: + response = self.client.fetch_workspace_scan_result( + scan_id=workspace_scan_id + ) + self.workspace_data.extend(response.get("workspaces")) + else: + logger.error("Error in fetching dashboards and charts") + count += 1 else: logger.error("Unable to fetch any Powerbi workspaces") return super().prepare() diff --git a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/dashboard/powerBIConnection.json b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/dashboard/powerBIConnection.json index a7002ecc5aa..bf593a144c5 100644 --- a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/dashboard/powerBIConnection.json +++ b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/dashboard/powerBIConnection.json @@ -59,6 +59,12 @@ }, "default": ["https://analysis.windows.net/powerbi/api/.default"] }, + "pagination_entity_per_page": { + "title": "Pagination Entity Per Page", + "description": "Entity Limit set here will be used to paginate the PowerBi APIs", + "type": "integer", + "default": 100 + }, "supportsMetadataExtraction": { "title": "Supports Metadata Extraction", "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction"