Powerbi pagination fix (#9784)

* Fixed powerbi pagination logic

* Added yaml changes
This commit is contained in:
Onkar Ravgan 2023-01-18 17:17:13 +05:30 committed by GitHub
parent da856108ad
commit 08a4d33c5f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 38 additions and 14 deletions

View File

@ -1,6 +1,6 @@
source:
type: powerbi
serviceName: local_power11
serviceName: local_powerbi
serviceConnection:
config:
clientId: client_id
@ -8,6 +8,7 @@ source:
tenantId: tenant_id
scope:
- https://analysis.windows.net/powerbi/api/.default
pagination_entity_per_page: 100
type: PowerBI
sourceConfig:
config:

View File

@ -98,7 +98,7 @@ class PowerBiApiClient:
dict
"""
try:
entities_per_page = 1000
entities_per_page = min(100, self.config.pagination_entity_per_page)
params_data = {"$top": "1"}
response = self.client.get("/myorg/admin/groups", data=params_data)
count = response.get("@odata.count")

View File

@ -56,7 +56,10 @@ class PowerbiSource(DashboardServiceSource):
):
super().__init__(config, metadata_config)
self.workspace_data = {}
self.pagination_entity_per_page = min(
100, self.service_connection.pagination_entity_per_page
)
self.workspace_data = []
def prepare(self):
# fetch all the workspace ids
@ -65,20 +68,34 @@ class PowerbiSource(DashboardServiceSource):
workspace_id_list = [workspace.get("id") for workspace in workspaces]
# Start the scan of the available workspaces for dashboard metadata
workspace_scan = self.client.initiate_workspace_scan(workspace_id_list)
workspace_scan_id = workspace_scan.get("id")
workspace_paginated_list = [
workspace_id_list[i : i + self.pagination_entity_per_page]
for i in range(
0, len(workspace_id_list), self.pagination_entity_per_page
)
]
count = 1
for workspace_ids_chunk in workspace_paginated_list:
logger.info(
f"Scanning {count}/{len(workspace_paginated_list)} set of workspaces"
)
workspace_scan = self.client.initiate_workspace_scan(
workspace_ids_chunk
)
workspace_scan_id = workspace_scan.get("id")
# Keep polling the scan status endpoint until the scan has succeeded
workspace_scan_status = self.client.wait_for_scan_complete(
scan_id=workspace_scan_id
)
if workspace_scan_status:
response = self.client.fetch_workspace_scan_result(
# Keep polling the scan status endpoint until the scan has succeeded
workspace_scan_status = self.client.wait_for_scan_complete(
scan_id=workspace_scan_id
)
self.workspace_data = response.get("workspaces")
else:
logger.error("Error in fetching dashboards and charts")
if workspace_scan_status:
response = self.client.fetch_workspace_scan_result(
scan_id=workspace_scan_id
)
self.workspace_data.extend(response.get("workspaces"))
else:
logger.error("Error in fetching dashboards and charts")
count += 1
else:
logger.error("Unable to fetch any Powerbi workspaces")
return super().prepare()

View File

@ -59,6 +59,12 @@
},
"default": ["https://analysis.windows.net/powerbi/api/.default"]
},
"pagination_entity_per_page": {
"title": "Pagination Entity Per Page",
"description": "Maximum number of entities to request per page when paginating the Power BI APIs. Values above 100 are capped at 100.",
"type": "integer",
"default": 100
},
"supportsMetadataExtraction": {
"title": "Supports Metadata Extraction",
"$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction"