import datetime
import logging
import re
import sys
from typing import Any, Dict, List, Optional, cast
from unittest import mock
from unittest.mock import MagicMock
import pytest
from freezegun import freeze_time
from datahub.ingestion.run.pipeline import Pipeline
from datahub.ingestion.source.powerbi.config import (
Constant,
PowerBiDashboardSourceConfig,
SupportedDataPlatform,
)
from datahub.ingestion.source.powerbi.powerbi import PowerBiDashboardSource
from datahub.ingestion.source.powerbi.rest_api_wrapper.data_classes import (
Page,
Report,
Workspace,
)
from tests.test_helpers import mce_helpers, test_connection_helpers
pytestmark = pytest.mark.integration_batch_2
FROZEN_TIME = "2022-02-03 07:00:00"
def enable_logging():
    # Send log output to stdout and enable DEBUG verbosity
logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
logging.getLogger().setLevel(logging.DEBUG)
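
# Stand-in for msal.ConfidentialClientApplication: acquire_token_for_client
# returns a canned token and counts its calls, so the access-token expiry
# tests below can assert how often a fresh token was requested.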
class MsalClient:
call_num = 0
token: Dict[str, Any] = {
"access_token": "dummy",
}
@staticmethod
def acquire_token_for_client(*args, **kwargs):
MsalClient.call_num += 1
return MsalClient.token
@staticmethod
def reset():
MsalClient.call_num = 0
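
# Factory used as the side_effect for mock.patch("msal.ConfidentialClientApplication"),
# so every client the source instantiates is the stub above.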
def mock_msal_cca(*args, **kwargs):
return MsalClient()
def scan_init_response(request, context):
    # The request mock passes the POST body in the form "workspaces=<workspace_id>".
    # When two or more workspaces are scanned, it gets messy, e.g.
    # 'workspaces=64ED5CAD-7C10-4684-8180-826122881108&workspaces=64ED5CAD-7C22-4684-8180-826122881108'
workspace_id_list = request.text.replace("&", "").split("workspaces=")
workspace_id = "||".join(workspace_id_list[1:])
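    # e.g. request.text == "workspaces=A&workspaces=B" yields workspace_id == "A||B"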
w_id_vs_response: Dict[str, Any] = {
"64ED5CAD-7C10-4684-8180-826122881108": {
"id": "4674efd1-603c-4129-8d82-03cf2be05aff"
},
"64ED5CAD-7C22-4684-8180-826122881108": {
"id": "a674efd1-603c-4129-8d82-03cf2be05aff"
},
"64ED5CAD-7C10-4684-8180-826122881108||64ED5CAD-7C22-4684-8180-826122881108": {
"id": "a674efd1-603c-4129-8d82-03cf2be05aff"
},
}
return w_id_vs_response[workspace_id]
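
# Maps each Power BI REST endpoint to the mocked method, status code, and payload
# it should return. Individual endpoints can be overridden or extended per test
# via override_data (see the admin-access and failed-page-request tests below).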
def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) -> None:
override_data = override_data or {}
api_vs_response = {
"https://api.powerbi.com/v1.0/myorg/groups": {
"method": "GET",
"status_code": 200,
"json": {
"@odata.count": 3,
"value": [
{
"id": "64ED5CAD-7C10-4684-8180-826122881108",
"isReadOnly": True,
"name": "demo-workspace",
"type": "Workspace",
},
{
"id": "64ED5CAD-7C22-4684-8180-826122881108",
"isReadOnly": True,
"name": "second-demo-workspace",
"type": "Workspace",
},
{
"id": "64ED5CAD-7322-4684-8180-826122881108",
"isReadOnly": True,
"name": "Workspace 2",
"type": "Workspace",
},
],
},
},
"https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/dashboards": {
"method": "GET",
"status_code": 200,
"json": {
"value": [
{
"id": "7D668CAD-7FFC-4505-9215-655BCA5BEBAE",
"isReadOnly": True,
"displayName": "test_dashboard",
"description": "Description of test dashboard",
"embedUrl": "https://localhost/dashboards/embed/1",
"webUrl": "https://localhost/dashboards/web/1",
}
]
},
},
"https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C22-4684-8180-826122881108/dashboards": {
"method": "GET",
"status_code": 200,
"json": {
"value": [
{
"id": "7D668CAD-8FFC-4505-9215-655BCA5BEBAE",
"isReadOnly": True,
"displayName": "test_dashboard2",
"embedUrl": "https://localhost/dashboards/embed/1",
"webUrl": "https://localhost/dashboards/web/1",
}
]
},
},
"https://api.powerbi.com/v1.0/myorg/admin/reports/5b218778-e7a5-4d73-8187-f10824047715/users": {
"method": "GET",
"status_code": 200,
"json": {
"value": [
{
"identifier": "User1@foo.com",
"displayName": "user1",
"emailAddress": "User1@foo.com",
"datasetUserAccessRight": "ReadWrite",
"graphId": "C9EE53F2-88EA-4711-A173-AF0515A3CD46",
"principalType": "User",
},
{
"identifier": "User2@foo.com",
"displayName": "user2",
"emailAddress": "User2@foo.com",
"datasetUserAccessRight": "ReadWrite",
"graphId": "C9EE53F2-88EA-4711-A173-AF0515A5REWS",
"principalType": "User",
},
]
},
},
"https://api.powerbi.com/v1.0/myorg/admin/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE/users": {
"method": "GET",
"status_code": 200,
"json": {
"value": [
{
"identifier": "User1@foo.com",
"displayName": "user1",
"emailAddress": "User1@foo.com",
"datasetUserAccessRight": "ReadWrite",
"graphId": "C9EE53F2-88EA-4711-A173-AF0515A3CD46",
"principalType": "User",
},
{
"identifier": "User2@foo.com",
"displayName": "user2",
"emailAddress": "User2@foo.com",
"datasetUserAccessRight": "ReadWrite",
"graphId": "C9EE53F2-88EA-4711-A173-AF0515A5REWS",
"principalType": "User",
},
]
},
},
"https://api.powerbi.com/v1.0/myorg/admin/dashboards/7D668CAD-8FFC-4505-9215-655BCA5BEBAE/users": {
"method": "GET",
"status_code": 200,
"json": {
"value": [
{
"identifier": "User3@foo.com",
"displayName": "user3",
"emailAddress": "User3@foo.com",
"datasetUserAccessRight": "ReadWrite",
"graphId": "C9EE53F2-88EA-4711-A173-AF0515A3CD46",
"principalType": "User",
},
{
"identifier": "User4@foo.com",
"displayName": "user4",
"emailAddress": "User4@foo.com",
"datasetUserAccessRight": "ReadWrite",
"graphId": "C9EE53F2-88EA-4711-A173-AF0515A5REWS",
"principalType": "User",
},
]
},
},
"https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE/tiles": {
"method": "GET",
"status_code": 200,
"json": {
"value": [
{
"id": "B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0",
"title": "test_tile",
"embedUrl": "https://localhost/tiles/embed/1",
"datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
},
{
"id": "23212598-23b5-4980-87cc-5fc0ecd84385",
"title": "yearly_sales",
"embedUrl": "https://localhost/tiles/embed/2",
"datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed",
},
]
},
},
"https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C22-4684-8180-826122881108/dashboards/7D668CAD-8FFC-4505-9215-655BCA5BEBAE/tiles": {
"method": "GET",
"status_code": 200,
"json": {"value": []},
},
"https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445": {
"method": "GET",
"status_code": 200,
"json": {
"id": "05169CD2-E713-41E6-9600-1D8066D95445",
"name": "library-dataset",
"description": "Library dataset description",
"webUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445",
},
},
"https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C22-4684-8180-826122881108/datasets/05169CD2-E713-41E6-96AA-1D8066D95445": {
"method": "GET",
"status_code": 200,
"json": {
"id": "05169CD2-E713-41E6-96AA-1D8066D95445",
"name": "library-dataset",
"description": "Library dataset description",
"webUrl": "http://localhost/groups/64ED5CAD-7C22-4684-8180-826122881108/datasets/05169CD2-E713-41E6-96AA-1D8066D95445",
},
},
"https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed": {
"method": "GET",
"status_code": 200,
"json": {
"id": "ba0130a1-5b03-40de-9535-b34e778ea6ed",
"name": "hr_pbi_test",
"description": "hr pbi test description",
"webUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed",
},
},
"https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/datasources": {
"method": "GET",
"status_code": 200,
"json": {
"value": [
{
"datasourceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3",
"datasourceType": "PostgreSql",
"connectionDetails": {
"database": "library_db",
"server": "foo",
},
},
]
},
},
"https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C22-4684-8180-826122881108/datasets/05169CD2-E713-41E6-96AA-1D8066D95445/datasources": {
"method": "GET",
"status_code": 200,
"json": {
"value": [
{
"datasourceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3",
"datasourceType": "PostgreSql",
"connectionDetails": {
"database": "library_db",
"server": "foo",
},
},
]
},
},
"https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanStatus/4674efd1-603c-4129-8d82-03cf2be05aff": {
"method": "GET",
"status_code": 200,
"json": {
"status": "SUCCEEDED",
},
},
"https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanStatus/a674efd1-603c-4129-8d82-03cf2be05aff": {
"method": "GET",
"status_code": 200,
"json": {
"status": "SUCCEEDED",
},
},
"https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanResult/4674efd1-603c-4129-8d82-03cf2be05aff": {
"method": "GET",
"status_code": 200,
"json": {
"workspaces": [
{
"id": "64ED5CAD-7C10-4684-8180-826122881108",
"name": "demo-workspace",
"state": "Active",
"datasets": [
{
"id": "05169CD2-E713-41E6-9600-1D8066D95445",
"endorsementDetails": {"endorsement": "Promoted"},
"name": "test_sf_pbi_test",
"tables": [
{
"name": "public issue_history",
"source": [
{
"expression": "dummy",
}
],
"datasourceUsages": [
{
"datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3",
}
],
},
{
"name": "SNOWFLAKE_TESTTABLE",
"source": [
{
"expression": 'let\n Source = Snowflake.Databases("hp123rt5.ap-southeast-2.fakecomputing.com","PBI_TEST_WAREHOUSE_PROD",[Role="PBI_TEST_MEMBER"]),\n PBI_TEST_Database = Source{[Name="PBI_TEST",Kind="Database"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name="TEST",Kind="Schema"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name="TESTTABLE",Kind="Table"]}[Data]\nin\n TESTTABLE_Table',
}
],
"datasourceUsages": [
{
"datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3",
}
],
},
{
"name": "snowflake native-query",
"source": [
{
"expression": 'let\n Source = Value.NativeQuery(Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","operations_analytics_warehouse_prod",[Role="OPERATIONS_ANALYTICS_MEMBER"]){[Name="OPERATIONS_ANALYTICS"]}[Data], "SELECT#(lf)concat((UPPER(REPLACE(SELLER,\'-\',\'\'))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4", null, [EnableFolding=true]),\n #"Added Conditional Column" = Table.AddColumn(Source, "SME Units ENT", each if [DEAL_TYPE] = "SME Unit" then [UNIT] else 0),\n #"Added Conditional Column1" = Table.AddColumn(#"Added Conditional Column", "Banklink Units", each if [DEAL_TYPE] = "Banklink" then [UNIT] else 0),\n #"Removed Columns" = Table.RemoveColumns(#"Added Conditional Column1",{"Banklink Units"}),\n #"Added Custom" = Table.AddColumn(#"Removed Columns", "Banklink Units", each if [DEAL_TYPE] = "Banklink" and [SALES_TYPE] = "3 - Upsell"\nthen [UNIT]\n\nelse if [SALES_TYPE] = "Adjusted BL Migration"\nthen [UNIT]\n\nelse 0),\n #"Added Custom1" = Table.AddColumn(#"Added Custom", "SME Units in $ (*$361)", each if [DEAL_TYPE] = "SME Unit" \nand [SALES_TYPE] <> "4 - Renewal"\n then [UNIT] * 361\nelse 0),\n #"Added Custom2" = Table.AddColumn(#"Added Custom1", "Banklink in $ (*$148)", each [Banklink Units] * 148)\nin\n #"Added Custom2"',
}
],
"datasourceUsages": [
{
"datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3",
}
],
},
{
"name": "big-query-with-parameter",
"source": [
{
"expression": 'let\n Source = GoogleBigQuery.Database([BillingProject = #"Parameter - Source"]),\n#"gcp-project" = Source{[Name=#"Parameter - Source"]}[Data],\nuniversal_Schema = #"gcp-project"{[Name="universal",Kind="Schema"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name="D_WH_DATE",Kind="Table"]}[Data],\n#"Filtered Rows" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#"Filtered Rows1" = Table.SelectRows(#"Filtered Rows", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#"Filtered Rows1"',
}
],
"datasourceUsages": [
{
"datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3",
}
],
},
{
"name": "snowflake native-query-with-join",
"source": [
{
"expression": 'let\n Source = Value.NativeQuery(Snowflake.Databases("xaa48144.snowflakecomputing.com","GSL_TEST_WH",[Role="ACCOUNTADMIN"]){[Name="GSL_TEST_DB"]}[Data], "select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, \'mo\')", null, [EnableFolding=true])\nin\n Source',
}
],
"datasourceUsages": [
{
"datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3",
}
],
},
{
"name": "job-history",
"source": [
{
"expression": 'let\n Source = Oracle.Database("localhost:1521/salesdb.domain.com", [HierarchicalNavigation=true]), HR = Source{[Schema="HR"]}[Data], EMPLOYEES1 = HR{[Name="EMPLOYEES"]}[Data] \n in EMPLOYEES1',
}
],
"datasourceUsages": [
{
"datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3",
}
],
},
{
"name": "postgres_test_table",
"source": [
{
"expression": 'let\n Source = PostgreSQL.Database("localhost" , "mics" ),\n public_order_date = Source{[Schema="public",Item="order_date"]}[Data] \n in \n public_order_date',
}
],
"datasourceUsages": [
{
"datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3",
}
],
},
],
},
{
"id": "ba0130a1-5b03-40de-9535-b34e778ea6ed",
"name": "hr_pbi_test",
"tables": [
{
"name": "dbo_book_issue",
"source": [
{
"expression": 'let\n Source = Sql.Database("localhost", "library"),\n dbo_book_issue = Source{[Schema="dbo",Item="book_issue"]}[Data]\n in dbo_book_issue',
}
],
"datasourceUsages": [
{
"datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3",
}
],
},
{
"name": "ms_sql_native_table",
"source": [
{
"expression": 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,\'-\',\'\'))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"mth_date", type date}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Month", each Date.Month([mth_date])),\n #"Added Custom1" = Table.AddColumn(#"Added Custom", "TPV Opening", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #"Added Custom1"',
}
],
"datasourceUsages": [
{
"datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3",
}
],
},
],
},
{
"id": "91580e0e-1680-4b1c-bbf9-4f6764d7a5ff",
"tables": [
{
"name": "employee_ctc",
"source": [
{
"expression": "dummy",
}
],
}
],
},
],
"dashboards": [
{
"id": "7D668CAD-7FFC-4505-9215-655BCA5BEBAE",
"isReadOnly": True,
}
],
"reports": [
{
"datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
"id": "5b218778-e7a5-4d73-8187-f10824047715",
"name": "SalesMarketing",
"description": "Acryl sales marketing report",
}
],
},
]
},
},
"https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanResult/a674efd1-603c-4129-8d82-03cf2be05aff": {
"method": "GET",
"status_code": 200,
"json": {
"workspaces": [
{
"id": "64ED5CAD-7C22-4684-8180-826122881108",
"name": "second-demo-workspace",
"state": "Active",
"datasets": [
{
"id": "05169CD2-E713-41E6-96AA-1D8066D95445",
"tables": [
{
"name": "public articles",
"source": [
{
"expression": "dummy",
}
],
}
],
}
],
"dashboards": [
{
"id": "7D668CAD-8FFC-4505-9215-655BCA5BEBAE",
"isReadOnly": True,
}
],
"reports": [
{
"datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
"id": "5b218778-e7a5-4d73-8187-f10824047715",
"name": "SalesMarketing",
"description": "Acryl sales marketing report",
}
],
},
]
},
},
"https://api.powerbi.com/v1.0/myorg/admin/workspaces/getInfo": {
"method": "POST",
"status_code": 200,
"json": scan_init_response,
},
"https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports": {
"method": "GET",
"status_code": 200,
"json": {
"value": [
{
"datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
"id": "5b218778-e7a5-4d73-8187-f10824047715",
"name": "SalesMarketing",
"description": "Acryl sales marketing report",
"webUrl": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/5b218778-e7a5-4d73-8187-f10824047715",
"embedUrl": "https://app.powerbi.com/reportEmbed?reportId=5b218778-e7a5-4d73-8187-f10824047715&groupId=f089354e-8366-4e18-aea3-4cb4a3a50b48",
}
]
},
},
"https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715": {
"method": "GET",
"status_code": 200,
"json": {
"datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
"id": "5b218778-e7a5-4d73-8187-f10824047715",
"name": "SalesMarketing",
"description": "Acryl sales marketing report",
"webUrl": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/5b218778-e7a5-4d73-8187-f10824047715",
"embedUrl": "https://app.powerbi.com/reportEmbed?reportId=5b218778-e7a5-4d73-8187-f10824047715&groupId=f089354e-8366-4e18-aea3-4cb4a3a50b48",
},
},
"https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715/pages": {
"method": "GET",
"status_code": 200,
"json": {
"value": [
{
"displayName": "Regional Sales Analysis",
"name": "ReportSection",
"order": "0",
},
{
"displayName": "Geographic Analysis",
"name": "ReportSection1",
"order": "1",
},
]
},
},
"https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/parameters": {
"method": "GET",
"status_code": 200,
"json": {
"value": [
{
"name": "Parameter - Source",
"type": "Text",
"isRequired": True,
"currentValue": "my-test-project",
},
{
"name": "My bq project",
"type": "Text",
"isRequired": True,
"currentValue": "gcp_billing",
},
]
},
},
"https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/91580e0e-1680-4b1c-bbf9-4f6764d7a5ff": {
"method": "GET",
"status_code": 200,
"json": {
"id": "91580e0e-1680-4b1c-bbf9-4f6764d7a5ff",
"name": "employee-dataset",
"description": "Employee Management",
"webUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/91580e0e-1680-4b1c-bbf9-4f6764d7a5ff",
},
},
}
api_vs_response.update(override_data)
    for url, mock_spec in api_vs_response.items():
        request_mock.register_uri(
            mock_spec["method"],
            url,
            json=mock_spec["json"],
            status_code=mock_spec["status_code"],
        )
def default_source_config():
return {
"client_id": "foo",
"client_secret": "bar",
"tenant_id": "0B0C960B-FCDF-4D0F-8C45-2E03BB59DDEB",
"workspace_id": "64ED5CAD-7C10-4684-8180-826122881108",
"extract_lineage": False,
"extract_reports": False,
"extract_ownership": True,
"convert_lineage_urns_to_lowercase": False,
"workspace_id_pattern": {"allow": ["64ED5CAD-7C10-4684-8180-826122881108"]},
"dataset_type_mapping": {
"PostgreSql": "postgres",
"Oracle": "oracle",
},
"env": "DEV",
"extract_workspaces_to_containers": False,
"enable_advance_lineage_sql_construct": False,
}
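
# The ingestion tests below each build a Pipeline with a file sink, run it, and
# compare the emitted metadata against a golden JSON file via
# mce_helpers.check_golden_file.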
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_ingest(
mock_msal: MagicMock,
pytestconfig: pytest.Config,
tmp_path: str,
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
enable_logging()
test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
register_mock_api(request_mock=requests_mock)
pipeline = Pipeline.create(
{
"run_id": "powerbi-test",
"source": {
"type": "powerbi",
"config": {
**default_source_config(),
},
},
"sink": {
"type": "file",
"config": {
"filename": f"{tmp_path}/powerbi_mces.json",
},
},
}
)
pipeline.run()
pipeline.raise_from_status()
golden_file = "golden_test_ingest.json"
mce_helpers.check_golden_file(
pytestconfig,
output_path=f"{tmp_path}/powerbi_mces.json",
golden_path=f"{test_resources_dir}/{golden_file}",
)
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_ingest_patch_disabled(
mock_msal: MagicMock,
pytestconfig: pytest.Config,
tmp_path: str,
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
enable_logging()
test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
register_mock_api(request_mock=requests_mock)
pipeline = Pipeline.create(
{
"run_id": "powerbi-test",
"source": {
"type": "powerbi",
"config": {
**default_source_config(),
"patch_metadata": False,
},
},
"sink": {
"type": "file",
"config": {
"filename": f"{tmp_path}/powerbi_mces.json",
},
},
}
)
pipeline.run()
pipeline.raise_from_status()
golden_file = "golden_test_ingest_patch_disabled.json"
mce_helpers.check_golden_file(
pytestconfig,
output_path=f"{tmp_path}/powerbi_mces.json",
golden_path=f"{test_resources_dir}/{golden_file}",
)
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_test_connection_success(mock_msal):
report = test_connection_helpers.run_test_connection(
PowerBiDashboardSource, default_source_config()
)
test_connection_helpers.assert_basic_connectivity_success(report)
@freeze_time(FROZEN_TIME)
@pytest.mark.integration
def test_powerbi_test_connection_failure():
report = test_connection_helpers.run_test_connection(
PowerBiDashboardSource, default_source_config()
)
test_connection_helpers.assert_basic_connectivity_failure(
report, "Unable to get authority configuration"
)
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_platform_instance_ingest(
mock_msal: MagicMock,
pytestconfig: pytest.Config,
tmp_path: str,
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
enable_logging()
test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
register_mock_api(request_mock=requests_mock)
output_path: str = f"{tmp_path}/powerbi_platform_instance_mces.json"
pipeline = Pipeline.create(
{
"run_id": "powerbi-test",
"source": {
"type": "powerbi",
"config": {
**default_source_config(),
"platform_instance": "aws-ap-south-1",
},
},
"sink": {
"type": "file",
"config": {
"filename": output_path,
},
},
}
)
pipeline.run()
pipeline.raise_from_status()
golden_file = "golden_test_platform_instance_ingest.json"
mce_helpers.check_golden_file(
pytestconfig,
output_path=output_path,
golden_path=f"{test_resources_dir}/{golden_file}",
)
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_ingest_urn_lower_case(
mock_msal: MagicMock,
pytestconfig: pytest.Config,
tmp_path: str,
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
register_mock_api(request_mock=requests_mock)
pipeline = Pipeline.create(
{
"run_id": "powerbi-test",
"source": {
"type": "powerbi",
"config": {
**default_source_config(),
"convert_urns_to_lowercase": True,
"convert_lineage_urns_to_lowercase": True,
},
},
"sink": {
"type": "file",
"config": {
"filename": f"{tmp_path}/powerbi_lower_case_urn_mces.json",
},
},
}
)
pipeline.run()
pipeline.raise_from_status()
golden_file = "golden_test_lower_case_urn_ingest.json"
mce_helpers.check_golden_file(
pytestconfig,
output_path=f"{tmp_path}/powerbi_lower_case_urn_mces.json",
golden_path=f"{test_resources_dir}/{golden_file}",
)
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_override_ownership(
mock_msal: MagicMock,
pytestconfig: pytest.Config,
tmp_path: str,
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
register_mock_api(request_mock=requests_mock)
pipeline = Pipeline.create(
{
"run_id": "powerbi-test",
"source": {
"type": "powerbi",
"config": {
**default_source_config(),
"extract_ownership": False,
},
},
"sink": {
"type": "file",
"config": {
"filename": f"{tmp_path}/powerbi_mces_disabled_ownership.json",
},
},
}
)
pipeline.run()
pipeline.raise_from_status()
mce_out_file = "golden_test_disabled_ownership.json"
mce_helpers.check_golden_file(
pytestconfig,
output_path=f"{tmp_path}/powerbi_mces_disabled_ownership.json",
golden_path=f"{test_resources_dir}/{mce_out_file}",
)
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_scan_all_workspaces(
mock_msal: MagicMock,
pytestconfig: pytest.Config,
tmp_path: str,
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
register_mock_api(request_mock=requests_mock)
pipeline = Pipeline.create(
{
"run_id": "powerbi-test",
"source": {
"type": "powerbi",
"config": {
**default_source_config(),
"extract_reports": False,
"extract_ownership": False,
"workspace_id_pattern": {
"deny": ["64ED5CAD-7322-4684-8180-826122881108"],
},
},
},
"sink": {
"type": "file",
"config": {
"filename": f"{tmp_path}/powerbi_mces_scan_all_workspaces.json",
},
},
}
)
pipeline.run()
pipeline.raise_from_status()
golden_file = "golden_test_scan_all_workspaces.json"
mce_helpers.check_golden_file(
pytestconfig,
output_path=f"{tmp_path}/powerbi_mces_scan_all_workspaces.json",
golden_path=f"{test_resources_dir}/{golden_file}",
)
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_extract_reports(
mock_msal: MagicMock,
pytestconfig: pytest.Config,
tmp_path: str,
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
enable_logging()
test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
register_mock_api(request_mock=requests_mock)
pipeline = Pipeline.create(
{
"run_id": "powerbi-test",
"source": {
"type": "powerbi",
"config": {
**default_source_config(),
"extract_reports": True,
},
},
"sink": {
"type": "file",
"config": {
"filename": f"{tmp_path}/powerbi_report_mces.json",
},
},
}
)
pipeline.run()
pipeline.raise_from_status()
golden_file = "golden_test_report.json"
mce_helpers.check_golden_file(
pytestconfig,
output_path=f"{tmp_path}/powerbi_report_mces.json",
golden_path=f"{test_resources_dir}/{golden_file}",
)
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_extract_lineage(
mock_msal: MagicMock,
pytestconfig: pytest.Config,
tmp_path: str,
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
enable_logging()
test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
register_mock_api(request_mock=requests_mock)
pipeline = Pipeline.create(
{
"run_id": "powerbi-lineage-test",
"source": {
"type": "powerbi",
"config": {
**default_source_config(),
"extract_lineage": True,
"dataset_type_mapping": {
"PostgreSql": {"platform_instance": "operational_instance"},
"Oracle": {
"platform_instance": "high_performance_production_unit"
},
"Sql": {"platform_instance": "reporting-db"},
"Snowflake": {"platform_instance": "sn-2"},
},
},
},
"sink": {
"type": "file",
"config": {
"filename": f"{tmp_path}/powerbi_lineage_mces.json",
},
},
}
)
pipeline.run()
pipeline.raise_from_status()
golden_file = "golden_test_lineage.json"
mce_helpers.check_golden_file(
pytestconfig,
output_path=f"{tmp_path}/powerbi_lineage_mces.json",
golden_path=f"{test_resources_dir}/{golden_file}",
)
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_extract_endorsements(
mock_msal: MagicMock,
pytestconfig: pytest.Config,
tmp_path: str,
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
register_mock_api(request_mock=requests_mock)
pipeline = Pipeline.create(
{
"run_id": "powerbi-test",
"source": {
"type": "powerbi",
"config": {
**default_source_config(),
"extract_reports": False,
"extract_endorsements_to_tags": True,
},
},
"sink": {
"type": "file",
"config": {
"filename": f"{tmp_path}/powerbi_endorsement_mces.json",
},
},
}
)
pipeline.run()
pipeline.raise_from_status()
mce_out_file = "golden_test_endorsement.json"
mce_helpers.check_golden_file(
pytestconfig,
output_path=f"{tmp_path}/powerbi_endorsement_mces.json",
golden_path=f"{test_resources_dir}/{mce_out_file}",
)
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_admin_access_is_not_allowed(
mock_msal: MagicMock,
pytestconfig: pytest.Config,
tmp_path: str,
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
enable_logging()
test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
register_mock_api(
request_mock=requests_mock,
override_data={
"https://api.powerbi.com/v1.0/myorg/admin/workspaces/getInfo": {
"method": "POST",
"status_code": 403,
"json": {},
},
},
)
pipeline = Pipeline.create(
{
"run_id": "powerbi-admin-api-disabled-test",
"source": {
"type": "powerbi",
"config": {
**default_source_config(),
"extract_lineage": True,
"dataset_type_mapping": {
"PostgreSql": {"platform_instance": "operational_instance"},
"Oracle": {
"platform_instance": "high_performance_production_unit"
},
"Sql": {"platform_instance": "reporting-db"},
"Snowflake": {"platform_instance": "sn-2"},
},
},
},
"sink": {
"type": "file",
"config": {
"filename": f"{tmp_path}/golden_test_admin_access_not_allowed_mces.json",
},
},
}
)
pipeline.run()
pipeline.raise_from_status()
golden_file = "golden_test_admin_access_not_allowed.json"
mce_helpers.check_golden_file(
pytestconfig,
output_path=f"{tmp_path}/golden_test_admin_access_not_allowed_mces.json",
golden_path=f"{test_resources_dir}/{golden_file}",
)
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
def test_workspace_container(
mock_msal: MagicMock,
pytestconfig: pytest.Config,
tmp_path: str,
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
enable_logging()
test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
register_mock_api(request_mock=requests_mock)
pipeline = Pipeline.create(
{
"run_id": "powerbi-test",
"source": {
"type": "powerbi",
"config": {
**default_source_config(),
"workspace_id_pattern": {
"deny": ["64ED5CAD-7322-4684-8180-826122881108"],
},
"extract_workspaces_to_containers": True,
"extract_datasets_to_containers": True,
"extract_reports": True,
},
},
"sink": {
"type": "file",
"config": {
"filename": f"{tmp_path}/powerbi_container_mces.json",
},
},
}
)
pipeline.run()
pipeline.raise_from_status()
mce_out_file = "golden_test_container.json"
mce_helpers.check_golden_file(
pytestconfig,
output_path=f"{tmp_path}/powerbi_container_mces.json",
golden_path=f"{test_resources_dir}/{mce_out_file}",
)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
def test_access_token_expiry_with_long_expiry(
mock_msal: MagicMock,
pytestconfig: pytest.Config,
tmp_path: str,
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
enable_logging()
register_mock_api(request_mock=requests_mock)
pipeline = Pipeline.create(
{
"run_id": "powerbi-test",
"source": {
"type": "powerbi",
"config": {
**default_source_config(),
},
},
"sink": {
"type": "file",
"config": {
"filename": f"{tmp_path}/powerbi_access_token_mces.json",
},
},
}
)
    # With a long expiry, each API resolver should request the token only once.
MsalClient.token = {
"access_token": "dummy2",
"expires_in": 3600,
}
MsalClient.reset()
pipeline.run()
    # We expect the token to be requested twice: once by the AdminApiResolver
    # and once by the RegularApiResolver.
assert MsalClient.call_num == 2
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
def test_access_token_expiry_with_short_expiry(
mock_msal: MagicMock,
pytestconfig: pytest.Config,
tmp_path: str,
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
enable_logging()
register_mock_api(request_mock=requests_mock)
pipeline = Pipeline.create(
{
"run_id": "powerbi-test",
"source": {
"type": "powerbi",
"config": {
**default_source_config(),
},
},
"sink": {
"type": "file",
"config": {
"filename": f"{tmp_path}/powerbi_access_token_mces.json",
},
},
}
)
    # With a short expiry, the token should be re-requested whenever it expires,
    # so more than two acquisitions are expected.
MsalClient.token = {
"access_token": "dummy",
"expires_in": 0,
}
pipeline.run()
assert MsalClient.call_num > 2
def dataset_type_mapping_set_to_all_platform(pipeline: Pipeline) -> None:
source_config: PowerBiDashboardSourceConfig = cast(
PowerBiDashboardSource, pipeline.source
).source_config
assert source_config.dataset_type_mapping is not None
# Generate default dataset_type_mapping and compare it with source_config.dataset_type_mapping
default_dataset_type_mapping: dict = {}
for item in SupportedDataPlatform:
default_dataset_type_mapping[
item.value.powerbi_data_platform_name
] = item.value.datahub_data_platform_name
assert default_dataset_type_mapping == source_config.dataset_type_mapping
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_dataset_type_mapping_should_set_to_all(
mock_msal, pytestconfig, tmp_path, mock_time, requests_mock
):
"""
    Here we don't need to run the pipeline; we only verify that
    dataset_type_mapping defaults to all supported data platforms.
"""
register_mock_api(request_mock=requests_mock)
new_config: dict = {**default_source_config()}
del new_config["dataset_type_mapping"]
pipeline = Pipeline.create(
{
"run_id": "powerbi-test",
"source": {
"type": "powerbi",
"config": {
**new_config,
},
},
"sink": {
"type": "file",
"config": {
"filename": f"{tmp_path}/powerbi_lower_case_urn_mces.json",
},
},
}
)
dataset_type_mapping_set_to_all_platform(pipeline)
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_dataset_type_mapping_error(
mock_msal, pytestconfig, tmp_path, mock_time, requests_mock
):
"""
    Here we don't need to run the pipeline; we only verify that a ValueError is
    raised when both dataset_type_mapping and server_to_platform_instance are set.
"""
register_mock_api(request_mock=requests_mock)
with pytest.raises(Exception, match=r"dataset_type_mapping is deprecated"):
Pipeline.create(
{
"run_id": "powerbi-test",
"source": {
"type": "powerbi",
"config": {
**default_source_config(),
"server_to_platform_instance": {
"localhost": {
"platform_instance": "test",
}
},
},
},
"sink": {
"type": "file",
"config": {
"filename": f"{tmp_path}/powerbi_lower_case_urn_mces.json",
},
},
}
)
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
def test_server_to_platform_map(
mock_msal, pytestconfig, tmp_path, mock_time, requests_mock
):
enable_logging()
test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
new_config: dict = {
**default_source_config(),
"extract_lineage": True,
"convert_lineage_urns_to_lowercase": True,
}
del new_config["dataset_type_mapping"]
new_config["server_to_platform_instance"] = {
"hp123rt5.ap-southeast-2.fakecomputing.com": {
"platform_instance": "snowflake_production_instance",
"env": "PROD",
},
"my-test-project": {
"platform_instance": "bigquery-computing-dev-account",
"env": "QA",
},
"localhost:1521": {"platform_instance": "oracle-sales-instance", "env": "PROD"},
}
register_mock_api(request_mock=requests_mock)
output_path: str = f"{tmp_path}/powerbi_server_to_platform_instance_mces.json"
pipeline = Pipeline.create(
{
"run_id": "powerbi-test",
"source": {
"type": "powerbi",
"config": new_config,
},
"sink": {
"type": "file",
"config": {
"filename": output_path,
},
},
}
)
pipeline.run()
pipeline.raise_from_status()
golden_file_path: str = (
f"{test_resources_dir}/golden_test_server_to_platform_instance.json"
)
mce_helpers.check_golden_file(
pytestconfig,
output_path=output_path,
golden_path=golden_file_path,
)
    # Since the server_to_platform_instance map is provided, the deprecated
    # dataset_type_mapping should be set to all supported platforms so that all
    # available upstream lineage is processed, even for servers that have no
    # entry in the server_to_platform_instance map.
dataset_type_mapping_set_to_all_platform(pipeline)
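
# Builds the expected Report objects from the mocked API payloads and compares
# them field-by-field with what the source's powerbi_client returns for the
# workspace.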
def validate_pipeline(pipeline: Pipeline) -> None:
mock_workspace: Workspace = Workspace(
id="64ED5CAD-7C10-4684-8180-826122881108",
name="demo-workspace",
datasets={},
dashboards=[],
reports=[],
report_endorsements={},
dashboard_endorsements={},
scan_result={},
independent_datasets=[],
)
# Fetch actual reports
reports: List[Report] = cast(
PowerBiDashboardSource, pipeline.source
).powerbi_client.get_reports(workspace=mock_workspace)
assert len(reports) == 2
# Generate expected reports using mock reports
mock_reports: List[Dict] = [
{
"datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
"id": "5b218778-e7a5-4d73-8187-f10824047715",
"name": "SalesMarketing",
"description": "Acryl sales marketing report",
"pages": [
{
"name": "ReportSection",
"displayName": "Regional Sales Analysis",
"order": "0",
},
{
"name": "ReportSection1",
"displayName": "Geographic Analysis",
"order": "1",
},
],
},
{
"datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
"id": "e9fd6b0b-d8c8-4265-8c44-67e183aebf97",
"name": "Product",
"description": "Acryl product report",
"pages": [],
},
]
expected_reports: List[Report] = [
Report(
id=report[Constant.ID],
name=report[Constant.NAME],
webUrl="",
embedUrl="",
description=report[Constant.DESCRIPTION],
pages=[
Page(
id="{}.{}".format(
report[Constant.ID], page[Constant.NAME].replace(" ", "_")
),
name=page[Constant.NAME],
displayName=page[Constant.DISPLAY_NAME],
order=page[Constant.ORDER],
)
for page in report["pages"]
],
users=[],
tags=[],
dataset=mock_workspace.datasets.get(report[Constant.DATASET_ID]),
)
for report in mock_reports
]
# Compare actual and expected reports
for i in range(2):
assert reports[i].id == expected_reports[i].id
assert reports[i].name == expected_reports[i].name
assert reports[i].description == expected_reports[i].description
assert reports[i].dataset == expected_reports[i].dataset
assert reports[i].pages == expected_reports[i].pages
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_reports_with_failed_page_request(
mock_msal: MagicMock,
pytestconfig: pytest.Config,
tmp_path: str,
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
"""
Test that all reports are fetched even if a single page request fails
"""
register_mock_api(
request_mock=requests_mock,
override_data={
"https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports": {
"method": "GET",
"status_code": 200,
"json": {
"value": [
{
"datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
"id": "5b218778-e7a5-4d73-8187-f10824047715",
"name": "SalesMarketing",
"description": "Acryl sales marketing report",
"webUrl": "https://app.powerbi.com/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715",
"embedUrl": "https://app.powerbi.com/reportEmbed?reportId=5b218778-e7a5-4d73-8187-f10824047715&groupId=64ED5CAD-7C10-4684-8180-826122881108",
},
{
"datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
"id": "e9fd6b0b-d8c8-4265-8c44-67e183aebf97",
"name": "Product",
"description": "Acryl product report",
"webUrl": "https://app.powerbi.com/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/e9fd6b0b-d8c8-4265-8c44-67e183aebf97",
"embedUrl": "https://app.powerbi.com/reportEmbed?reportId=e9fd6b0b-d8c8-4265-8c44-67e183aebf97&groupId=64ED5CAD-7C10-4684-8180-826122881108",
},
]
},
},
"https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715": {
"method": "GET",
"status_code": 200,
"json": {
"datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
"id": "5b218778-e7a5-4d73-8187-f10824047715",
"name": "SalesMarketing",
"description": "Acryl sales marketing report",
"webUrl": "https://app.powerbi.com/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715",
"embedUrl": "https://app.powerbi.com/reportEmbed?reportId=5b218778-e7a5-4d73-8187-f10824047715&groupId=64ED5CAD-7C10-4684-8180-826122881108",
},
},
"https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/e9fd6b0b-d8c8-4265-8c44-67e183aebf97": {
"method": "GET",
"status_code": 200,
"json": {
"datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
"id": "e9fd6b0b-d8c8-4265-8c44-67e183aebf97",
"name": "Product",
"description": "Acryl product report",
"webUrl": "https://app.powerbi.com/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/e9fd6b0b-d8c8-4265-8c44-67e183aebf97",
"embedUrl": "https://app.powerbi.com/reportEmbed?reportId=e9fd6b0b-d8c8-4265-8c44-67e183aebf97&groupId=64ED5CAD-7C10-4684-8180-826122881108",
},
},
"https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715/pages": {
"method": "GET",
"status_code": 200,
"json": {
"value": [
{
"displayName": "Regional Sales Analysis",
"name": "ReportSection",
"order": "0",
},
{
"displayName": "Geographic Analysis",
"name": "ReportSection1",
"order": "1",
},
]
},
},
"https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/e9fd6b0b-d8c8-4265-8c44-67e183aebf97/pages": {
"method": "GET",
"status_code": 400,
"json": {
"error": {
"code": "InvalidRequest",
"message": "Request is currently not supported for RDL reports",
}
},
},
},
)
pipeline = Pipeline.create(
{
"run_id": "powerbi-test",
"source": {
"type": "powerbi",
"config": {
**default_source_config(),
"extract_reports": True,
"platform_instance": "aws-ap-south-1",
},
},
"sink": {
"type": "file",
"config": {
"filename": f"{tmp_path}powerbi_reports_with_failed_page_request_mces.json",
},
},
}
)
validate_pipeline(pipeline)
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
def test_independent_datasets_extraction(
mock_msal: MagicMock,
pytestconfig: pytest.Config,
tmp_path: str,
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
register_mock_api(
request_mock=requests_mock,
override_data={
"https://api.powerbi.com/v1.0/myorg/groups": {
"method": "GET",
"status_code": 200,
"json": {
"@odata.count": 3,
"value": [
{
"id": "64ED5CAD-7C10-4684-8180-826122881108",
"isReadOnly": True,
"name": "demo-workspace",
"type": "Workspace",
},
],
},
},
"https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanResult/4674efd1-603c-4129-8d82-03cf2be05aff": {
"method": "GET",
"status_code": 200,
"json": {
"workspaces": [
{
"id": "64ED5CAD-7C10-4684-8180-826122881108",
"name": "demo-workspace",
"state": "Active",
"datasets": [
{
"id": "91580e0e-1680-4b1c-bbf9-4f6764d7a5ff",
"tables": [
{
"name": "employee_ctc",
"source": [
{
"expression": "dummy",
}
],
}
],
},
],
},
]
},
},
"https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/dashboards": {
"method": "GET",
"status_code": 200,
"json": {"value": []},
},
},
)
pipeline = Pipeline.create(
{
"run_id": "powerbi-test",
"source": {
"type": "powerbi",
"config": {
**default_source_config(),
"extract_independent_datasets": True,
},
},
"sink": {
"type": "file",
"config": {
"filename": f"{tmp_path}/powerbi_independent_mces.json",
},
},
}
)
pipeline.run()
pipeline.raise_from_status()
golden_file = "golden_test_independent_datasets.json"
mce_helpers.check_golden_file(
pytestconfig,
output_path=f"{tmp_path}/powerbi_independent_mces.json",
golden_path=f"{test_resources_dir}/{golden_file}",
)
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
def test_cll_extraction(
mock_msal: MagicMock,
pytestconfig: pytest.Config,
tmp_path: str,
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
register_mock_api(
request_mock=requests_mock,
)
default_conf: dict = default_source_config()
    # Delete this key so that the connector falls back to the default
    # dataset_type_mapping (all supported data platforms).
    del default_conf["dataset_type_mapping"]
pipeline = Pipeline.create(
{
"run_id": "powerbi-test",
"source": {
"type": "powerbi",
"config": {
**default_conf,
"extract_lineage": True,
"extract_column_level_lineage": True,
"enable_advance_lineage_sql_construct": True,
"native_query_parsing": True,
"extract_independent_datasets": True,
},
},
"sink": {
"type": "file",
"config": {
"filename": f"{tmp_path}/powerbi_cll_mces.json",
},
},
}
)
pipeline.run()
pipeline.raise_from_status()
golden_file = "golden_test_cll.json"
mce_helpers.check_golden_file(
pytestconfig,
output_path=f"{tmp_path}/powerbi_cll_mces.json",
golden_path=f"{test_resources_dir}/{golden_file}",
)
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
def test_cll_extraction_flags(
mock_msal: MagicMock,
pytestconfig: pytest.Config,
tmp_path: str,
mock_time: datetime.datetime,
requests_mock: Any,
) -> None:
register_mock_api(
request_mock=requests_mock,
)
default_conf: dict = default_source_config()
pattern: str = re.escape(
"Enable all these flags in recipe: ['native_query_parsing', 'enable_advance_lineage_sql_construct', 'extract_lineage']"
)
with pytest.raises(Exception, match=pattern):
Pipeline.create(
{
"run_id": "powerbi-test",
"source": {
"type": "powerbi",
"config": {
**default_conf,
"extract_column_level_lineage": True,
},
},
"sink": {
"type": "file",
"config": {
"filename": f"{tmp_path}/powerbi_cll_mces.json",
},
},
}
)