import datetime
import logging
import re
import sys
from typing import Any, Dict, List, Optional, cast
from unittest import mock
from unittest.mock import MagicMock

import pytest
from freezegun import freeze_time

from datahub.ingestion.run.pipeline import Pipeline
from datahub.ingestion.source.powerbi.config import (
    Constant,
    PowerBiDashboardSourceConfig,
    SupportedDataPlatform,
)
from datahub.ingestion.source.powerbi.powerbi import PowerBiDashboardSource
from datahub.ingestion.source.powerbi.rest_api_wrapper.data_classes import (
    Page,
    Report,
    Workspace,
)
from tests.test_helpers import mce_helpers, test_connection_helpers

pytestmark = pytest.mark.integration_batch_2
FROZEN_TIME = "2022-02-03 07:00:00"


def enable_logging():
    """Send root-logger output to the console at DEBUG level.

    Many tests in this module call this helper, so a handler is attached only
    when the root logger does not already have one writing to the current
    ``sys.stdout``; otherwise every log record would be printed once per call.
    """
    root_logger = logging.getLogger()
    already_attached = any(
        isinstance(handler, logging.StreamHandler) and handler.stream is sys.stdout
        for handler in root_logger.handlers
    )
    if not already_attached:
        # set logging to console
        root_logger.addHandler(logging.StreamHandler(sys.stdout))
    root_logger.setLevel(logging.DEBUG)


class MsalClient:
    """Stand-in for ``msal.ConfidentialClientApplication`` used via ``mock.patch``.

    State is kept on the class (not on instances) so tests can set ``token`` and
    read ``call_num`` without holding a reference to the mocked instance.
    """

    # Number of times acquire_token_for_client has been called since reset().
    call_num = 0
    # Payload handed back for every token request; tests overwrite it to
    # simulate different ``expires_in`` values.
    token: Dict[str, Any] = {
        "access_token": "dummy",
    }

    @staticmethod
    def acquire_token_for_client(*args, **kwargs):
        """Count the request and return the currently configured token payload."""
        MsalClient.call_num += 1
        return MsalClient.token

    @staticmethod
    def reset():
        """Zero the request counter (the token payload is left untouched)."""
        MsalClient.call_num = 0


def mock_msal_cca(*args, **kwargs):
    """``side_effect`` for the msal patch: ignore all arguments, return the fake."""
    return MsalClient()


def scan_init_response(request, context):
    """Return the scan id payload for the workspace(s) named in the POST body.

    The mocked request carries the POST input as repeated ``workspaces=<id>``
    pairs. When two or more workspaces are scanned the body looks like
    'workspaces=64ED5CAD-7C10-4684-8180-826122881108&workspaces=64ED5CAD-7C22-4684-8180-826122881108',
    so the ids are extracted and joined with ``||`` to form the lookup key.

    Raises:
        KeyError: if the requested workspace combination is not in the table.
    """
    workspace_id_list = request.text.replace("&", "").split("workspaces=")
    # Element 0 is the (empty) text before the first "workspaces=" marker.
    workspace_id = "||".join(workspace_id_list[1:])

    w_id_vs_response: Dict[str, Any] = {
        "64ED5CAD-7C10-4684-8180-826122881108": {
            "id": "4674efd1-603c-4129-8d82-03cf2be05aff"
        },
        "64ED5CAD-7C22-4684-8180-826122881108": {
            "id": "a674efd1-603c-4129-8d82-03cf2be05aff"
        },
        "64ED5CAD-7C10-4684-8180-826122881108||64ED5CAD-7C22-4684-8180-826122881108": {
            "id": "a674efd1-603c-4129-8d82-03cf2be05aff"
        },
    }

    return w_id_vs_response[workspace_id]


def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) -> None:
    """Register the canned PowerBI REST responses on ``request_mock``.

    Args:
        request_mock: object exposing ``register_uri(method, url, json=..., status_code=...)``
            (the ``requests_mock`` fixture in these tests).
        override_data: optional ``{url: {"method", "status_code", "json"}}`` entries
            that replace or extend the defaults before registration.
    """
    override_data = override_data or {}

    api_vs_response: Dict[str, Dict[str, Any]] = {
        "https://api.powerbi.com/v1.0/myorg/groups": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "@odata.count": 3,
                "value": [
                    {
                        "id": "64ED5CAD-7C10-4684-8180-826122881108",
                        "isReadOnly": True,
                        "name": "demo-workspace",
                        "type": "Workspace",
                    },
                    {
                        "id": "64ED5CAD-7C22-4684-8180-826122881108",
                        "isReadOnly": True,
                        "name": "second-demo-workspace",
                        "type": "Workspace",
                    },
                    {
                        "id": "64ED5CAD-7322-4684-8180-826122881108",
                        "isReadOnly": True,
                        "name": "Workspace 2",
                        "type": "Workspace",
                    },
                ],
            },
        },
        "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/dashboards": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "value": [
                    {
                        "id": "7D668CAD-7FFC-4505-9215-655BCA5BEBAE",
                        "isReadOnly": True,
                        "displayName": "test_dashboard",
                        "description": "Description of test dashboard",
                        "embedUrl": "https://localhost/dashboards/embed/1",
                        "webUrl": "https://localhost/dashboards/web/1",
                    }
                ]
            },
        },
        "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C22-4684-8180-826122881108/dashboards": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "value": [
                    {
                        "id": "7D668CAD-8FFC-4505-9215-655BCA5BEBAE",
                        "isReadOnly": True,
                        "displayName": "test_dashboard2",
                        "embedUrl": "https://localhost/dashboards/embed/1",
                        "webUrl": "https://localhost/dashboards/web/1",
                    }
                ]
            },
        },
        "https://api.powerbi.com/v1.0/myorg/admin/reports/5b218778-e7a5-4d73-8187-f10824047715/users": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "value": [
                    {
                        "identifier": "User1@foo.com",
                        "displayName": "user1",
                        "emailAddress": "User1@foo.com",
                        "datasetUserAccessRight": "ReadWrite",
                        "graphId": "C9EE53F2-88EA-4711-A173-AF0515A3CD46",
                        "principalType": "User",
                    },
                    {
                        "identifier": "User2@foo.com",
                        "displayName": "user2",
                        "emailAddress": "User2@foo.com",
                        "datasetUserAccessRight": "ReadWrite",
                        "graphId": "C9EE53F2-88EA-4711-A173-AF0515A5REWS",
                        "principalType": "User",
                    },
                ]
            },
        },
        "https://api.powerbi.com/v1.0/myorg/admin/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE/users": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "value": [
                    {
                        "identifier": "User1@foo.com",
                        "displayName": "user1",
                        "emailAddress": "User1@foo.com",
                        "datasetUserAccessRight": "ReadWrite",
                        "graphId": "C9EE53F2-88EA-4711-A173-AF0515A3CD46",
                        "principalType": "User",
                    },
                    {
                        "identifier": "User2@foo.com",
                        "displayName": "user2",
                        "emailAddress": "User2@foo.com",
                        "datasetUserAccessRight": "ReadWrite",
                        "graphId": "C9EE53F2-88EA-4711-A173-AF0515A5REWS",
                        "principalType": "User",
                    },
                ]
            },
        },
        "https://api.powerbi.com/v1.0/myorg/admin/dashboards/7D668CAD-8FFC-4505-9215-655BCA5BEBAE/users": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "value": [
                    {
                        "identifier": "User3@foo.com",
                        "displayName": "user3",
                        "emailAddress": "User3@foo.com",
                        "datasetUserAccessRight": "ReadWrite",
                        "graphId": "C9EE53F2-88EA-4711-A173-AF0515A3CD46",
                        "principalType": "User",
                    },
                    {
                        "identifier": "User4@foo.com",
                        "displayName": "user4",
                        "emailAddress": "User4@foo.com",
                        "datasetUserAccessRight": "ReadWrite",
                        "graphId": "C9EE53F2-88EA-4711-A173-AF0515A5REWS",
                        "principalType": "User",
                    },
                ]
            },
        },
        "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE/tiles": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "value": [
                    {
                        "id": "B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0",
                        "title": "test_tile",
                        "embedUrl": "https://localhost/tiles/embed/1",
                        "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
                    },
                    {
                        "id": "23212598-23b5-4980-87cc-5fc0ecd84385",
                        "title": "yearly_sales",
                        "embedUrl": "https://localhost/tiles/embed/2",
                        "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed",
                    },
                ]
            },
        },
        "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C22-4684-8180-826122881108/dashboards/7D668CAD-8FFC-4505-9215-655BCA5BEBAE/tiles": {
            "method": "GET",
            "status_code": 200,
            "json": {"value": []},
        },
        "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "id": "05169CD2-E713-41E6-9600-1D8066D95445",
                "name": "library-dataset",
                "description": "Library dataset description",
                "webUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445",
            },
        },
        "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C22-4684-8180-826122881108/datasets/05169CD2-E713-41E6-96AA-1D8066D95445": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "id": "05169CD2-E713-41E6-96AA-1D8066D95445",
                "name": "library-dataset",
                "description": "Library dataset description",
                "webUrl": "http://localhost/groups/64ED5CAD-7C22-4684-8180-826122881108/datasets/05169CD2-E713-41E6-96AA-1D8066D95445",
            },
        },
        "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "id": "ba0130a1-5b03-40de-9535-b34e778ea6ed",
                "name": "hr_pbi_test",
                "description": "hr pbi test description",
                "webUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed",
            },
        },
        "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/datasources": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "value": [
                    {
                        "datasourceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3",
                        "datasourceType": "PostgreSql",
                        "connectionDetails": {
                            "database": "library_db",
                            "server": "foo",
                        },
                    },
                ]
            },
        },
        "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C22-4684-8180-826122881108/datasets/05169CD2-E713-41E6-96AA-1D8066D95445/datasources": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "value": [
                    {
                        "datasourceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3",
                        "datasourceType": "PostgreSql",
                        "connectionDetails": {
                            "database": "library_db",
                            "server": "foo",
                        },
                    },
                ]
            },
        },
        "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanStatus/4674efd1-603c-4129-8d82-03cf2be05aff": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "status": "SUCCEEDED",
            },
        },
        "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanStatus/a674efd1-603c-4129-8d82-03cf2be05aff": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "status": "SUCCEEDED",
            },
        },
        "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanResult/4674efd1-603c-4129-8d82-03cf2be05aff": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "workspaces": [
                    {
                        "id": "64ED5CAD-7C10-4684-8180-826122881108",
                        "name": "demo-workspace",
                        "state": "Active",
                        "datasets": [
                            {
                                "id": "05169CD2-E713-41E6-9600-1D8066D95445",
                                "endorsementDetails": {"endorsement": "Promoted"},
                                "name": "test_sf_pbi_test",
                                "tables": [
                                    {
                                        "name": "public issue_history",
                                        "source": [
                                            {
                                                "expression": "dummy",
                                            }
                                        ],
                                        "datasourceUsages": [
                                            {
                                                "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3",
                                            }
                                        ],
                                    },
                                    {
                                        "name": "SNOWFLAKE_TESTTABLE",
                                        "source": [
                                            {
                                                "expression": 'let\n Source = Snowflake.Databases("hp123rt5.ap-southeast-2.fakecomputing.com","PBI_TEST_WAREHOUSE_PROD",[Role="PBI_TEST_MEMBER"]),\n PBI_TEST_Database = Source{[Name="PBI_TEST",Kind="Database"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name="TEST",Kind="Schema"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name="TESTTABLE",Kind="Table"]}[Data]\nin\n TESTTABLE_Table',
                                            }
                                        ],
                                        "datasourceUsages": [
                                            {
                                                "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3",
                                            }
                                        ],
                                    },
                                    {
                                        "name": "snowflake native-query",
                                        "source": [
                                            {
                                                "expression": 'let\n Source = Value.NativeQuery(Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","operations_analytics_warehouse_prod",[Role="OPERATIONS_ANALYTICS_MEMBER"]){[Name="OPERATIONS_ANALYTICS"]}[Data], "SELECT#(lf)concat((UPPER(REPLACE(SELLER,\'-\',\'\'))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4", null, [EnableFolding=true]),\n #"Added Conditional Column" = Table.AddColumn(Source, "SME Units ENT", each if [DEAL_TYPE] = "SME Unit" then [UNIT] else 0),\n #"Added Conditional Column1" = Table.AddColumn(#"Added Conditional Column", "Banklink Units", each if [DEAL_TYPE] = "Banklink" then [UNIT] else 0),\n #"Removed Columns" = Table.RemoveColumns(#"Added Conditional Column1",{"Banklink Units"}),\n #"Added Custom" = Table.AddColumn(#"Removed Columns", "Banklink Units", each if [DEAL_TYPE] = "Banklink" and [SALES_TYPE] = "3 - Upsell"\nthen [UNIT]\n\nelse if [SALES_TYPE] = "Adjusted BL Migration"\nthen [UNIT]\n\nelse 0),\n #"Added Custom1" = Table.AddColumn(#"Added Custom", "SME Units in $ (*$361)", each if [DEAL_TYPE] = "SME Unit" \nand [SALES_TYPE] <> "4 - Renewal"\n then [UNIT] * 361\nelse 0),\n #"Added Custom2" = Table.AddColumn(#"Added Custom1", "Banklink in $ (*$148)", each [Banklink Units] * 148)\nin\n #"Added Custom2"',
                                            }
                                        ],
                                        "datasourceUsages": [
                                            {
                                                "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3",
                                            }
                                        ],
                                    },
                                    {
                                        "name": "big-query-with-parameter",
                                        "source": [
                                            {
                                                "expression": 'let\n Source = GoogleBigQuery.Database([BillingProject = #"Parameter - Source"]),\n#"gcp-project" = Source{[Name=#"Parameter - Source"]}[Data],\nuniversal_Schema = #"gcp-project"{[Name="universal",Kind="Schema"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name="D_WH_DATE",Kind="Table"]}[Data],\n#"Filtered Rows" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#"Filtered Rows1" = Table.SelectRows(#"Filtered Rows", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#"Filtered Rows1"',
                                            }
                                        ],
                                        "datasourceUsages": [
                                            {
                                                "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3",
                                            }
                                        ],
                                    },
                                    {
                                        "name": "snowflake native-query-with-join",
                                        "source": [
                                            {
                                                "expression": 'let\n Source = Value.NativeQuery(Snowflake.Databases("xaa48144.snowflakecomputing.com","GSL_TEST_WH",[Role="ACCOUNTADMIN"]){[Name="GSL_TEST_DB"]}[Data], "select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, \'mo\')", null, [EnableFolding=true])\nin\n Source',
                                            }
                                        ],
                                        "datasourceUsages": [
                                            {
                                                "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3",
                                            }
                                        ],
                                    },
                                    {
                                        "name": "job-history",
                                        "source": [
                                            {
                                                "expression": 'let\n Source = Oracle.Database("localhost:1521/salesdb.domain.com", [HierarchicalNavigation=true]), HR = Source{[Schema="HR"]}[Data], EMPLOYEES1 = HR{[Name="EMPLOYEES"]}[Data] \n in EMPLOYEES1',
                                            }
                                        ],
                                        "datasourceUsages": [
                                            {
                                                "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3",
                                            }
                                        ],
                                    },
                                    {
                                        "name": "postgres_test_table",
                                        "source": [
                                            {
                                                "expression": 'let\n Source = PostgreSQL.Database("localhost" , "mics" ),\n public_order_date = Source{[Schema="public",Item="order_date"]}[Data] \n in \n public_order_date',
                                            }
                                        ],
                                        "datasourceUsages": [
                                            {
                                                "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3",
                                            }
                                        ],
                                    },
                                ],
                            },
                            {
                                "id": "ba0130a1-5b03-40de-9535-b34e778ea6ed",
                                "name": "hr_pbi_test",
                                "tables": [
                                    {
                                        "name": "dbo_book_issue",
                                        "source": [
                                            {
                                                "expression": 'let\n Source = Sql.Database("localhost", "library"),\n dbo_book_issue = Source{[Schema="dbo",Item="book_issue"]}[Data]\n in dbo_book_issue',
                                            }
                                        ],
                                        "datasourceUsages": [
                                            {
                                                "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3",
                                            }
                                        ],
                                    },
                                    {
                                        "name": "ms_sql_native_table",
                                        "source": [
                                            {
                                                "expression": 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,\'-\',\'\'))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"mth_date", type date}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Month", each Date.Month([mth_date])),\n #"Added Custom1" = Table.AddColumn(#"Added Custom", "TPV Opening", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #"Added Custom1"',
                                            }
                                        ],
                                        "datasourceUsages": [
                                            {
                                                "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3",
                                            }
                                        ],
                                    },
                                ],
                            },
                            {
                                "id": "91580e0e-1680-4b1c-bbf9-4f6764d7a5ff",
                                "tables": [
                                    {
                                        "name": "employee_ctc",
                                        "source": [
                                            {
                                                "expression": "dummy",
                                            }
                                        ],
                                    }
                                ],
                            },
                        ],
                        "dashboards": [
                            {
                                "id": "7D668CAD-7FFC-4505-9215-655BCA5BEBAE",
                                "isReadOnly": True,
                            }
                        ],
                        "reports": [
                            {
                                "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
                                "id": "5b218778-e7a5-4d73-8187-f10824047715",
                                "name": "SalesMarketing",
                                "description": "Acryl sales marketing report",
                            }
                        ],
                    },
                ]
            },
        },
        "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanResult/a674efd1-603c-4129-8d82-03cf2be05aff": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "workspaces": [
                    {
                        "id": "64ED5CAD-7C22-4684-8180-826122881108",
                        "name": "second-demo-workspace",
                        "state": "Active",
                        "datasets": [
                            {
                                "id": "05169CD2-E713-41E6-96AA-1D8066D95445",
                                "tables": [
                                    {
                                        "name": "public articles",
                                        "source": [
                                            {
                                                "expression": "dummy",
                                            }
                                        ],
                                    }
                                ],
                            }
                        ],
                        "dashboards": [
                            {
                                "id": "7D668CAD-8FFC-4505-9215-655BCA5BEBAE",
                                "isReadOnly": True,
                            }
                        ],
                        "reports": [
                            {
                                "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
                                "id": "5b218778-e7a5-4d73-8187-f10824047715",
                                "name": "SalesMarketing",
                                "description": "Acryl sales marketing report",
                            }
                        ],
                    },
                ]
            },
        },
        "https://api.powerbi.com/v1.0/myorg/admin/workspaces/getInfo": {
            "method": "POST",
            "status_code": 200,
            # A callable: the response body is computed from the request.
            "json": scan_init_response,
        },
        "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "value": [
                    {
                        "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
                        "id": "5b218778-e7a5-4d73-8187-f10824047715",
                        "name": "SalesMarketing",
                        "description": "Acryl sales marketing report",
                        "webUrl": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/5b218778-e7a5-4d73-8187-f10824047715",
                        "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=5b218778-e7a5-4d73-8187-f10824047715&groupId=f089354e-8366-4e18-aea3-4cb4a3a50b48",
                    }
                ]
            },
        },
        "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
                "id": "5b218778-e7a5-4d73-8187-f10824047715",
                "name": "SalesMarketing",
                "description": "Acryl sales marketing report",
                "webUrl": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/5b218778-e7a5-4d73-8187-f10824047715",
                "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=5b218778-e7a5-4d73-8187-f10824047715&groupId=f089354e-8366-4e18-aea3-4cb4a3a50b48",
            },
        },
        "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715/pages": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "value": [
                    {
                        "displayName": "Regional Sales Analysis",
                        "name": "ReportSection",
                        "order": "0",
                    },
                    {
                        "displayName": "Geographic Analysis",
                        "name": "ReportSection1",
                        "order": "1",
                    },
                ]
            },
        },
        "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/parameters": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "value": [
                    {
                        "name": "Parameter - Source",
                        "type": "Text",
                        "isRequired": True,
                        "currentValue": "my-test-project",
                    },
                    {
                        "name": "My bq project",
                        "type": "Text",
                        "isRequired": True,
                        "currentValue": "gcp_billing",
                    },
                ]
            },
        },
        "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/91580e0e-1680-4b1c-bbf9-4f6764d7a5ff": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "id": "91580e0e-1680-4b1c-bbf9-4f6764d7a5ff",
                "name": "employee-dataset",
                "description": "Employee Management",
                "webUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/91580e0e-1680-4b1c-bbf9-4f6764d7a5ff",
            },
        },
    }

    # Caller-supplied entries win over the defaults for the same URL.
    api_vs_response.update(override_data)

    for url, response in api_vs_response.items():
        request_mock.register_uri(
            response["method"],
            url,
            json=response["json"],
            status_code=response["status_code"],
        )


def default_source_config():
    """Return a fresh baseline PowerBI source config dict for the tests to extend."""
    return {
        "client_id": "foo",
        "client_secret": "bar",
        "tenant_id": "0B0C960B-FCDF-4D0F-8C45-2E03BB59DDEB",
        "workspace_id": "64ED5CAD-7C10-4684-8180-826122881108",
        "extract_lineage": False,
        "extract_reports": False,
        "extract_ownership": True,
        "convert_lineage_urns_to_lowercase": False,
        "workspace_id_pattern": {"allow": ["64ED5CAD-7C10-4684-8180-826122881108"]},
        "dataset_type_mapping": {
            "PostgreSql": "postgres",
            "Oracle": "oracle",
        },
        "env": "DEV",
        "extract_workspaces_to_containers": False,
        "enable_advance_lineage_sql_construct": False,
    }


@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_ingest(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Run the pipeline with the baseline config and compare with the golden file."""
    enable_logging()

    test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"

    register_mock_api(request_mock=requests_mock)

    pipeline = Pipeline.create(
        {
            "run_id": "powerbi-test",
            "source": {
                "type": "powerbi",
                "config": {
                    **default_source_config(),
                },
            },
            "sink": {
                "type": "file",
                "config": {
                    "filename": f"{tmp_path}/powerbi_mces.json",
                },
            },
        }
    )

    pipeline.run()
    pipeline.raise_from_status()
    golden_file = "golden_test_ingest.json"

    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=f"{tmp_path}/powerbi_mces.json",
        golden_path=f"{test_resources_dir}/{golden_file}",
    )


@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_ingest_patch_disabled(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Same as the baseline ingest, but with ``patch_metadata`` set to False."""
    enable_logging()

    test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"

    register_mock_api(request_mock=requests_mock)

    pipeline = Pipeline.create(
        {
            "run_id": "powerbi-test",
            "source": {
                "type": "powerbi",
                "config": {
                    **default_source_config(),
                    "patch_metadata": False,
                },
            },
            "sink": {
                "type": "file",
                "config": {
                    "filename": f"{tmp_path}/powerbi_mces.json",
                },
            },
        }
    )

    pipeline.run()
    pipeline.raise_from_status()
    golden_file = "golden_test_ingest_patch_disabled.json"

    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=f"{tmp_path}/powerbi_mces.json",
        golden_path=f"{test_resources_dir}/{golden_file}",
    )


@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_test_connection_success(mock_msal):
    """With msal mocked, the connection test reports basic connectivity success."""
    report = test_connection_helpers.run_test_connection(
        PowerBiDashboardSource, default_source_config()
    )
    test_connection_helpers.assert_basic_connectivity_success(report)


@freeze_time(FROZEN_TIME)
@pytest.mark.integration
def test_powerbi_test_connection_failure():
    """Without the msal mock, the connection test reports an authority failure."""
    report = test_connection_helpers.run_test_connection(
        PowerBiDashboardSource, default_source_config()
    )
    test_connection_helpers.assert_basic_connectivity_failure(
        report, "Unable to get authority configuration"
    )


@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_platform_instance_ingest(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest with ``platform_instance`` set and compare with the golden file."""
    enable_logging()

    test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"

    register_mock_api(request_mock=requests_mock)

    output_path: str = f"{tmp_path}/powerbi_platform_instance_mces.json"

    pipeline = Pipeline.create(
        {
            "run_id": "powerbi-test",
            "source": {
                "type": "powerbi",
                "config": {
                    **default_source_config(),
                    "platform_instance": "aws-ap-south-1",
                },
            },
            "sink": {
                "type": "file",
                "config": {
                    "filename": output_path,
                },
            },
        }
    )

    pipeline.run()
    pipeline.raise_from_status()
    golden_file = "golden_test_platform_instance_ingest.json"

    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_path,
        golden_path=f"{test_resources_dir}/{golden_file}",
    )


@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_ingest_urn_lower_case(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest with both lowercase-URN options enabled and compare with golden."""
    test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"

    register_mock_api(request_mock=requests_mock)

    pipeline = Pipeline.create(
        {
            "run_id": "powerbi-test",
            "source": {
                "type": "powerbi",
                "config": {
                    **default_source_config(),
                    "env": "PROD",
                    "platform_instance": "myPlatformInstance",
                    "convert_urns_to_lowercase": True,
                    "convert_lineage_urns_to_lowercase": True,
                },
            },
            "sink": {
                "type": "file",
                "config": {
                    "filename": f"{tmp_path}/powerbi_lower_case_urn_mces.json",
                },
            },
        }
    )

    pipeline.run()
    pipeline.raise_from_status()
    golden_file = "golden_test_lower_case_urn_ingest.json"

    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=f"{tmp_path}/powerbi_lower_case_urn_mces.json",
        golden_path=f"{test_resources_dir}/{golden_file}",
    )


@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_override_ownership(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest with ``extract_ownership`` disabled and compare with the golden file."""
    test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"

    register_mock_api(request_mock=requests_mock)

    pipeline = Pipeline.create(
        {
            "run_id": "powerbi-test",
            "source": {
                "type": "powerbi",
                "config": {
                    **default_source_config(),
                    "extract_ownership": False,
                },
            },
            "sink": {
                "type": "file",
                "config": {
                    "filename": f"{tmp_path}/powerbi_mces_disabled_ownership.json",
                },
            },
        }
    )

    pipeline.run()
    pipeline.raise_from_status()
    mce_out_file = "golden_test_disabled_ownership.json"

    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=f"{tmp_path}/powerbi_mces_disabled_ownership.json",
        golden_path=f"{test_resources_dir}/{mce_out_file}",
    )


@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_scan_all_workspaces(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Replace the allow-list with a deny-list so several workspaces are scanned."""
    test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"

    register_mock_api(request_mock=requests_mock)

    pipeline = Pipeline.create(
        {
            "run_id": "powerbi-test",
            "source": {
                "type": "powerbi",
                "config": {
                    **default_source_config(),
                    "extract_reports": False,
                    "extract_ownership": False,
                    "workspace_id_pattern": {
                        "deny": ["64ED5CAD-7322-4684-8180-826122881108"],
                    },
                },
            },
            "sink": {
                "type": "file",
                "config": {
                    "filename": f"{tmp_path}/powerbi_mces_scan_all_workspaces.json",
                },
            },
        }
    )

    pipeline.run()
    pipeline.raise_from_status()

    golden_file = "golden_test_scan_all_workspaces.json"

    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=f"{tmp_path}/powerbi_mces_scan_all_workspaces.json",
        golden_path=f"{test_resources_dir}/{golden_file}",
    )


@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_extract_reports(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest with ``extract_reports`` enabled and compare with the golden file."""
    enable_logging()

    test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"

    register_mock_api(request_mock=requests_mock)

    pipeline = Pipeline.create(
        {
            "run_id": "powerbi-test",
            "source": {
                "type": "powerbi",
                "config": {
                    **default_source_config(),
                    "extract_reports": True,
                },
            },
            "sink": {
                "type": "file",
                "config": {
                    "filename": f"{tmp_path}/powerbi_report_mces.json",
                },
            },
        }
    )

    pipeline.run()
    pipeline.raise_from_status()
    golden_file = "golden_test_report.json"

    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=f"{tmp_path}/powerbi_report_mces.json",
        golden_path=f"{test_resources_dir}/{golden_file}",
    )


@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_extract_lineage(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest with lineage extraction and per-platform instance mappings."""
    enable_logging()

    test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"

    register_mock_api(request_mock=requests_mock)

    pipeline = Pipeline.create(
        {
            "run_id": "powerbi-lineage-test",
            "source": {
                "type": "powerbi",
                "config": {
                    **default_source_config(),
                    "extract_lineage": True,
                    "dataset_type_mapping": {
                        "PostgreSql": {"platform_instance": "operational_instance"},
                        "Oracle": {
                            "platform_instance": "high_performance_production_unit"
                        },
                        "Sql": {"platform_instance": "reporting-db"},
                        "Snowflake": {"platform_instance": "sn-2"},
                    },
                },
            },
            "sink": {
                "type": "file",
                "config": {
                    "filename": f"{tmp_path}/powerbi_lineage_mces.json",
                },
            },
        }
    )

    pipeline.run()
    pipeline.raise_from_status()
    golden_file = "golden_test_lineage.json"

    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=f"{tmp_path}/powerbi_lineage_mces.json",
        golden_path=f"{test_resources_dir}/{golden_file}",
    )


@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_extract_endorsements(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest with ``extract_endorsements_to_tags`` enabled and compare with golden."""
    test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"

    register_mock_api(request_mock=requests_mock)

    pipeline = Pipeline.create(
        {
            "run_id": "powerbi-test",
            "source": {
                "type": "powerbi",
                "config": {
                    **default_source_config(),
                    "extract_reports": False,
                    "extract_endorsements_to_tags": True,
                },
            },
            "sink": {
                "type": "file",
                "config": {
                    "filename": f"{tmp_path}/powerbi_endorsement_mces.json",
                },
            },
        }
    )

    pipeline.run()
    pipeline.raise_from_status()
    mce_out_file = "golden_test_endorsement.json"

    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=f"{tmp_path}/powerbi_endorsement_mces.json",
        golden_path=f"{test_resources_dir}/{mce_out_file}",
    )


@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_admin_access_is_not_allowed(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest while the admin ``getInfo`` endpoint answers 403."""
    enable_logging()

    test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"

    register_mock_api(
        request_mock=requests_mock,
        override_data={
            "https://api.powerbi.com/v1.0/myorg/admin/workspaces/getInfo": {
                "method": "POST",
                "status_code": 403,
                "json": {},
            },
        },
    )

    pipeline = Pipeline.create(
        {
            "run_id": "powerbi-admin-api-disabled-test",
            "source": {
                "type": "powerbi",
                "config": {
                    **default_source_config(),
                    "extract_lineage": True,
                    "dataset_type_mapping": {
                        "PostgreSql": {"platform_instance": "operational_instance"},
                        "Oracle": {
                            "platform_instance": "high_performance_production_unit"
                        },
                        "Sql": {"platform_instance": "reporting-db"},
                        "Snowflake": {"platform_instance": "sn-2"},
                    },
                },
            },
            "sink": {
                "type": "file",
                "config": {
                    "filename": f"{tmp_path}/golden_test_admin_access_not_allowed_mces.json",
                },
            },
        }
    )

    pipeline.run()
    pipeline.raise_from_status()
    golden_file = "golden_test_admin_access_not_allowed.json"

    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=f"{tmp_path}/golden_test_admin_access_not_allowed_mces.json",
        golden_path=f"{test_resources_dir}/{golden_file}",
    )


@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
def test_workspace_container(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest with workspace and dataset containers plus reports enabled."""
    enable_logging()

    test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"

    register_mock_api(request_mock=requests_mock)

    pipeline = Pipeline.create(
        {
            "run_id": "powerbi-test",
            "source": {
                "type": "powerbi",
                "config": {
                    **default_source_config(),
                    "workspace_id_pattern": {
                        "deny": ["64ED5CAD-7322-4684-8180-826122881108"],
                    },
                    "extract_workspaces_to_containers": True,
                    "extract_datasets_to_containers": True,
                    "extract_reports": True,
                },
            },
            "sink": {
                "type": "file",
                "config": {
                    "filename": f"{tmp_path}/powerbi_container_mces.json",
                },
            },
        }
    )

    pipeline.run()
    pipeline.raise_from_status()
    mce_out_file = "golden_test_container.json"

    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=f"{tmp_path}/powerbi_container_mces.json",
        golden_path=f"{test_resources_dir}/{mce_out_file}",
    )


@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
def test_access_token_expiry_with_long_expiry(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """With a 3600-second token, a pipeline run requests the token exactly twice."""
    enable_logging()

    register_mock_api(request_mock=requests_mock)

    pipeline = Pipeline.create(
        {
            "run_id": "powerbi-test",
            "source": {
                "type": "powerbi",
                "config": {
                    **default_source_config(),
                },
            },
            "sink": {
                "type": "file",
                "config": {
                    "filename": f"{tmp_path}/powerbi_access_token_mces.json",
                },
            },
        }
    )

    # for long expiry, the token should only be requested once.
    MsalClient.token = {
        "access_token": "dummy2",
        "expires_in": 3600,
    }

    MsalClient.reset()
    pipeline.run()
    # We expect the token to be requested twice (once for AdminApiResolver and one for RegularApiResolver)
    assert MsalClient.call_num == 2


@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
def test_access_token_expiry_with_short_expiry(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """With a zero-second token, a pipeline run requests the token more than twice."""
    enable_logging()

    register_mock_api(request_mock=requests_mock)

    pipeline = Pipeline.create(
        {
            "run_id": "powerbi-test",
            "source": {
                "type": "powerbi",
                "config": {
                    **default_source_config(),
                },
            },
            "sink": {
                "type": "file",
                "config": {
                    "filename": f"{tmp_path}/powerbi_access_token_mces.json",
                },
            },
        }
    )

    # for short expiry, the token should be requested when expires.
    MsalClient.token = {
        "access_token": "dummy",
        "expires_in": 0,
    }

    # call_num lives on the class and survives across tests; without this
    # reset the assertion below could be satisfied by requests counted before
    # this run started.
    MsalClient.reset()
    pipeline.run()
    assert MsalClient.call_num > 2


def dataset_type_mapping_set_to_all_platform(pipeline: Pipeline) -> None:
    """Assert the source's ``dataset_type_mapping`` equals the full default mapping.

    The expected mapping is built from every ``SupportedDataPlatform`` member:
    PowerBI platform name -> DataHub platform name.
    """
    source_config: PowerBiDashboardSourceConfig = cast(
        PowerBiDashboardSource, pipeline.source
    ).source_config

    assert source_config.dataset_type_mapping is not None

    # Generate default dataset_type_mapping and compare it with source_config.dataset_type_mapping
    default_dataset_type_mapping: dict = {
        item.value.powerbi_data_platform_name: item.value.datahub_data_platform_name
        for item in SupportedDataPlatform
    }

    assert default_dataset_type_mapping == source_config.dataset_type_mapping


@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_dataset_type_mapping_should_set_to_all(
    mock_msal, pytestconfig, tmp_path, mock_time, requests_mock
):
    """
    Here we don't need to run the pipeline. We need to verify dataset_type_mapping is set to default dataplatform
    """
    register_mock_api(request_mock=requests_mock)

    new_config: dict = {**default_source_config()}

    del new_config["dataset_type_mapping"]

    pipeline = Pipeline.create(
        {
            "run_id": "powerbi-test",
            "source": {
                "type": "powerbi",
                "config": {
                    **new_config,
                },
            },
            "sink": {
                "type": "file",
                "config": {
                    "filename": f"{tmp_path}/powerbi_lower_case_urn_mces.json",
                },
            },
        }
    )

    dataset_type_mapping_set_to_all_platform(pipeline)


@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_dataset_type_mapping_error(
    mock_msal, pytestconfig, tmp_path, mock_time, requests_mock
):
    """
    Here we don't need to run the pipeline. We need to verify if both dataset_type_mapping and server_to_platform_instance
    are set then value error should get raised
    """
    register_mock_api(request_mock=requests_mock)

    with pytest.raises(Exception, match=r"dataset_type_mapping is deprecated"):
        Pipeline.create(
            {
                "run_id": "powerbi-test",
                "source": {
                    "type": "powerbi",
                    "config": {
                        **default_source_config(),
                        "server_to_platform_instance": {
                            "localhost": {
                                "platform_instance": "test",
                            }
                        },
                    },
                },
                "sink": {
                    "type": "file",
                    "config": {
                        "filename": f"{tmp_path}/powerbi_lower_case_urn_mces.json",
                    },
                },
            }
        )


@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
def test_server_to_platform_map(
    mock_msal, pytestconfig, tmp_path, mock_time, requests_mock
):
    enable_logging()

    test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
    new_config: dict = {
        **default_source_config(),
        "extract_lineage": True,
        "convert_lineage_urns_to_lowercase": True,
    }

    del new_config["dataset_type_mapping"]

    new_config["server_to_platform_instance"] = {
        "hp123rt5.ap-southeast-2.fakecomputing.com": {
            "platform_instance": "snowflake_production_instance",
            "env": "PROD",
        },
        "my-test-project": {
            "platform_instance": "bigquery-computing-dev-account",
            "env": "QA",
        },
        "localhost:1521": {"platform_instance": "oracle-sales-instance", "env": "PROD"},
    }

    register_mock_api(request_mock=requests_mock)

    output_path: str = f"{tmp_path}/powerbi_server_to_platform_instance_mces.json"

    pipeline = Pipeline.create(
        {
            "run_id": "powerbi-test",
            "source": {
                "type": "powerbi",
                "config": new_config,
            },
            "sink": {
                "type": "file",
                "config": {
                    "filename": output_path,
                },
            },
        }
    )

    pipeline.run()
    pipeline.raise_from_status()
    golden_file_path: str = (
        f"{test_resources_dir}/golden_test_server_to_platform_instance.json"
    )

    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_path,
        golden_path=golden_file_path,
    )
    # As server_to_platform_instance map is provided, the old dataset_type_mapping
    # should be set to all
supported platform # to process all available upstream lineage even if mapping for platform instance is # not provided in server_to_platform_instance map dataset_type_mapping_set_to_all_platform(pipeline) def validate_pipeline(pipeline: Pipeline) -> None: mock_workspace: Workspace = Workspace( id="64ED5CAD-7C10-4684-8180-826122881108", name="demo-workspace", datasets={}, dashboards=[], reports=[], report_endorsements={}, dashboard_endorsements={}, scan_result={}, independent_datasets=[], ) # Fetch actual reports reports: List[Report] = cast( PowerBiDashboardSource, pipeline.source ).powerbi_client.get_reports(workspace=mock_workspace) assert len(reports) == 2 # Generate expected reports using mock reports mock_reports: List[Dict] = [ { "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "5b218778-e7a5-4d73-8187-f10824047715", "name": "SalesMarketing", "description": "Acryl sales marketing report", "pages": [ { "name": "ReportSection", "displayName": "Regional Sales Analysis", "order": "0", }, { "name": "ReportSection1", "displayName": "Geographic Analysis", "order": "1", }, ], }, { "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "e9fd6b0b-d8c8-4265-8c44-67e183aebf97", "name": "Product", "description": "Acryl product report", "pages": [], }, ] expected_reports: List[Report] = [ Report( id=report[Constant.ID], name=report[Constant.NAME], webUrl="", embedUrl="", description=report[Constant.DESCRIPTION], pages=[ Page( id="{}.{}".format( report[Constant.ID], page[Constant.NAME].replace(" ", "_") ), name=page[Constant.NAME], displayName=page[Constant.DISPLAY_NAME], order=page[Constant.ORDER], ) for page in report["pages"] ], users=[], tags=[], dataset=mock_workspace.datasets.get(report[Constant.DATASET_ID]), ) for report in mock_reports ] # Compare actual and expected reports for i in range(2): assert reports[i].id == expected_reports[i].id assert reports[i].name == expected_reports[i].name assert reports[i].description == 
expected_reports[i].description assert reports[i].dataset == expected_reports[i].dataset assert reports[i].pages == expected_reports[i].pages @freeze_time(FROZEN_TIME) @mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca) @pytest.mark.integration def test_reports_with_failed_page_request( mock_msal: MagicMock, pytestconfig: pytest.Config, tmp_path: str, mock_time: datetime.datetime, requests_mock: Any, ) -> None: """ Test that all reports are fetched even if a single page request fails """ register_mock_api( request_mock=requests_mock, override_data={ "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports": { "method": "GET", "status_code": 200, "json": { "value": [ { "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "5b218778-e7a5-4d73-8187-f10824047715", "name": "SalesMarketing", "description": "Acryl sales marketing report", "webUrl": "https://app.powerbi.com/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715", "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=5b218778-e7a5-4d73-8187-f10824047715&groupId=64ED5CAD-7C10-4684-8180-826122881108", }, { "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "e9fd6b0b-d8c8-4265-8c44-67e183aebf97", "name": "Product", "description": "Acryl product report", "webUrl": "https://app.powerbi.com/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/e9fd6b0b-d8c8-4265-8c44-67e183aebf97", "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=e9fd6b0b-d8c8-4265-8c44-67e183aebf97&groupId=64ED5CAD-7C10-4684-8180-826122881108", }, ] }, }, "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715": { "method": "GET", "status_code": 200, "json": { "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "5b218778-e7a5-4d73-8187-f10824047715", "name": "SalesMarketing", "description": "Acryl sales marketing report", "webUrl": 
"https://app.powerbi.com/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715", "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=5b218778-e7a5-4d73-8187-f10824047715&groupId=64ED5CAD-7C10-4684-8180-826122881108", }, }, "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/e9fd6b0b-d8c8-4265-8c44-67e183aebf97": { "method": "GET", "status_code": 200, "json": { "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "e9fd6b0b-d8c8-4265-8c44-67e183aebf97", "name": "Product", "description": "Acryl product report", "webUrl": "https://app.powerbi.com/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/e9fd6b0b-d8c8-4265-8c44-67e183aebf97", "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=e9fd6b0b-d8c8-4265-8c44-67e183aebf97&groupId=64ED5CAD-7C10-4684-8180-826122881108", }, }, "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715/pages": { "method": "GET", "status_code": 200, "json": { "value": [ { "displayName": "Regional Sales Analysis", "name": "ReportSection", "order": "0", }, { "displayName": "Geographic Analysis", "name": "ReportSection1", "order": "1", }, ] }, }, "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/e9fd6b0b-d8c8-4265-8c44-67e183aebf97/pages": { "method": "GET", "status_code": 400, "json": { "error": { "code": "InvalidRequest", "message": "Request is currently not supported for RDL reports", } }, }, }, ) pipeline = Pipeline.create( { "run_id": "powerbi-test", "source": { "type": "powerbi", "config": { **default_source_config(), "extract_reports": True, "platform_instance": "aws-ap-south-1", }, }, "sink": { "type": "file", "config": { "filename": f"{tmp_path}powerbi_reports_with_failed_page_request_mces.json", }, }, } ) validate_pipeline(pipeline) @freeze_time(FROZEN_TIME) @mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca) def 
test_independent_datasets_extraction( mock_msal: MagicMock, pytestconfig: pytest.Config, tmp_path: str, mock_time: datetime.datetime, requests_mock: Any, ) -> None: test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" register_mock_api( request_mock=requests_mock, override_data={ "https://api.powerbi.com/v1.0/myorg/groups": { "method": "GET", "status_code": 200, "json": { "@odata.count": 3, "value": [ { "id": "64ED5CAD-7C10-4684-8180-826122881108", "isReadOnly": True, "name": "demo-workspace", "type": "Workspace", }, ], }, }, "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanResult/4674efd1-603c-4129-8d82-03cf2be05aff": { "method": "GET", "status_code": 200, "json": { "workspaces": [ { "id": "64ED5CAD-7C10-4684-8180-826122881108", "name": "demo-workspace", "state": "Active", "datasets": [ { "id": "91580e0e-1680-4b1c-bbf9-4f6764d7a5ff", "tables": [ { "name": "employee_ctc", "source": [ { "expression": "dummy", } ], } ], }, ], }, ] }, }, "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/dashboards": { "method": "GET", "status_code": 200, "json": {"value": []}, }, }, ) pipeline = Pipeline.create( { "run_id": "powerbi-test", "source": { "type": "powerbi", "config": { **default_source_config(), "extract_independent_datasets": True, }, }, "sink": { "type": "file", "config": { "filename": f"{tmp_path}/powerbi_independent_mces.json", }, }, } ) pipeline.run() pipeline.raise_from_status() golden_file = "golden_test_independent_datasets.json" mce_helpers.check_golden_file( pytestconfig, output_path=f"{tmp_path}/powerbi_independent_mces.json", golden_path=f"{test_resources_dir}/{golden_file}", ) @freeze_time(FROZEN_TIME) @mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca) def test_cll_extraction( mock_msal: MagicMock, pytestconfig: pytest.Config, tmp_path: str, mock_time: datetime.datetime, requests_mock: Any, ) -> None: test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" 
register_mock_api( request_mock=requests_mock, ) default_conf: dict = default_source_config() del default_conf[ "dataset_type_mapping" ] # delete this key so that connector set it to default (all dataplatform) pipeline = Pipeline.create( { "run_id": "powerbi-test", "source": { "type": "powerbi", "config": { **default_conf, "extract_lineage": True, "extract_column_level_lineage": True, "enable_advance_lineage_sql_construct": True, "native_query_parsing": True, "extract_independent_datasets": True, }, }, "sink": { "type": "file", "config": { "filename": f"{tmp_path}/powerbi_cll_mces.json", }, }, } ) pipeline.run() pipeline.raise_from_status() golden_file = "golden_test_cll.json" mce_helpers.check_golden_file( pytestconfig, output_path=f"{tmp_path}/powerbi_cll_mces.json", golden_path=f"{test_resources_dir}/{golden_file}", ) @freeze_time(FROZEN_TIME) @mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca) def test_cll_extraction_flags( mock_msal: MagicMock, pytestconfig: pytest.Config, tmp_path: str, mock_time: datetime.datetime, requests_mock: Any, ) -> None: register_mock_api( request_mock=requests_mock, ) default_conf: dict = default_source_config() pattern: str = re.escape( "Enable all these flags in recipe: ['native_query_parsing', 'enable_advance_lineage_sql_construct', 'extract_lineage']" ) with pytest.raises(Exception, match=pattern): Pipeline.create( { "run_id": "powerbi-test", "source": { "type": "powerbi", "config": { **default_conf, "extract_column_level_lineage": True, }, }, "sink": { "type": "file", "config": { "filename": f"{tmp_path}/powerbi_cll_mces.json", }, }, } )