2023-12-14 18:41:50 +05:30
import datetime
2024-10-16 20:58:05 +05:30
import json
2023-10-04 16:22:51 +05:30
import re
2024-10-16 20:58:05 +05:30
from pathlib import Path
from typing import Any , Dict , List , Optional , Union , cast
2022-03-01 07:07:22 +05:30
from unittest import mock
2023-12-14 18:41:50 +05:30
from unittest . mock import MagicMock
2022-03-01 07:07:22 +05:30
2023-02-15 16:32:53 +05:30
import pytest
2022-03-01 07:07:22 +05:30
from freezegun import freeze_time
2024-10-16 20:58:05 +05:30
from datahub . ingestion . api . source import StructuredLogLevel
2022-03-01 07:07:22 +05:30
from datahub . ingestion . run . pipeline import Pipeline
2023-03-21 21:57:29 +05:30
from datahub . ingestion . source . powerbi . config import (
2023-05-20 06:32:55 +05:30
Constant ,
2023-03-21 21:57:29 +05:30
PowerBiDashboardSourceConfig ,
SupportedDataPlatform ,
)
from datahub . ingestion . source . powerbi . powerbi import PowerBiDashboardSource
2023-05-20 06:32:55 +05:30
from datahub . ingestion . source . powerbi . rest_api_wrapper . data_classes import (
Page ,
Report ,
2024-10-11 21:24:50 +05:30
ReportType ,
2023-05-20 06:32:55 +05:30
Workspace ,
)
2025-05-19 08:39:53 +02:00
from datahub . testing import mce_helpers
from tests . test_helpers import test_connection_helpers
2022-03-01 07:07:22 +05:30
2023-10-05 09:31:32 +05:30
# Module-level pytest marker: tags every test in this file with `integration_batch_2`.
pytestmark = pytest . mark . integration_batch_2
2022-03-01 07:07:22 +05:30
# Timestamp handed to `freeze_time` by the decorated tests in this file.
FROZEN_TIME = " 2022-02-03 07:00:00 "
2024-12-26 15:34:00 -05:00
def mock_msal_cca(*args, **kwargs):
    """Build a stand-in for the MSAL confidential client.

    All positional and keyword arguments are accepted and ignored. The
    returned object counts how often a token was requested (`call_num`) and
    hands back whatever dictionary is currently stored in `token`.
    """

    class MsalClient:
        """Minimal fake that records token acquisitions."""

        def __init__(self):
            self.reset()
            self.token: Dict[str, Any] = {" access_token ": " dummy "}

        def acquire_token_for_client(self, *_args, **_kwargs):
            """Count the request and return the currently configured token."""
            self.call_num = self.call_num + 1
            return self.token

        def reset(self):
            """Set the request counter back to zero."""
            self.call_num = 0

    client = MsalClient()
    return client
2022-03-01 07:07:22 +05:30
2022-08-08 23:56:00 +05:30
2023-01-03 21:38:11 +05:30
def scan_init_response(request, context):
    """Return the canned scan-initiation payload for the requested workspace(s).

    The request body is a form-encoded list of `workspaces=<workspace_id>`
    entries. The ids found in it are joined into a single lookup key; a key
    that is not in the table raises `KeyError`.
    """
    canned_responses: Dict[str, Any] = {
        " 64ED5CAD-7C10-4684-8180-826122881108 ": {
            " id ": " 4674efd1-603c-4129-8d82-03cf2be05aff "
        },
        " 64ED5CAD-7C22-4684-8180-826122881108 ": {
            " id ": " a674efd1-603c-4129-8d82-03cf2be05aff "
        },
        " 64ED5CAD-7C10-4684-8180-826122881108||64ED5CAD-7C22-4684-8180-826122881108 ": {
            " id ": " a674efd1-603c-4129-8d82-03cf2be05aff "
        },
        " A8D655A6-F521-477E-8C22-255018583BF4 ": {
            " id ": " 62DAF926-0B18-4FF1-982C-2A3EB6B8F0E4 "
        },
        " C5DA6EA8-625E-4AB1-90B6-CAEA0BF9F492 ": {
            " id ": " 81B02907-E2A3-45C3-B505-3781839C8CAA ",
        },
        " 8F756DE6-26AD-45FF-A201-44276FF1F561 ": {
            " id ": " 6147FCEB-7531-4449-8FB6-1F7A5431BF2D ",
        },
    }

    # When two or more workspaces are scanned the body repeats the field, e.g.
    # 'workspaces=<id-1>&workspaces=<id-2>'. Everything before the first field
    # name is discarded and the remaining ids are joined into one key.
    body = request.text.replace(" & ", " ")
    _, *requested_ids = body.split(" workspaces= ")
    lookup_key = " || ".join(requested_ids)
    return canned_responses[lookup_key]
2022-11-28 18:17:15 +02:00
2024-10-16 20:58:05 +05:30
def read_mock_data(path: Union[Path, str]) -> dict:
    """Load a JSON mock-response file and return its parsed content.

    Args:
        path: Location of the JSON file, as a `Path` or a string.

    Returns:
        The value produced by `json.load` for the file.
    """
    # Decode explicitly as UTF-8 so the fixtures are read the same way on
    # every machine, instead of depending on the locale's default encoding.
    with open(path, encoding="utf-8") as mock_file:
        return json.load(mock_file)
def register_mock_api(
    pytestconfig: pytest.Config, request_mock: Any, override_data: Optional[dict] = None
) -> None:
    """Register the mocked PowerBI REST endpoints on `request_mock`.

    The endpoint table starts with the admin `getInfo` scan endpoint (answered
    by `scan_init_response`), is extended with the entries of the default mock
    response file, and finally with `override_data`, so later sources replace
    earlier entries for the same URL.
    """
    responses: Dict[str, Any] = {
        " https://api.powerbi.com/v1.0/myorg/admin/workspaces/getInfo ": {
            " method ": " POST ",
            " status_code ": 200,
            " json ": scan_init_response,
        },
    }

    default_mock_data_path = (
        pytestconfig.rootpath
        / " tests/integration/powerbi/mock_data/default_mock_response.json "
    )
    for layer in (read_mock_data(default_mock_data_path), override_data or {}):
        responses.update(layer)

    for url, spec in responses.items():
        request_mock.register_uri(
            spec[" method "],
            url,
            json=spec.get(" json "),
            text=spec.get(" text "),
            status_code=spec[" status_code "],
        )
2022-08-08 23:56:00 +05:30
def default_source_config():
    """Return a fresh dictionary holding the baseline PowerBI source config.

    A new dictionary is built on every call, so callers may add, replace or
    delete keys without affecting other tests.
    """
    workspace_id = " 64ED5CAD-7C10-4684-8180-826122881108 "

    config = {
        " client_id ": " foo ",
        " client_secret ": " bar ",
        " tenant_id ": " 0B0C960B-FCDF-4D0F-8C45-2E03BB59DDEB ",
        " workspace_id ": workspace_id,
    }
    config.update(
        {
            " extract_lineage ": False,
            " extract_reports ": False,
            " extract_ownership ": True,
            " convert_lineage_urns_to_lowercase ": False,
        }
    )
    # The allow-list holds the same workspace id as the one configured above.
    config[" workspace_id_pattern "] = {" allow ": [workspace_id]}
    config[" dataset_type_mapping "] = {
        " PostgreSql ": " postgres ",
        " Oracle ": " oracle ",
    }
    config[" env "] = " DEV "
    config[" extract_workspaces_to_containers "] = False
    config[" enable_advance_lineage_sql_construct "] = False
    return config
@freeze_time(FROZEN_TIME)
@mock.patch(" msal.ConfidentialClientApplication ", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_ingest(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest with the default source config and check the output against its golden file."""
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    output_path = f" { tmp_path } /powerbi_mces.json "
    recipe = {
        " run_id ": " powerbi-test ",
        " source ": {
            " type ": " powerbi ",
            " config ": dict(default_source_config()),
        },
        " sink ": {
            " type ": " file ",
            " config ": {" filename ": output_path},
        },
    }

    pipeline = Pipeline.create(recipe)
    pipeline.run()
    pipeline.raise_from_status()

    test_resources_dir = pytestconfig.rootpath / " tests/integration/powerbi "
    golden_file = " golden_test_ingest.json "
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_path,
        golden_path=f" { test_resources_dir } / { golden_file } ",
    )
2024-10-11 21:24:50 +05:30
@freeze_time(FROZEN_TIME)
@mock.patch(" msal.ConfidentialClientApplication ", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_workspace_type_filter(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest with `workspace_type_filter` set and no workspace id restrictions.

    The mock API is extended with the entries of workspace_type_filter.json and
    the output is checked against golden_test_personal_ingest.json.
    """
    override_data = read_mock_data(
        pytestconfig.rootpath
        / " tests/integration/powerbi/mock_data/workspace_type_filter.json "
    )
    register_mock_api(
        request_mock=requests_mock,
        pytestconfig=pytestconfig,
        override_data=override_data,
    )

    # Drop both workspace restrictions from the baseline config.
    default_config: dict = default_source_config()
    for restricted_key in (" workspace_id ", " workspace_id_pattern "):
        del default_config[restricted_key]

    source_config = {
        **default_config,
        " extract_workspaces_to_containers ": True,
        " workspace_type_filter ": [
            " PersonalGroup ",
        ],
    }
    output_path = f" { tmp_path } /powerbi_mces.json "

    pipeline = Pipeline.create(
        {
            " run_id ": " powerbi-test ",
            " source ": {" type ": " powerbi ", " config ": source_config},
            " sink ": {" type ": " file ", " config ": {" filename ": output_path}},
        }
    )
    pipeline.run()
    pipeline.raise_from_status()

    test_resources_dir = pytestconfig.rootpath / " tests/integration/powerbi "
    golden_file = " golden_test_personal_ingest.json "
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_path,
        golden_path=f" { test_resources_dir } / { golden_file } ",
    )
2024-08-01 00:10:09 +05:30
@freeze_time(FROZEN_TIME)
@mock.patch(" msal.ConfidentialClientApplication ", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_ingest_patch_disabled(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest with `patch_metadata` set to False and check the output against its golden file."""
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    source_config = {
        **default_source_config(),
        " patch_metadata ": False,
    }
    output_path = f" { tmp_path } /powerbi_mces.json "

    pipeline = Pipeline.create(
        {
            " run_id ": " powerbi-test ",
            " source ": {" type ": " powerbi ", " config ": source_config},
            " sink ": {" type ": " file ", " config ": {" filename ": output_path}},
        }
    )
    pipeline.run()
    pipeline.raise_from_status()

    test_resources_dir = pytestconfig.rootpath / " tests/integration/powerbi "
    golden_file = " golden_test_ingest_patch_disabled.json "
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_path,
        golden_path=f" { test_resources_dir } / { golden_file } ",
    )
2023-12-14 23:01:51 +05:30
@freeze_time(FROZEN_TIME)
@mock.patch(" msal.ConfidentialClientApplication ", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_test_connection_success(mock_msal):
    """With the MSAL client patched, the connection test must report basic connectivity success."""
    source_config = default_source_config()
    connection_report = test_connection_helpers.run_test_connection(
        PowerBiDashboardSource, source_config
    )
    test_connection_helpers.assert_basic_connectivity_success(connection_report)
@freeze_time(FROZEN_TIME)
@pytest.mark.integration
def test_powerbi_test_connection_failure():
    """Without the MSAL patch, the connection test must report the expected failure reason."""
    source_config = default_source_config()
    connection_report = test_connection_helpers.run_test_connection(
        PowerBiDashboardSource, source_config
    )
    expected_reason = " Unable to get authority configuration "
    test_connection_helpers.assert_basic_connectivity_failure(
        connection_report, expected_reason
    )
2023-03-21 21:37:31 +05:30
@freeze_time(FROZEN_TIME)
@mock.patch(" msal.ConfidentialClientApplication ", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_platform_instance_ingest(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest with a `platform_instance` configured and check the output against its golden file."""
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    output_path: str = f" { tmp_path } /powerbi_platform_instance_mces.json "
    source_section = {
        " type ": " powerbi ",
        " config ": {
            **default_source_config(),
            " platform_instance ": " aws-ap-south-1 ",
        },
    }
    sink_section = {
        " type ": " file ",
        " config ": {" filename ": output_path},
    }

    pipeline = Pipeline.create(
        {
            " run_id ": " powerbi-test ",
            " source ": source_section,
            " sink ": sink_section,
        }
    )
    pipeline.run()
    pipeline.raise_from_status()

    test_resources_dir = pytestconfig.rootpath / " tests/integration/powerbi "
    golden_file = " golden_test_platform_instance_ingest.json "
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_path,
        golden_path=f" { test_resources_dir } / { golden_file } ",
    )
2023-01-03 21:38:11 +05:30
@freeze_time(FROZEN_TIME)
@mock.patch(" msal.ConfidentialClientApplication ", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_ingest_urn_lower_case(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest with both URN lower-casing options enabled and check the output against its golden file."""
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    source_config = {
        **default_source_config(),
        " env ": " PROD ",
        " platform_instance ": " myPlatformInstance ",
        " convert_urns_to_lowercase ": True,
        " convert_lineage_urns_to_lowercase ": True,
    }
    output_path = f" { tmp_path } /powerbi_lower_case_urn_mces.json "

    pipeline = Pipeline.create(
        {
            " run_id ": " powerbi-test ",
            " source ": {" type ": " powerbi ", " config ": source_config},
            " sink ": {" type ": " file ", " config ": {" filename ": output_path}},
        }
    )
    pipeline.run()
    pipeline.raise_from_status()

    test_resources_dir = pytestconfig.rootpath / " tests/integration/powerbi "
    golden_file = " golden_test_lower_case_urn_ingest.json "
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_path,
        golden_path=f" { test_resources_dir } / { golden_file } ",
    )
@freeze_time(FROZEN_TIME)
@mock.patch(" msal.ConfidentialClientApplication ", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_override_ownership(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest with `extract_ownership` switched off and check the output against its golden file."""
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    source_config = {
        **default_source_config(),
        " extract_ownership ": False,
    }
    output_path = f" { tmp_path } /powerbi_mces_disabled_ownership.json "

    pipeline = Pipeline.create(
        {
            " run_id ": " powerbi-test ",
            " source ": {" type ": " powerbi ", " config ": source_config},
            " sink ": {" type ": " file ", " config ": {" filename ": output_path}},
        }
    )
    pipeline.run()
    pipeline.raise_from_status()

    test_resources_dir = pytestconfig.rootpath / " tests/integration/powerbi "
    mce_out_file = " golden_test_disabled_ownership.json "
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_path,
        golden_path=f" { test_resources_dir } / { mce_out_file } ",
    )
2022-11-17 12:22:23 +05:30
2022-11-28 18:17:15 +02:00
@freeze_time(FROZEN_TIME)
@mock.patch(" msal.ConfidentialClientApplication ", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_scan_all_workspaces(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest with the workspace allow-list replaced by a deny-list and check the golden file.

    Reports and ownership extraction are both disabled for this run.
    """
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    source_config = {
        **default_source_config(),
        " extract_reports ": False,
        " extract_ownership ": False,
        " workspace_id_pattern ": {
            " deny ": [" 64ED5CAD-7322-4684-8180-826122881108 "],
        },
    }
    output_path = f" { tmp_path } /powerbi_mces_scan_all_workspaces.json "

    pipeline = Pipeline.create(
        {
            " run_id ": " powerbi-test ",
            " source ": {" type ": " powerbi ", " config ": source_config},
            " sink ": {" type ": " file ", " config ": {" filename ": output_path}},
        }
    )
    pipeline.run()
    pipeline.raise_from_status()

    test_resources_dir = pytestconfig.rootpath / " tests/integration/powerbi "
    golden_file = " golden_test_scan_all_workspaces.json "
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_path,
        golden_path=f" { test_resources_dir } / { golden_file } ",
    )
2022-11-17 12:22:23 +05:30
@freeze_time(FROZEN_TIME)
@mock.patch(" msal.ConfidentialClientApplication ", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_extract_reports(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest with `extract_reports` enabled and check the output against its golden file."""
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    source_config = {
        **default_source_config(),
        " extract_reports ": True,
    }
    output_path = f" { tmp_path } /powerbi_report_mces.json "

    pipeline = Pipeline.create(
        {
            " run_id ": " powerbi-test ",
            " source ": {" type ": " powerbi ", " config ": source_config},
            " sink ": {" type ": " file ", " config ": {" filename ": output_path}},
        }
    )
    pipeline.run()
    pipeline.raise_from_status()

    test_resources_dir = pytestconfig.rootpath / " tests/integration/powerbi "
    golden_file = " golden_test_report.json "
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_path,
        golden_path=f" { test_resources_dir } / { golden_file } ",
    )
@freeze_time(FROZEN_TIME)
@mock.patch(" msal.ConfidentialClientApplication ", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_extract_lineage(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest with lineage extraction on and per-platform instance mappings, then check the golden file."""
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    platform_instances = {
        " PostgreSql ": {" platform_instance ": " operational_instance "},
        " Oracle ": {
            " platform_instance ": " high_performance_production_unit "
        },
        " Sql ": {" platform_instance ": " reporting-db "},
        " Snowflake ": {" platform_instance ": " sn-2 "},
    }
    source_config = {
        **default_source_config(),
        " extract_lineage ": True,
        " dataset_type_mapping ": platform_instances,
    }
    output_path = f" { tmp_path } /powerbi_lineage_mces.json "

    pipeline = Pipeline.create(
        {
            " run_id ": " powerbi-lineage-test ",
            " source ": {" type ": " powerbi ", " config ": source_config},
            " sink ": {" type ": " file ", " config ": {" filename ": output_path}},
        }
    )
    pipeline.run()
    pipeline.raise_from_status()

    test_resources_dir = pytestconfig.rootpath / " tests/integration/powerbi "
    golden_file = " golden_test_lineage.json "
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_path,
        golden_path=f" { test_resources_dir } / { golden_file } ",
    )
2023-01-18 05:47:15 +02:00
@freeze_time(FROZEN_TIME)
@mock.patch(" msal.ConfidentialClientApplication ", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_extract_endorsements(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest with `extract_endorsements_to_tags` enabled and check the output against its golden file."""
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    source_config = {
        **default_source_config(),
        " extract_reports ": False,
        " extract_endorsements_to_tags ": True,
    }
    output_path = f" { tmp_path } /powerbi_endorsement_mces.json "

    pipeline = Pipeline.create(
        {
            " run_id ": " powerbi-test ",
            " source ": {" type ": " powerbi ", " config ": source_config},
            " sink ": {" type ": " file ", " config ": {" filename ": output_path}},
        }
    )
    pipeline.run()
    pipeline.raise_from_status()

    test_resources_dir = pytestconfig.rootpath / " tests/integration/powerbi "
    mce_out_file = " golden_test_endorsement.json "
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_path,
        golden_path=f" { test_resources_dir } / { mce_out_file } ",
    )
2023-01-25 01:06:50 +02:00
2023-02-14 23:28:34 +05:30
@freeze_time(FROZEN_TIME)
@mock.patch(" msal.ConfidentialClientApplication ", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_admin_access_is_not_allowed(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest while the admin `getInfo` endpoint is mocked to answer with HTTP 403.

    The pipeline must still finish without raising and its output must match
    golden_test_admin_access_not_allowed.json.
    """
    forbidden_admin_scan = {
        " https://api.powerbi.com/v1.0/myorg/admin/workspaces/getInfo ": {
            " method ": " POST ",
            " status_code ": 403,
            " json ": {},
        },
    }
    register_mock_api(
        pytestconfig=pytestconfig,
        request_mock=requests_mock,
        override_data=forbidden_admin_scan,
    )

    platform_instances = {
        " PostgreSql ": {" platform_instance ": " operational_instance "},
        " Oracle ": {
            " platform_instance ": " high_performance_production_unit "
        },
        " Sql ": {" platform_instance ": " reporting-db "},
        " Snowflake ": {" platform_instance ": " sn-2 "},
    }
    source_config = {
        **default_source_config(),
        " extract_lineage ": True,
        " dataset_type_mapping ": platform_instances,
    }
    output_path = f" { tmp_path } /golden_test_admin_access_not_allowed_mces.json "

    pipeline = Pipeline.create(
        {
            " run_id ": " powerbi-admin-api-disabled-test ",
            " source ": {" type ": " powerbi ", " config ": source_config},
            " sink ": {" type ": " file ", " config ": {" filename ": output_path}},
        }
    )
    pipeline.run()
    pipeline.raise_from_status()

    test_resources_dir = pytestconfig.rootpath / " tests/integration/powerbi "
    golden_file = " golden_test_admin_access_not_allowed.json "
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_path,
        golden_path=f" { test_resources_dir } / { golden_file } ",
    )
2023-01-25 01:06:50 +02:00
@freeze_time(FROZEN_TIME)
@mock.patch(" msal.ConfidentialClientApplication ", side_effect=mock_msal_cca)
def test_workspace_container(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest with workspace and dataset containers plus reports enabled, then check the golden file."""
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    source_config = {
        **default_source_config(),
        " workspace_id_pattern ": {
            " deny ": [" 64ED5CAD-7322-4684-8180-826122881108 "],
        },
        " extract_workspaces_to_containers ": True,
        " extract_datasets_to_containers ": True,
        " extract_reports ": True,
    }
    output_path = f" { tmp_path } /powerbi_container_mces.json "

    pipeline = Pipeline.create(
        {
            " run_id ": " powerbi-test ",
            " source ": {" type ": " powerbi ", " config ": source_config},
            " sink ": {" type ": " file ", " config ": {" filename ": output_path}},
        }
    )
    pipeline.run()
    pipeline.raise_from_status()

    test_resources_dir = pytestconfig.rootpath / " tests/integration/powerbi "
    mce_out_file = " golden_test_container.json "
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_path,
        golden_path=f" { test_resources_dir } / { mce_out_file } ",
    )
2023-03-21 21:57:29 +05:30
2023-12-14 18:41:50 +05:30
def test_access_token_expiry_with_long_expiry(
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """A token with a long `expires_in` must not be re-requested during the run."""
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    mock_msal = mock_msal_cca()
    recipe = {
        " run_id ": " powerbi-test ",
        " source ": {
            " type ": " powerbi ",
            " config ": dict(default_source_config()),
        },
        " sink ": {
            " type ": " file ",
            " config ": {
                " filename ": f" { tmp_path } /powerbi_access_token_mces.json ",
            },
        },
    }

    with mock.patch(" msal.ConfidentialClientApplication ", return_value=mock_msal):
        pipeline = Pipeline.create(recipe)

        # Swap in a long-lived token and restart the request counter so only
        # the requests made from here on are counted.
        mock_msal.token = {
            " access_token ": " dummy2 ",
            " expires_in ": 3600,
        }
        mock_msal.reset()

        pipeline.run()

        # With a long expiry the token is fetched once per resolver and never
        # refreshed: one request for AdminApiResolver and one for
        # RegularApiResolver, two in total.
        assert mock_msal.call_num == 2
2023-12-14 18:41:50 +05:30
def test_access_token_expiry_with_short_expiry(
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """A token whose `expires_in` is 0 must be requested more than twice during the run."""
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    mock_msal = mock_msal_cca()
    recipe = {
        " run_id ": " powerbi-test ",
        " source ": {
            " type ": " powerbi ",
            " config ": dict(default_source_config()),
        },
        " sink ": {
            " type ": " file ",
            " config ": {
                " filename ": f" { tmp_path } /powerbi_access_token_mces.json ",
            },
        },
    }

    with mock.patch(" msal.ConfidentialClientApplication ", return_value=mock_msal):
        pipeline = Pipeline.create(recipe)

        # Swap in a token that expires immediately and restart the request
        # counter, so the token has to be requested again whenever it expires.
        mock_msal.token = {
            " access_token ": " dummy ",
            " expires_in ": 0,
        }
        mock_msal.reset()

        pipeline.run()

        assert mock_msal.call_num > 2
2023-12-14 18:41:50 +05:30
2023-03-21 21:57:29 +05:30
def dataset_type_mapping_set_to_all_platform(pipeline: Pipeline) -> None:
    """Assert that the source's `dataset_type_mapping` covers every supported platform.

    The expected mapping is derived from `SupportedDataPlatform`: each member's
    PowerBI platform name maps to its DataHub platform name.
    """
    powerbi_source = cast(PowerBiDashboardSource, pipeline.source)
    source_config: PowerBiDashboardSourceConfig = powerbi_source.source_config

    assert source_config.dataset_type_mapping is not None

    # Build the default mapping and compare it with the configured one.
    default_dataset_type_mapping: dict = {
        platform.value.powerbi_data_platform_name: platform.value.datahub_data_platform_name
        for platform in SupportedDataPlatform
    }

    assert default_dataset_type_mapping == source_config.dataset_type_mapping
@freeze_time(FROZEN_TIME)
@mock.patch(" msal.ConfidentialClientApplication ", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_dataset_type_mapping_should_set_to_all(
    mock_msal, pytestconfig, tmp_path, mock_time, requests_mock
):
    """
    The pipeline is only created, never run: the check is that leaving out
    dataset_type_mapping makes the source fall back to the default data platforms.
    """
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    new_config: dict = default_source_config()
    new_config.pop(" dataset_type_mapping ")

    recipe = {
        " run_id ": " powerbi-test ",
        " source ": {
            " type ": " powerbi ",
            " config ": dict(new_config),
        },
        " sink ": {
            " type ": " file ",
            " config ": {
                " filename ": f" { tmp_path } /powerbi_lower_case_urn_mces.json ",
            },
        },
    }
    pipeline = Pipeline.create(recipe)

    dataset_type_mapping_set_to_all_platform(pipeline)
@freeze_time(FROZEN_TIME)
@mock.patch(" msal.ConfidentialClientApplication ", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_dataset_type_mapping_error(
    mock_msal, pytestconfig, tmp_path, mock_time, requests_mock
):
    """
    The pipeline is only created, never run: when both dataset_type_mapping and
    server_to_platform_instance are configured, creation must raise an error
    whose message matches the deprecation text.
    """
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    with pytest.raises(Exception, match=r" dataset_type_mapping is deprecated "):
        conflicting_config = {
            **default_source_config(),
            " server_to_platform_instance ": {
                " localhost ": {
                    " platform_instance ": " test ",
                }
            },
        }
        Pipeline.create(
            {
                " run_id ": " powerbi-test ",
                " source ": {" type ": " powerbi ", " config ": conflicting_config},
                " sink ": {
                    " type ": " file ",
                    " config ": {
                        " filename ": f" { tmp_path } /powerbi_lower_case_urn_mces.json ",
                    },
                },
            }
        )
@freeze_time(FROZEN_TIME)
@mock.patch(" msal.ConfidentialClientApplication ", side_effect=mock_msal_cca)
def test_server_to_platform_map(
    mock_msal, pytestconfig, tmp_path, mock_time, requests_mock
):
    """Ingest with `server_to_platform_instance` in place of `dataset_type_mapping`.

    Checks the output against its golden file and then verifies that
    `dataset_type_mapping` was filled with all supported platforms.
    """
    server_to_platform_instance = {
        " hp123rt5.ap-southeast-2.fakecomputing.com ": {
            " platform_instance ": " snowflake_production_instance ",
            " env ": " PROD ",
        },
        " my-test-project ": {
            " platform_instance ": " bigquery-computing-dev-account ",
            " env ": " QA ",
        },
        " localhost:1521 ": {" platform_instance ": " oracle-sales-instance ", " env ": " PROD "},
    }

    new_config: dict = default_source_config()
    new_config.pop(" dataset_type_mapping ")
    new_config.update(
        {
            " extract_lineage ": True,
            " convert_lineage_urns_to_lowercase ": True,
            " server_to_platform_instance ": server_to_platform_instance,
        }
    )

    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    output_path: str = f" { tmp_path } /powerbi_server_to_platform_instance_mces.json "
    recipe = {
        " run_id ": " powerbi-test ",
        " source ": {" type ": " powerbi ", " config ": new_config},
        " sink ": {" type ": " file ", " config ": {" filename ": output_path}},
    }
    pipeline = Pipeline.create(recipe)
    pipeline.run()
    pipeline.raise_from_status()

    test_resources_dir = pytestconfig.rootpath / " tests/integration/powerbi "
    golden_file_path: str = (
        f" { test_resources_dir } /golden_test_server_to_platform_instance.json "
    )
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_path,
        golden_path=golden_file_path,
    )

    # Because a server_to_platform_instance map is provided, the old
    # dataset_type_mapping should be set to all supported platforms, so that
    # every available upstream lineage is processed even when the map has no
    # platform-instance entry for it.
    dataset_type_mapping_set_to_all_platform(pipeline)
2023-05-20 06:32:55 +05:30
def validate_pipeline(pipeline: Pipeline) -> None:
    """Check the reports returned by the pipeline's PowerBI client for the demo workspace.

    Exactly two reports are expected. Their id, name, description, dataset and
    pages are compared with `Report` objects built from the inline mock data.
    """
    mock_workspace: Workspace = Workspace(
        id=" 64ED5CAD-7C10-4684-8180-826122881108 ",
        name=" demo-workspace ",
        type=" Workspace ",
        datasets={},
        dashboards={},
        reports={},
        report_endorsements={},
        dashboard_endorsements={},
        scan_result={},
        independent_datasets={},
        app=None,
    )

    # Reports as returned by the client under test.
    powerbi_source = cast(PowerBiDashboardSource, pipeline.source)
    reports: Dict[str, Report] = powerbi_source.powerbi_client.get_reports(
        workspace=mock_workspace
    )
    assert len(reports) == 2

    # Raw description of the reports the client is expected to return.
    mock_reports: List[Dict] = [
        {
            " datasetId ": " 05169CD2-E713-41E6-9600-1D8066D95445 ",
            " id ": " 5b218778-e7a5-4d73-8187-f10824047715 ",
            " name ": " SalesMarketing ",
            " description ": " Acryl sales marketing report ",
            " pages ": [
                {
                    " name ": " ReportSection ",
                    " displayName ": " Regional Sales Analysis ",
                    " order ": " 0 ",
                },
                {
                    " name ": " ReportSection1 ",
                    " displayName ": " Geographic Analysis ",
                    " order ": " 1 ",
                },
            ],
        },
        {
            " datasetId ": " 05169CD2-E713-41E6-9600-1D8066D95445 ",
            " id ": " e9fd6b0b-d8c8-4265-8c44-67e183aebf97 ",
            " name ": " Product ",
            " description ": " Acryl product report ",
            " pages ": [],
        },
    ]

    # Turn each raw description into the Report object it should correspond to.
    expected_reports: Dict[str, Report] = {}
    for report in mock_reports:
        expected_pages = [
            Page(
                id=" {} . {} ".format(
                    report[Constant.ID], page[Constant.NAME].replace(" ", " _ ")
                ),
                name=page[Constant.NAME],
                displayName=page[Constant.DISPLAY_NAME],
                order=page[Constant.ORDER],
            )
            for page in report[" pages "]
        ]
        expected_reports[report[Constant.ID]] = Report(
            id=report[Constant.ID],
            name=report[Constant.NAME],
            type=ReportType.PowerBIReport,
            webUrl=" ",
            embedUrl=" ",
            description=report[Constant.DESCRIPTION],
            pages=expected_pages,
            users=[],
            tags=[],
            dataset_id=report[Constant.DATASET_ID],
            dataset=mock_workspace.datasets.get(report[Constant.DATASET_ID]),
        )

    # Field-by-field comparison of actual and expected reports.
    for report in mock_reports:
        report_id = report[Constant.ID]
        actual, expected = reports[report_id], expected_reports[report_id]
        assert actual.id == expected.id
        assert actual.name == expected.name
        assert actual.description == expected.description
        assert actual.dataset == expected.dataset
        assert actual.pages == expected.pages
2023-05-20 06:32:55 +05:30
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_reports_with_failed_page_request(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """
    Test that all reports are fetched even if a single page request fails

    The mocked ``/pages`` endpoint of the second report answers with HTTP 400;
    ``validate_pipeline`` then checks that both reports are still returned.
    """
    register_mock_api(
        pytestconfig=pytestconfig,
        request_mock=requests_mock,
        override_data={
            # Listing of the reports in the workspace.
            "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports": {
                "method": "GET",
                "status_code": 200,
                "json": {
                    "value": [
                        {
                            "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
                            "id": "5b218778-e7a5-4d73-8187-f10824047715",
                            "reportType": "PowerBIReport",
                            "name": "SalesMarketing",
                            "description": "Acryl sales marketing report",
                            "webUrl": "https://app.powerbi.com/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715",
                            "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=5b218778-e7a5-4d73-8187-f10824047715&groupId=64ED5CAD-7C10-4684-8180-826122881108",
                        },
                        {
                            "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
                            "id": "e9fd6b0b-d8c8-4265-8c44-67e183aebf97",
                            "reportType": "PaginatedReport",
                            "name": "Product",
                            "description": "Acryl product report",
                            "webUrl": "https://app.powerbi.com/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/e9fd6b0b-d8c8-4265-8c44-67e183aebf97",
                            "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=e9fd6b0b-d8c8-4265-8c44-67e183aebf97&groupId=64ED5CAD-7C10-4684-8180-826122881108",
                        },
                    ]
                },
            },
            # Detail endpoints of the two reports.
            "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715": {
                "method": "GET",
                "status_code": 200,
                "json": {
                    "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
                    "id": "5b218778-e7a5-4d73-8187-f10824047715",
                    "name": "SalesMarketing",
                    "reportType": "PowerBIReport",
                    "description": "Acryl sales marketing report",
                    "webUrl": "https://app.powerbi.com/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715",
                    "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=5b218778-e7a5-4d73-8187-f10824047715&groupId=64ED5CAD-7C10-4684-8180-826122881108",
                },
            },
            "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/e9fd6b0b-d8c8-4265-8c44-67e183aebf97": {
                "method": "GET",
                "status_code": 200,
                "json": {
                    "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
                    "id": "e9fd6b0b-d8c8-4265-8c44-67e183aebf97",
                    "reportType": "PowerBIReport",
                    "name": "Product",
                    "description": "Acryl product report",
                    "webUrl": "https://app.powerbi.com/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/e9fd6b0b-d8c8-4265-8c44-67e183aebf97",
                    "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=e9fd6b0b-d8c8-4265-8c44-67e183aebf97&groupId=64ED5CAD-7C10-4684-8180-826122881108",
                },
            },
            # Pages of the first report are served normally ...
            "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715/pages": {
                "method": "GET",
                "status_code": 200,
                "json": {
                    "value": [
                        {
                            "displayName": "Regional Sales Analysis",
                            "name": "ReportSection",
                            "order": "0",
                        },
                        {
                            "displayName": "Geographic Analysis",
                            "name": "ReportSection1",
                            "order": "1",
                        },
                    ]
                },
            },
            # ... while the pages request of the second report fails.
            "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/e9fd6b0b-d8c8-4265-8c44-67e183aebf97/pages": {
                "method": "GET",
                "status_code": 400,
                "json": {
                    "error": {
                        "code": "InvalidRequest",
                        "message": "Request is currently not supported for RDL reports",
                    }
                },
            },
        },
    )

    pipeline = Pipeline.create(
        {
            "run_id": "powerbi-test",
            "source": {
                "type": "powerbi",
                "config": {
                    **default_source_config(),
                    "extract_reports": True,
                    "platform_instance": "aws-ap-south-1",
                },
            },
            "sink": {
                "type": "file",
                "config": {
                    # The "/" keeps the sink file inside the per-test temp
                    # directory instead of next to it.
                    "filename": f"{tmp_path}/powerbi_reports_with_failed_page_request_mces.json",
                },
            },
        }
    )

    validate_pipeline(pipeline)
2023-06-16 02:34:40 +05:30
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
def test_independent_datasets_extraction(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest with ``extract_independent_datasets`` enabled and compare with the golden file.

    The mocked scan result holds one dataset in the workspace and the
    dashboards endpoint returns an empty list.
    """
    test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
    output_file = f"{tmp_path}/powerbi_independent_mces.json"

    mocked_responses = {
        # First page of the workspace listing: a single workspace.
        "https://api.powerbi.com/v1.0/myorg/groups?%24skip=0&%24top=1000": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "value": [
                    {
                        "id": "64ED5CAD-7C10-4684-8180-826122881108",
                        "isReadOnly": True,
                        "name": "demo-workspace",
                        "type": "Workspace",
                    },
                ],
            },
        },
        # Second page of the workspace listing: empty.
        "https://api.powerbi.com/v1.0/myorg/groups?%24skip=1000&%24top=1000": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "value": [],
            },
        },
        "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanResult/4674efd1-603c-4129-8d82-03cf2be05aff": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "workspaces": [
                    {
                        "id": "64ED5CAD-7C10-4684-8180-826122881108",
                        "name": "demo-workspace",
                        "type": "Workspace",
                        "state": "Active",
                        "datasets": [
                            {
                                "id": "91580e0e-1680-4b1c-bbf9-4f6764d7a5ff",
                                "tables": [
                                    {
                                        "name": "employee_ctc",
                                        "source": [
                                            {
                                                "expression": "dummy",
                                            }
                                        ],
                                    }
                                ],
                            },
                        ],
                    },
                ]
            },
        },
        "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/dashboards": {
            "method": "GET",
            "status_code": 200,
            "json": {"value": []},
        },
    }
    register_mock_api(
        pytestconfig=pytestconfig,
        request_mock=requests_mock,
        override_data=mocked_responses,
    )

    recipe = {
        "run_id": "powerbi-test",
        "source": {
            "type": "powerbi",
            "config": {
                **default_source_config(),
                "extract_independent_datasets": True,
            },
        },
        "sink": {
            "type": "file",
            "config": {
                "filename": output_file,
            },
        },
    }
    pipeline = Pipeline.create(recipe)
    pipeline.run()
    pipeline.raise_from_status()

    golden_file = "golden_test_independent_datasets.json"
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_file,
        golden_path=f"{test_resources_dir}/{golden_file}",
    )
2023-10-04 16:22:51 +05:30
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
def test_cll_extraction(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Run ingestion with column-level lineage enabled and compare with the golden file."""
    test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
    output_file = f"{tmp_path}/powerbi_cll_mces.json"

    register_mock_api(
        pytestconfig=pytestconfig,
        request_mock=requests_mock,
    )

    default_conf: dict = default_source_config()
    # Drop this key so that the connector falls back to its default
    # (all data platforms).
    default_conf.pop("dataset_type_mapping")

    source_config = {
        **default_conf,
        "extract_lineage": True,
        "extract_column_level_lineage": True,
        "enable_advance_lineage_sql_construct": True,
        "native_query_parsing": True,
        "extract_independent_datasets": True,
    }
    pipeline = Pipeline.create(
        {
            "run_id": "powerbi-test",
            "source": {"type": "powerbi", "config": source_config},
            "sink": {"type": "file", "config": {"filename": output_file}},
        }
    )
    pipeline.run()
    pipeline.raise_from_status()

    golden_file = "golden_test_cll.json"
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_file,
        golden_path=f"{test_resources_dir}/{golden_file}",
    )
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
def test_cll_extraction_flags(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Creating a pipeline with only ``extract_column_level_lineage`` set must fail.

    The raised exception is expected to name the flags that have to be
    enabled in the recipe.
    """
    register_mock_api(
        pytestconfig=pytestconfig,
        request_mock=requests_mock,
    )

    default_conf: dict = default_source_config()

    # Escaped because the message contains regex metacharacters ("[", "]").
    pattern: str = re.escape(
        "Enable all these flags in recipe: ['native_query_parsing', 'enable_advance_lineage_sql_construct', 'extract_lineage', 'extract_dataset_schema']"
    )

    recipe = {
        "run_id": "powerbi-test",
        "source": {
            "type": "powerbi",
            "config": {
                **default_conf,
                "extract_column_level_lineage": True,
            },
        },
        "sink": {
            "type": "file",
            "config": {
                "filename": f"{tmp_path}/powerbi_cll_mces.json",
            },
        },
    }

    with pytest.raises(Exception, match=pattern):
        Pipeline.create(recipe)
2024-10-16 20:58:05 +05:30
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_cross_workspace_reference_info_message(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest two workspaces and check the "Missing Dataset Lineage For Tile" info entry.

    After the run, the reporter's INFO-level structured log entries must
    contain one with that title, and the sink output must match the golden
    file.
    """
    test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
    output_file = f"{tmp_path}/powerbi_mces.json"

    register_mock_api(
        pytestconfig=pytestconfig,
        request_mock=requests_mock,
        override_data=read_mock_data(
            path=pytestconfig.rootpath
            / "tests/integration/powerbi/mock_data/cross_workspace_mock_response.json"
        ),
    )

    # Replace the single workspace id by an allow-list of two workspaces.
    config = default_source_config()
    del config["workspace_id"]
    config["workspace_id_pattern"] = {
        "allow": [
            "A8D655A6-F521-477E-8C22-255018583BF4",
            "C5DA6EA8-625E-4AB1-90B6-CAEA0BF9F492",
        ]
    }
    config["include_workspace_name_in_dataset_urn"] = True

    pipeline = Pipeline.create(
        {
            "run_id": "powerbi-test",
            "source": {
                "type": "powerbi",
                "config": {**config},
            },
            "sink": {
                "type": "file",
                "config": {"filename": output_file},
            },
        }
    )
    pipeline.run()
    pipeline.raise_from_status()

    assert isinstance(pipeline.source, PowerBiDashboardSource)  # to silent the lint

    structured_logs = pipeline.source.reporter._structured_logs  # type: ignore
    info_entries: dict = structured_logs._entries.get(StructuredLogLevel.INFO, {})

    # Look for the expected title among the INFO entries.
    is_entry_present: bool = any(
        entry.title == "Missing Dataset Lineage For Tile"
        for entry in info_entries.values()
    )
    assert is_entry_present, (
        'Info message "Missing Dataset Lineage For Tile" should be present in reporter'
    )

    golden_file = "golden_test_cross_workspace_dataset.json"
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_file,
        golden_path=f"{test_resources_dir}/{golden_file}",
    )
2024-10-29 13:49:37 +05:30
def common_app_ingest(
    pytestconfig: pytest.Config,
    requests_mock: Any,
    output_mcp_path: str,
    override_config: Optional[dict] = None,
) -> Pipeline:
    """Run a PowerBI ingestion against the "workspace with app" mock responses.

    Args:
        pytestconfig: pytest configuration; its ``rootpath`` locates the mock data.
        requests_mock: request-mocking fixture passed on to ``register_mock_api``.
        output_mcp_path: filename handed to the file sink.
        override_config: optional source-config entries applied on top of the
            default source config.

    Returns:
        The pipeline after ``run()`` and ``raise_from_status()`` have completed.
    """
    extra_config = {} if override_config is None else override_config

    mock_data_path = (
        pytestconfig.rootpath
        / "tests/integration/powerbi/mock_data/workspace_with_app_mock_response.json"
    )
    register_mock_api(
        pytestconfig=pytestconfig,
        request_mock=requests_mock,
        override_data=read_mock_data(path=mock_data_path),
    )

    # Select the workspace through an allow-list instead of a fixed workspace id.
    config = default_source_config()
    del config["workspace_id"]
    config["workspace_id_pattern"] = {
        "allow": [
            "8F756DE6-26AD-45FF-A201-44276FF1F561",
        ]
    }
    config.update(extra_config)

    recipe = {
        "run_id": "powerbi-test",
        "source": {
            "type": "powerbi",
            "config": {**config},
        },
        "sink": {
            "type": "file",
            "config": {"filename": output_mcp_path},
        },
    }
    pipeline = Pipeline.create(recipe)
    pipeline.run()
    pipeline.raise_from_status()
    return pipeline
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_app_ingest(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest with ``extract_app`` enabled and compare the output with the golden file."""
    output_file = f"{tmp_path}/powerbi_mces.json"

    common_app_ingest(
        pytestconfig=pytestconfig,
        requests_mock=requests_mock,
        output_mcp_path=output_file,
        override_config={"extract_app": True},
    )

    golden_file = "golden_test_app_ingest.json"
    test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_file,
        golden_path=f"{test_resources_dir}/{golden_file}",
    )
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_app_ingest_info_message(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest without overriding the config and expect the "App Ingestion Is Disabled" info entry."""
    pipeline = common_app_ingest(
        pytestconfig=pytestconfig,
        requests_mock=requests_mock,
        output_mcp_path=f"{tmp_path}/powerbi_mces.json",
    )

    assert isinstance(pipeline.source, PowerBiDashboardSource)  # to silent the lint

    structured_logs = pipeline.source.reporter._structured_logs  # type: ignore
    info_entries: dict = structured_logs._entries.get(StructuredLogLevel.INFO, {})

    # Look for the expected title among the INFO entries.
    is_entry_present: bool = any(
        entry.title == "App Ingestion Is Disabled" for entry in info_entries.values()
    )
    assert is_entry_present, (
        "The extract_app flag should be set to false by default. We need to keep this flag as false until all GMS instances are updated to the latest release."
    )