mirror of
https://github.com/datahub-project/datahub.git
synced 2025-07-04 15:50:14 +00:00
1578 lines
50 KiB
Python
1578 lines
50 KiB
Python
import datetime
|
|
import json
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List, Optional, Union, cast
|
|
from unittest import mock
|
|
from unittest.mock import MagicMock
|
|
|
|
import pytest
|
|
from freezegun import freeze_time
|
|
|
|
from datahub.ingestion.api.source import StructuredLogLevel
|
|
from datahub.ingestion.run.pipeline import Pipeline
|
|
from datahub.ingestion.source.powerbi.config import (
|
|
Constant,
|
|
PowerBiDashboardSourceConfig,
|
|
SupportedDataPlatform,
|
|
)
|
|
from datahub.ingestion.source.powerbi.powerbi import PowerBiDashboardSource
|
|
from datahub.ingestion.source.powerbi.rest_api_wrapper.data_classes import (
|
|
Page,
|
|
Report,
|
|
ReportType,
|
|
Workspace,
|
|
)
|
|
from datahub.testing import mce_helpers
|
|
from tests.test_helpers import test_connection_helpers
|
|
|
|
pytestmark = pytest.mark.integration_batch_2
|
|
FROZEN_TIME = "2022-02-03 07:00:00"
|
|
|
|
|
|
def mock_msal_cca(*args, **kwargs):
    """Build a stand-in for ``msal.ConfidentialClientApplication``.

    All positional and keyword arguments are accepted and ignored. The
    returned object hands out a canned token and counts how many times a
    token was requested, so tests can assert on the number of acquisitions.
    """

    class MsalClient:
        """Fake MSAL client that records token acquisitions."""

        def __init__(self):
            self.token: Dict[str, Any] = {"access_token": "dummy"}
            self.reset()

        def reset(self):
            """Set the acquisition counter back to zero."""
            self.call_num = 0

        def acquire_token_for_client(self, *args, **kwargs):
            """Count the request and return whatever ``self.token`` holds."""
            self.call_num = self.call_num + 1
            return self.token

    return MsalClient()
|
|
|
|
|
|
def scan_init_response(request, context):
    """Return the canned scan-creation payload for the requested workspaces.

    The request body arrives form-encoded as ``workspaces=<workspace_id>``.
    When two or more workspaces are scanned it looks like
    ``workspaces=<id1>&workspaces=<id2>``; the ids are extracted and joined
    with ``||`` to form the lookup key. ``context`` is not used.

    Raises:
        KeyError: if the workspace combination has no canned response.
    """
    # Everything before the first "workspaces=" marker is discarded.
    _, *workspace_ids = request.text.replace("&", "").split("workspaces=")
    lookup_key = "||".join(workspace_ids)

    scan_by_workspaces: Dict[str, Any] = {
        "64ED5CAD-7C10-4684-8180-826122881108": {
            "id": "4674efd1-603c-4129-8d82-03cf2be05aff"
        },
        "64ED5CAD-7C22-4684-8180-826122881108": {
            "id": "a674efd1-603c-4129-8d82-03cf2be05aff"
        },
        "64ED5CAD-7C10-4684-8180-826122881108||64ED5CAD-7C22-4684-8180-826122881108": {
            "id": "a674efd1-603c-4129-8d82-03cf2be05aff"
        },
        "A8D655A6-F521-477E-8C22-255018583BF4": {
            "id": "62DAF926-0B18-4FF1-982C-2A3EB6B8F0E4"
        },
        "C5DA6EA8-625E-4AB1-90B6-CAEA0BF9F492": {
            "id": "81B02907-E2A3-45C3-B505-3781839C8CAA",
        },
        "8F756DE6-26AD-45FF-A201-44276FF1F561": {
            "id": "6147FCEB-7531-4449-8FB6-1F7A5431BF2D",
        },
    }

    return scan_by_workspaces[lookup_key]
|
|
|
|
|
|
def read_mock_data(path: Union[Path, str]) -> dict:
    """Load a JSON mock-response file and return the decoded object.

    Args:
        path: Location of the JSON file.

    Returns:
        The object decoded by ``json.load``.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    # Pin the encoding so that decoding does not depend on the locale of the
    # machine running the tests; JSON interchange files are UTF-8.
    with open(path, encoding="utf-8") as p:
        return json.load(p)
|
|
|
|
|
|
def register_mock_api(
    pytestconfig: pytest.Config, request_mock: Any, override_data: Optional[dict] = None
) -> None:
    """Register the mocked PowerBI REST endpoints on ``request_mock``.

    Responses are layered in this order, later layers replacing earlier ones
    for the same URL: the scan-creation endpoint served by
    ``scan_init_response``, the contents of ``default_mock_response.json``,
    and finally ``override_data`` when given.

    Args:
        pytestconfig: Used to locate the mock data relative to the root path.
        request_mock: Object exposing ``register_uri``.
        override_data: Optional URL -> response-spec mapping.
    """
    defaults_file = (
        pytestconfig.rootpath
        / "tests/integration/powerbi/mock_data/default_mock_response.json"
    )

    responses_by_url = {
        "https://api.powerbi.com/v1.0/myorg/admin/workspaces/getInfo": {
            "method": "POST",
            "status_code": 200,
            "json": scan_init_response,
        },
    }
    responses_by_url.update(read_mock_data(defaults_file))
    responses_by_url.update(override_data or {})

    for endpoint, spec in responses_by_url.items():
        request_mock.register_uri(
            spec["method"],
            endpoint,
            json=spec.get("json"),
            text=spec.get("text"),
            status_code=spec["status_code"],
        )
|
|
|
|
|
|
def default_source_config():
    """Return a fresh baseline PowerBI source configuration for the tests.

    A new dict is built on every call, so callers may modify or extend the
    result without affecting other tests.
    """
    workspace_id = "64ED5CAD-7C10-4684-8180-826122881108"
    platform_by_powerbi_type = {
        "PostgreSql": "postgres",
        "Oracle": "oracle",
    }
    return dict(
        client_id="foo",
        client_secret="bar",
        tenant_id="0B0C960B-FCDF-4D0F-8C45-2E03BB59DDEB",
        workspace_id=workspace_id,
        extract_lineage=False,
        extract_reports=False,
        extract_ownership=True,
        convert_lineage_urns_to_lowercase=False,
        workspace_id_pattern={"allow": [workspace_id]},
        dataset_type_mapping=platform_by_powerbi_type,
        env="DEV",
        extract_workspaces_to_containers=False,
        enable_advance_lineage_sql_construct=False,
    )
|
|
|
|
|
|
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_ingest(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Run ingestion with the default config and compare with the golden file."""
    resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    output_file = f"{tmp_path}/powerbi_mces.json"
    recipe = {
        "run_id": "powerbi-test",
        "source": {"type": "powerbi", "config": default_source_config()},
        "sink": {"type": "file", "config": {"filename": output_file}},
    }

    pipeline = Pipeline.create(recipe)
    pipeline.run()
    pipeline.raise_from_status()

    golden_name = "golden_test_ingest.json"
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_file,
        golden_path=f"{resources_dir}/{golden_name}",
    )
|
|
|
|
|
|
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_workspace_type_filter(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest with ``workspace_type_filter`` set to ``PersonalGroup`` only."""
    resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"

    type_filter_responses = read_mock_data(
        pytestconfig.rootpath
        / "tests/integration/powerbi/mock_data/workspace_type_filter.json"
    )
    register_mock_api(
        request_mock=requests_mock,
        pytestconfig=pytestconfig,
        override_data=type_filter_responses,
    )

    # Drop the explicit workspace selectors inherited from the defaults.
    source_config: dict = default_source_config()
    for selector in ("workspace_id", "workspace_id_pattern"):
        del source_config[selector]
    source_config["extract_workspaces_to_containers"] = True
    source_config["workspace_type_filter"] = ["PersonalGroup"]

    output_file = f"{tmp_path}/powerbi_mces.json"
    recipe = {
        "run_id": "powerbi-test",
        "source": {"type": "powerbi", "config": source_config},
        "sink": {"type": "file", "config": {"filename": output_file}},
    }

    pipeline = Pipeline.create(recipe)
    pipeline.run()
    pipeline.raise_from_status()

    golden_name = "golden_test_personal_ingest.json"
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_file,
        golden_path=f"{resources_dir}/{golden_name}",
    )
|
|
|
|
|
|
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_ingest_patch_disabled(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Run ingestion with ``patch_metadata`` disabled and check the golden file."""
    resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    source_config = default_source_config()
    source_config["patch_metadata"] = False

    output_file = f"{tmp_path}/powerbi_mces.json"
    recipe = {
        "run_id": "powerbi-test",
        "source": {"type": "powerbi", "config": source_config},
        "sink": {"type": "file", "config": {"filename": output_file}},
    }

    pipeline = Pipeline.create(recipe)
    pipeline.run()
    pipeline.raise_from_status()

    golden_name = "golden_test_ingest_patch_disabled.json"
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_file,
        golden_path=f"{resources_dir}/{golden_name}",
    )
|
|
|
|
|
|
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_test_connection_success(mock_msal):
    """With the MSAL client mocked, the connection test reports success."""
    source_config = default_source_config()
    connection_report = test_connection_helpers.run_test_connection(
        PowerBiDashboardSource, source_config
    )
    test_connection_helpers.assert_basic_connectivity_success(connection_report)
|
|
|
|
|
|
@freeze_time(FROZEN_TIME)
@pytest.mark.integration
def test_powerbi_test_connection_failure():
    """Without the MSAL mock, the connection test reports a failure."""
    source_config = default_source_config()
    connection_report = test_connection_helpers.run_test_connection(
        PowerBiDashboardSource, source_config
    )
    expected_reason = "Unable to get authority configuration"
    test_connection_helpers.assert_basic_connectivity_failure(
        connection_report, expected_reason
    )
|
|
|
|
|
|
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_platform_instance_ingest(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Run ingestion with a ``platform_instance`` set and check the golden file."""
    resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    source_config = default_source_config()
    source_config["platform_instance"] = "aws-ap-south-1"

    mces_file: str = f"{tmp_path}/powerbi_platform_instance_mces.json"
    recipe = {
        "run_id": "powerbi-test",
        "source": {"type": "powerbi", "config": source_config},
        "sink": {"type": "file", "config": {"filename": mces_file}},
    }

    pipeline = Pipeline.create(recipe)
    pipeline.run()
    pipeline.raise_from_status()

    golden_name = "golden_test_platform_instance_ingest.json"
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=mces_file,
        golden_path=f"{resources_dir}/{golden_name}",
    )
|
|
|
|
|
|
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_ingest_urn_lower_case(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Run ingestion with both URN lower-casing options enabled."""
    resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    source_config = default_source_config()
    source_config.update(
        {
            "env": "PROD",
            "platform_instance": "myPlatformInstance",
            "convert_urns_to_lowercase": True,
            "convert_lineage_urns_to_lowercase": True,
        }
    )

    output_file = f"{tmp_path}/powerbi_lower_case_urn_mces.json"
    recipe = {
        "run_id": "powerbi-test",
        "source": {"type": "powerbi", "config": source_config},
        "sink": {"type": "file", "config": {"filename": output_file}},
    }

    pipeline = Pipeline.create(recipe)
    pipeline.run()
    pipeline.raise_from_status()

    golden_name = "golden_test_lower_case_urn_ingest.json"
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_file,
        golden_path=f"{resources_dir}/{golden_name}",
    )
|
|
|
|
|
|
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_override_ownership(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Run ingestion with ``extract_ownership`` turned off."""
    resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    source_config = default_source_config()
    source_config["extract_ownership"] = False

    output_file = f"{tmp_path}/powerbi_mces_disabled_ownership.json"
    recipe = {
        "run_id": "powerbi-test",
        "source": {"type": "powerbi", "config": source_config},
        "sink": {"type": "file", "config": {"filename": output_file}},
    }

    pipeline = Pipeline.create(recipe)
    pipeline.run()
    pipeline.raise_from_status()

    golden_name = "golden_test_disabled_ownership.json"
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_file,
        golden_path=f"{resources_dir}/{golden_name}",
    )
|
|
|
|
|
|
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_scan_all_workspaces(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Run ingestion with a deny-only workspace pattern instead of an allow list."""
    resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    source_config = default_source_config()
    source_config["extract_reports"] = False
    source_config["extract_ownership"] = False
    # Replaces the default allow list with a deny list.
    source_config["workspace_id_pattern"] = {
        "deny": ["64ED5CAD-7322-4684-8180-826122881108"],
    }

    output_file = f"{tmp_path}/powerbi_mces_scan_all_workspaces.json"
    recipe = {
        "run_id": "powerbi-test",
        "source": {"type": "powerbi", "config": source_config},
        "sink": {"type": "file", "config": {"filename": output_file}},
    }

    pipeline = Pipeline.create(recipe)
    pipeline.run()
    pipeline.raise_from_status()

    golden_name = "golden_test_scan_all_workspaces.json"
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_file,
        golden_path=f"{resources_dir}/{golden_name}",
    )
|
|
|
|
|
|
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_extract_reports(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Run ingestion with ``extract_reports`` enabled and check the golden file."""
    resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    source_config = default_source_config()
    source_config["extract_reports"] = True

    output_file = f"{tmp_path}/powerbi_report_mces.json"
    recipe = {
        "run_id": "powerbi-test",
        "source": {"type": "powerbi", "config": source_config},
        "sink": {"type": "file", "config": {"filename": output_file}},
    }

    pipeline = Pipeline.create(recipe)
    pipeline.run()
    pipeline.raise_from_status()

    golden_name = "golden_test_report.json"
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_file,
        golden_path=f"{resources_dir}/{golden_name}",
    )
|
|
|
|
|
|
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_extract_lineage(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Run ingestion with lineage extraction and per-platform instances."""
    resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    instance_by_platform = {
        "PostgreSql": {"platform_instance": "operational_instance"},
        "Oracle": {"platform_instance": "high_performance_production_unit"},
        "Sql": {"platform_instance": "reporting-db"},
        "Snowflake": {"platform_instance": "sn-2"},
    }
    source_config = default_source_config()
    source_config["extract_lineage"] = True
    source_config["dataset_type_mapping"] = instance_by_platform

    output_file = f"{tmp_path}/powerbi_lineage_mces.json"
    recipe = {
        "run_id": "powerbi-lineage-test",
        "source": {"type": "powerbi", "config": source_config},
        "sink": {"type": "file", "config": {"filename": output_file}},
    }

    pipeline = Pipeline.create(recipe)
    pipeline.run()
    pipeline.raise_from_status()

    golden_name = "golden_test_lineage.json"
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_file,
        golden_path=f"{resources_dir}/{golden_name}",
    )
|
|
|
|
|
|
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_extract_endorsements(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Run ingestion with ``extract_endorsements_to_tags`` enabled."""
    resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    source_config = default_source_config()
    source_config["extract_reports"] = False
    source_config["extract_endorsements_to_tags"] = True

    output_file = f"{tmp_path}/powerbi_endorsement_mces.json"
    recipe = {
        "run_id": "powerbi-test",
        "source": {"type": "powerbi", "config": source_config},
        "sink": {"type": "file", "config": {"filename": output_file}},
    }

    pipeline = Pipeline.create(recipe)
    pipeline.run()
    pipeline.raise_from_status()

    golden_name = "golden_test_endorsement.json"
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_file,
        golden_path=f"{resources_dir}/{golden_name}",
    )
|
|
|
|
|
|
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_admin_access_is_not_allowed(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Run ingestion while the admin scan endpoint answers with HTTP 403."""
    resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"

    forbidden_scan = {
        "https://api.powerbi.com/v1.0/myorg/admin/workspaces/getInfo": {
            "method": "POST",
            "status_code": 403,
            "json": {},
        },
    }
    register_mock_api(
        pytestconfig=pytestconfig,
        request_mock=requests_mock,
        override_data=forbidden_scan,
    )

    instance_by_platform = {
        "PostgreSql": {"platform_instance": "operational_instance"},
        "Oracle": {"platform_instance": "high_performance_production_unit"},
        "Sql": {"platform_instance": "reporting-db"},
        "Snowflake": {"platform_instance": "sn-2"},
    }
    source_config = default_source_config()
    source_config["extract_lineage"] = True
    source_config["dataset_type_mapping"] = instance_by_platform

    output_file = f"{tmp_path}/golden_test_admin_access_not_allowed_mces.json"
    recipe = {
        "run_id": "powerbi-admin-api-disabled-test",
        "source": {"type": "powerbi", "config": source_config},
        "sink": {"type": "file", "config": {"filename": output_file}},
    }

    pipeline = Pipeline.create(recipe)
    pipeline.run()
    pipeline.raise_from_status()

    golden_name = "golden_test_admin_access_not_allowed.json"
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_file,
        golden_path=f"{resources_dir}/{golden_name}",
    )
|
|
|
|
|
|
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
def test_workspace_container(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Run ingestion with workspace and dataset container extraction enabled."""
    resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    source_config = default_source_config()
    # Replaces the default allow list with a deny list.
    source_config["workspace_id_pattern"] = {
        "deny": ["64ED5CAD-7322-4684-8180-826122881108"],
    }
    source_config["extract_workspaces_to_containers"] = True
    source_config["extract_datasets_to_containers"] = True
    source_config["extract_reports"] = True

    output_file = f"{tmp_path}/powerbi_container_mces.json"
    recipe = {
        "run_id": "powerbi-test",
        "source": {"type": "powerbi", "config": source_config},
        "sink": {"type": "file", "config": {"filename": output_file}},
    }

    pipeline = Pipeline.create(recipe)
    pipeline.run()
    pipeline.raise_from_status()

    golden_name = "golden_test_container.json"
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_file,
        golden_path=f"{resources_dir}/{golden_name}",
    )
|
|
|
|
|
|
def test_access_token_expiry_with_long_expiry(
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """A token with a long ``expires_in`` is requested exactly twice per run."""
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    fake_client = mock_msal_cca()
    recipe = {
        "run_id": "powerbi-test",
        "source": {"type": "powerbi", "config": default_source_config()},
        "sink": {
            "type": "file",
            "config": {"filename": f"{tmp_path}/powerbi_access_token_mces.json"},
        },
    }

    # NOTE(review): the patch is assumed to wrap pipeline creation only —
    # confirm against the upstream file, whose indentation is not visible here.
    with mock.patch("msal.ConfidentialClientApplication", return_value=fake_client):
        pipeline = Pipeline.create(recipe)

    # Hand out a token valid for an hour and forget any acquisitions that
    # happened before the run.
    fake_client.token = {
        "access_token": "dummy2",
        "expires_in": 3600,
    }
    fake_client.reset()

    pipeline.run()
    # We expect the token to be requested twice (once for AdminApiResolver and
    # once for RegularApiResolver).
    assert fake_client.call_num == 2
|
|
|
|
|
|
def test_access_token_expiry_with_short_expiry(
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """A token with ``expires_in`` of 0 is requested more than twice per run."""
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    fake_client = mock_msal_cca()
    recipe = {
        "run_id": "powerbi-test",
        "source": {"type": "powerbi", "config": default_source_config()},
        "sink": {
            "type": "file",
            "config": {"filename": f"{tmp_path}/powerbi_access_token_mces.json"},
        },
    }

    # NOTE(review): the patch is assumed to wrap pipeline creation only —
    # confirm against the upstream file, whose indentation is not visible here.
    with mock.patch("msal.ConfidentialClientApplication", return_value=fake_client):
        pipeline = Pipeline.create(recipe)

    # Hand out a token that expires immediately, so it has to be requested
    # again whenever it is needed.
    fake_client.token = {
        "access_token": "dummy",
        "expires_in": 0,
    }
    fake_client.reset()

    pipeline.run()
    assert fake_client.call_num > 2
|
|
|
|
|
|
def dataset_type_mapping_set_to_all_platform(pipeline: Pipeline) -> None:
    """Assert the source's ``dataset_type_mapping`` covers every supported platform.

    The expected mapping is derived from ``SupportedDataPlatform``: PowerBI
    platform name -> DataHub platform name.
    """
    powerbi_source = cast(PowerBiDashboardSource, pipeline.source)
    source_config: PowerBiDashboardSourceConfig = powerbi_source.source_config

    assert source_config.dataset_type_mapping is not None

    expected_mapping: dict = {
        platform.value.powerbi_data_platform_name: platform.value.datahub_data_platform_name
        for platform in SupportedDataPlatform
    }

    assert expected_mapping == source_config.dataset_type_mapping
|
|
|
|
|
|
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_dataset_type_mapping_should_set_to_all(
    mock_msal, pytestconfig, tmp_path, mock_time, requests_mock
):
    """
    The pipeline is only created, not run: with dataset_type_mapping left out
    of the config, verify it is set to the default data platforms.
    """
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    source_config: dict = default_source_config()
    del source_config["dataset_type_mapping"]

    recipe = {
        "run_id": "powerbi-test",
        "source": {"type": "powerbi", "config": source_config},
        "sink": {
            "type": "file",
            "config": {"filename": f"{tmp_path}/powerbi_lower_case_urn_mces.json"},
        },
    }
    pipeline = Pipeline.create(recipe)

    dataset_type_mapping_set_to_all_platform(pipeline)
|
|
|
|
|
|
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_dataset_type_mapping_error(
    mock_msal, pytestconfig, tmp_path, mock_time, requests_mock
):
    """
    The pipeline is only created, not run: when both dataset_type_mapping and
    server_to_platform_instance are configured, creation must raise an error
    mentioning that dataset_type_mapping is deprecated.
    """
    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    # The defaults already carry dataset_type_mapping; adding the newer option
    # on top produces the conflicting combination.
    source_config = default_source_config()
    source_config["server_to_platform_instance"] = {
        "localhost": {
            "platform_instance": "test",
        }
    }
    recipe = {
        "run_id": "powerbi-test",
        "source": {"type": "powerbi", "config": source_config},
        "sink": {
            "type": "file",
            "config": {"filename": f"{tmp_path}/powerbi_lower_case_urn_mces.json"},
        },
    }

    with pytest.raises(Exception, match=r"dataset_type_mapping is deprecated"):
        Pipeline.create(recipe)
|
|
|
|
|
|
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
def test_server_to_platform_map(
    mock_msal, pytestconfig, tmp_path, mock_time, requests_mock
):
    """Run lineage ingestion driven by a ``server_to_platform_instance`` map."""
    resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"

    source_config: dict = default_source_config()
    source_config["extract_lineage"] = True
    source_config["convert_lineage_urns_to_lowercase"] = True
    del source_config["dataset_type_mapping"]
    source_config["server_to_platform_instance"] = {
        "hp123rt5.ap-southeast-2.fakecomputing.com": {
            "platform_instance": "snowflake_production_instance",
            "env": "PROD",
        },
        "my-test-project": {
            "platform_instance": "bigquery-computing-dev-account",
            "env": "QA",
        },
        "localhost:1521": {"platform_instance": "oracle-sales-instance", "env": "PROD"},
    }

    register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock)

    mces_file: str = f"{tmp_path}/powerbi_server_to_platform_instance_mces.json"
    recipe = {
        "run_id": "powerbi-test",
        "source": {"type": "powerbi", "config": source_config},
        "sink": {"type": "file", "config": {"filename": mces_file}},
    }

    pipeline = Pipeline.create(recipe)
    pipeline.run()
    pipeline.raise_from_status()

    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=mces_file,
        golden_path=f"{resources_dir}/golden_test_server_to_platform_instance.json",
    )

    # Because a server_to_platform_instance map is provided, the old
    # dataset_type_mapping should be set to all supported platforms, so that
    # all available upstream lineage is processed even for platforms that have
    # no entry in the server_to_platform_instance map.
    dataset_type_mapping_set_to_all_platform(pipeline)
|
|
|
|
|
|
def validate_pipeline(pipeline: Pipeline) -> None:
    """Check the reports the source's PowerBI client returns for the demo workspace.

    Fetches the reports through ``powerbi_client.get_reports`` and compares
    id, name, description, dataset and pages of each one against ``Report``
    objects built from the literal specs below.
    """
    demo_workspace: Workspace = Workspace(
        id="64ED5CAD-7C10-4684-8180-826122881108",
        name="demo-workspace",
        type="Workspace",
        datasets={},
        dashboards={},
        reports={},
        report_endorsements={},
        dashboard_endorsements={},
        scan_result={},
        independent_datasets={},
        app=None,
    )

    # Reports as seen by the source.
    powerbi_source = cast(PowerBiDashboardSource, pipeline.source)
    actual_reports: Dict[str, Report] = powerbi_source.powerbi_client.get_reports(
        workspace=demo_workspace
    )
    assert len(actual_reports) == 2

    # Raw description of the reports we expect to get back.
    report_specs: List[Dict] = [
        {
            "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
            "id": "5b218778-e7a5-4d73-8187-f10824047715",
            "name": "SalesMarketing",
            "description": "Acryl sales marketing report",
            "pages": [
                {
                    "name": "ReportSection",
                    "displayName": "Regional Sales Analysis",
                    "order": "0",
                },
                {
                    "name": "ReportSection1",
                    "displayName": "Geographic Analysis",
                    "order": "1",
                },
            ],
        },
        {
            "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
            "id": "e9fd6b0b-d8c8-4265-8c44-67e183aebf97",
            "name": "Product",
            "description": "Acryl product report",
            "pages": [],
        },
    ]

    expected_reports: Dict[str, Report] = {}
    for spec in report_specs:
        report_id = spec[Constant.ID]
        expected_pages = []
        for page_spec in spec["pages"]:
            page_name = page_spec[Constant.NAME]
            # Page ids are "<report id>.<page name>" with spaces as underscores.
            page_slug = page_name.replace(" ", "_")
            expected_pages.append(
                Page(
                    id=f"{report_id}.{page_slug}",
                    name=page_name,
                    displayName=page_spec[Constant.DISPLAY_NAME],
                    order=page_spec[Constant.ORDER],
                )
            )
        expected_reports[report_id] = Report(
            id=report_id,
            name=spec[Constant.NAME],
            type=ReportType.PowerBIReport,
            webUrl="",
            embedUrl="",
            description=spec[Constant.DESCRIPTION],
            pages=expected_pages,
            users=[],
            tags=[],
            dataset_id=spec[Constant.DATASET_ID],
            dataset=demo_workspace.datasets.get(spec[Constant.DATASET_ID]),
        )

    # Field-by-field comparison; the URLs are deliberately not compared.
    for report_id, expected in expected_reports.items():
        actual = actual_reports[report_id]
        assert actual.id == expected.id
        assert actual.name == expected.name
        assert actual.description == expected.description
        assert actual.dataset == expected.dataset
        assert actual.pages == expected.pages
|
|
|
|
|
|
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_reports_with_failed_page_request(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """
    Test that all reports are fetched even if a single page request fails.

    The pages endpoint of the second report is mocked to answer with HTTP 400;
    the pipeline is only created (not run) and the reports returned by the
    source's client are checked by ``validate_pipeline``.
    """
    register_mock_api(
        pytestconfig=pytestconfig,
        request_mock=requests_mock,
        override_data={
            "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports": {
                "method": "GET",
                "status_code": 200,
                "json": {
                    "value": [
                        {
                            "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
                            "id": "5b218778-e7a5-4d73-8187-f10824047715",
                            "reportType": "PowerBIReport",
                            "name": "SalesMarketing",
                            "description": "Acryl sales marketing report",
                            "webUrl": "https://app.powerbi.com/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715",
                            "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=5b218778-e7a5-4d73-8187-f10824047715&groupId=64ED5CAD-7C10-4684-8180-826122881108",
                        },
                        {
                            "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
                            "id": "e9fd6b0b-d8c8-4265-8c44-67e183aebf97",
                            "reportType": "PaginatedReport",
                            "name": "Product",
                            "description": "Acryl product report",
                            "webUrl": "https://app.powerbi.com/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/e9fd6b0b-d8c8-4265-8c44-67e183aebf97",
                            "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=e9fd6b0b-d8c8-4265-8c44-67e183aebf97&groupId=64ED5CAD-7C10-4684-8180-826122881108",
                        },
                    ]
                },
            },
            "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715": {
                "method": "GET",
                "status_code": 200,
                "json": {
                    "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
                    "id": "5b218778-e7a5-4d73-8187-f10824047715",
                    "name": "SalesMarketing",
                    "reportType": "PowerBIReport",
                    "description": "Acryl sales marketing report",
                    "webUrl": "https://app.powerbi.com/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715",
                    "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=5b218778-e7a5-4d73-8187-f10824047715&groupId=64ED5CAD-7C10-4684-8180-826122881108",
                },
            },
            "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/e9fd6b0b-d8c8-4265-8c44-67e183aebf97": {
                "method": "GET",
                "status_code": 200,
                "json": {
                    "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
                    "id": "e9fd6b0b-d8c8-4265-8c44-67e183aebf97",
                    "reportType": "PowerBIReport",
                    "name": "Product",
                    "description": "Acryl product report",
                    "webUrl": "https://app.powerbi.com/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/e9fd6b0b-d8c8-4265-8c44-67e183aebf97",
                    "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=e9fd6b0b-d8c8-4265-8c44-67e183aebf97&groupId=64ED5CAD-7C10-4684-8180-826122881108",
                },
            },
            "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715/pages": {
                "method": "GET",
                "status_code": 200,
                "json": {
                    "value": [
                        {
                            "displayName": "Regional Sales Analysis",
                            "name": "ReportSection",
                            "order": "0",
                        },
                        {
                            "displayName": "Geographic Analysis",
                            "name": "ReportSection1",
                            "order": "1",
                        },
                    ]
                },
            },
            # The failing request: pages of the second report.
            "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/e9fd6b0b-d8c8-4265-8c44-67e183aebf97/pages": {
                "method": "GET",
                "status_code": 400,
                "json": {
                    "error": {
                        "code": "InvalidRequest",
                        "message": "Request is currently not supported for RDL reports",
                    }
                },
            },
        },
    )

    pipeline = Pipeline.create(
        {
            "run_id": "powerbi-test",
            "source": {
                "type": "powerbi",
                "config": {
                    **default_source_config(),
                    "extract_reports": True,
                    "platform_instance": "aws-ap-south-1",
                },
            },
            "sink": {
                "type": "file",
                "config": {
                    # The "/" keeps the sink file inside the per-test temporary
                    # directory; without it the name is glued onto the
                    # directory name and the file lands next to it.
                    "filename": f"{tmp_path}/powerbi_reports_with_failed_page_request_mces.json",
                },
            },
        }
    )

    validate_pipeline(pipeline)
|
|
|
|
|
|
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
def test_independent_datasets_extraction(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Run the powerbi source with ``extract_independent_datasets`` enabled.

    The mock API overrides registered here describe one workspace whose scan
    result carries a single dataset, plus an empty dashboards listing. The
    pipeline output is compared with ``golden_test_independent_datasets.json``.
    """
    # First page of the groups listing: exactly one workspace.
    groups_first_page = {
        "method": "GET",
        "status_code": 200,
        "json": {
            "value": [
                {
                    "id": "64ED5CAD-7C10-4684-8180-826122881108",
                    "isReadOnly": True,
                    "name": "demo-workspace",
                    "type": "Workspace",
                },
            ],
        },
    }
    # Second page is empty.
    groups_second_page = {
        "method": "GET",
        "status_code": 200,
        "json": {
            "value": [],
        },
    }

    # Scan result payload, assembled from the innermost element outwards.
    employee_table = {
        "name": "employee_ctc",
        "source": [
            {
                "expression": "dummy",
            }
        ],
    }
    dataset = {
        "id": "91580e0e-1680-4b1c-bbf9-4f6764d7a5ff",
        "tables": [employee_table],
    }
    scanned_workspace = {
        "id": "64ED5CAD-7C10-4684-8180-826122881108",
        "name": "demo-workspace",
        "type": "Workspace",
        "state": "Active",
        "datasets": [dataset],
    }
    scan_result = {
        "method": "GET",
        "status_code": 200,
        "json": {"workspaces": [scanned_workspace]},
    }

    # The workspace has no dashboards.
    dashboards = {
        "method": "GET",
        "status_code": 200,
        "json": {"value": []},
    }

    register_mock_api(
        pytestconfig=pytestconfig,
        request_mock=requests_mock,
        override_data={
            "https://api.powerbi.com/v1.0/myorg/groups?%24skip=0&%24top=1000": groups_first_page,
            "https://api.powerbi.com/v1.0/myorg/groups?%24skip=1000&%24top=1000": groups_second_page,
            "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanResult/4674efd1-603c-4129-8d82-03cf2be05aff": scan_result,
            "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/dashboards": dashboards,
        },
    )

    # The sink writes here and the golden-file check reads the same path.
    output_file = f"{tmp_path}/powerbi_independent_mces.json"

    recipe = {
        "run_id": "powerbi-test",
        "source": {
            "type": "powerbi",
            "config": {
                **default_source_config(),
                "extract_independent_datasets": True,
            },
        },
        "sink": {
            "type": "file",
            "config": {
                "filename": output_file,
            },
        },
    }

    pipeline = Pipeline.create(recipe)
    pipeline.run()
    pipeline.raise_from_status()

    resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_file,
        golden_path=f"{resources_dir}/golden_test_independent_datasets.json",
    )
|
|
|
|
|
|
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
def test_cll_extraction(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Run the powerbi source with column-level lineage flags enabled.

    The pipeline output is compared with ``golden_test_cll.json``.
    """
    register_mock_api(
        pytestconfig=pytestconfig,
        request_mock=requests_mock,
    )

    source_config: dict = default_source_config()
    # Remove the explicit mapping so that the connector falls back to its
    # default (all data platforms).
    source_config.pop("dataset_type_mapping")
    source_config.update(
        {
            "extract_lineage": True,
            "extract_column_level_lineage": True,
            "enable_advance_lineage_sql_construct": True,
            "native_query_parsing": True,
            "extract_independent_datasets": True,
        }
    )

    # The sink writes here and the golden-file check reads the same path.
    output_file = f"{tmp_path}/powerbi_cll_mces.json"

    recipe = {
        "run_id": "powerbi-test",
        "source": {
            "type": "powerbi",
            "config": source_config,
        },
        "sink": {
            "type": "file",
            "config": {
                "filename": output_file,
            },
        },
    }

    pipeline = Pipeline.create(recipe)
    pipeline.run()
    pipeline.raise_from_status()

    resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_file,
        golden_path=f"{resources_dir}/golden_test_cll.json",
    )
|
|
|
|
|
|
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
def test_cll_extraction_flags(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Check that pipeline creation fails when only the CLL flag is enabled.

    The recipe sets ``extract_column_level_lineage`` on top of the default
    source config; ``Pipeline.create`` is expected to raise with a message
    listing the flags that have to be enabled.
    """
    register_mock_api(
        pytestconfig=pytestconfig,
        request_mock=requests_mock,
    )

    source_config: dict = {
        **default_source_config(),
        "extract_column_level_lineage": True,
    }

    recipe = {
        "run_id": "powerbi-test",
        "source": {
            "type": "powerbi",
            "config": source_config,
        },
        "sink": {
            "type": "file",
            "config": {
                "filename": f"{tmp_path}/powerbi_cll_mces.json",
            },
        },
    }

    # The message contains brackets and quotes, so it is escaped before being
    # used as the regular expression given to ``match``.
    expected_message: str = re.escape(
        "Enable all these flags in recipe: ['native_query_parsing', 'enable_advance_lineage_sql_construct', 'extract_lineage', 'extract_dataset_schema']"
    )

    with pytest.raises(Exception, match=expected_message):
        Pipeline.create(recipe)
|
|
|
|
|
|
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_cross_workspace_reference_info_message(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest two allowed workspaces and check the resulting INFO report entry.

    The mock responses come from ``cross_workspace_mock_response.json``. After
    the run, the source reporter must hold an INFO entry titled
    "Missing Dataset Lineage For Tile", and the emitted output must match
    ``golden_test_cross_workspace_dataset.json``.
    """
    register_mock_api(
        pytestconfig=pytestconfig,
        request_mock=requests_mock,
        override_data=read_mock_data(
            path=pytestconfig.rootpath
            / "tests/integration/powerbi/mock_data/cross_workspace_mock_response.json"
        ),
    )

    config = default_source_config()

    # Select workspaces through an allow-pattern instead of a single id.
    del config["workspace_id"]
    config["workspace_id_pattern"] = {
        "allow": [
            "A8D655A6-F521-477E-8C22-255018583BF4",
            "C5DA6EA8-625E-4AB1-90B6-CAEA0BF9F492",
        ]
    }
    config["include_workspace_name_in_dataset_urn"] = True

    # The sink writes here and the golden-file check reads the same path.
    output_path = f"{tmp_path}/powerbi_mces.json"

    pipeline = Pipeline.create(
        {
            "run_id": "powerbi-test",
            "source": {
                "type": "powerbi",
                "config": {
                    **config,
                },
            },
            "sink": {
                "type": "file",
                "config": {
                    "filename": output_path,
                },
            },
        }
    )

    pipeline.run()
    pipeline.raise_from_status()

    # Narrow the source type so the reporter attributes are known to linters.
    assert isinstance(pipeline.source, PowerBiDashboardSource)

    info_entries: dict = pipeline.source.reporter._structured_logs._entries.get(
        StructuredLogLevel.INFO, {}
    )

    is_entry_present: bool = any(
        entry.title == "Missing Dataset Lineage For Tile"
        for entry in info_entries.values()
    )

    assert is_entry_present, (
        'Info message "Missing Dataset Lineage For Tile" should be present in reporter'
    )

    test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
    golden_file = "golden_test_cross_workspace_dataset.json"

    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_path,
        golden_path=f"{test_resources_dir}/{golden_file}",
    )
|
|
|
|
|
|
def common_app_ingest(
    pytestconfig: pytest.Config,
    requests_mock: Any,
    output_mcp_path: str,
    override_config: Optional[dict] = None,
) -> Pipeline:
    """Run a powerbi ingestion against the "workspace with app" mock data.

    Args:
        pytestconfig: pytest configuration, used to locate the mock data file.
        requests_mock: fixture on which the mocked API responses are registered.
        output_mcp_path: file path handed to the file sink as ``filename``.
        override_config: optional source-config entries applied on top of the
            defaults; ``None`` means no overrides.

    Returns:
        The pipeline, after ``run()`` and ``raise_from_status()`` were called.
    """
    mock_data_file = (
        pytestconfig.rootpath
        / "tests/integration/powerbi/mock_data/workspace_with_app_mock_response.json"
    )
    register_mock_api(
        pytestconfig=pytestconfig,
        request_mock=requests_mock,
        override_data=read_mock_data(path=mock_data_file),
    )

    source_config = default_source_config()
    # Select the workspace through an allow-pattern instead of a single id.
    del source_config["workspace_id"]
    source_config["workspace_id_pattern"] = {
        "allow": [
            "8F756DE6-26AD-45FF-A201-44276FF1F561",
        ]
    }
    # Caller-supplied entries win over everything set above.
    source_config.update(override_config or {})

    recipe = {
        "run_id": "powerbi-test",
        "source": {
            "type": "powerbi",
            "config": dict(source_config),
        },
        "sink": {
            "type": "file",
            "config": {
                "filename": output_mcp_path,
            },
        },
    }

    pipeline = Pipeline.create(recipe)
    pipeline.run()
    pipeline.raise_from_status()
    return pipeline
|
|
|
|
|
|
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_app_ingest(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest with ``extract_app`` enabled and compare against the golden file."""
    # The sink writes here and the golden-file check reads the same path.
    output_file = f"{tmp_path}/powerbi_mces.json"

    common_app_ingest(
        pytestconfig=pytestconfig,
        requests_mock=requests_mock,
        output_mcp_path=output_file,
        override_config={
            "extract_app": True,
        },
    )

    resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_file,
        golden_path=f"{resources_dir}/golden_test_app_ingest.json",
    )
|
|
|
|
|
|
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
@pytest.mark.integration
def test_powerbi_app_ingest_info_message(
    mock_msal: MagicMock,
    pytestconfig: pytest.Config,
    tmp_path: str,
    mock_time: datetime.datetime,
    requests_mock: Any,
) -> None:
    """Ingest without overriding ``extract_app`` and check the INFO report entry.

    No config override is passed to ``common_app_ingest``. After the run, the
    source reporter must hold an INFO entry titled "App Ingestion Is Disabled".
    """
    pipeline = common_app_ingest(
        pytestconfig=pytestconfig,
        requests_mock=requests_mock,
        output_mcp_path=f"{tmp_path}/powerbi_mces.json",
    )

    # Narrow the source type so the reporter attributes are known to linters.
    assert isinstance(pipeline.source, PowerBiDashboardSource)

    info_entries: dict = pipeline.source.reporter._structured_logs._entries.get(
        StructuredLogLevel.INFO, {}
    )

    is_entry_present: bool = any(
        entry.title == "App Ingestion Is Disabled" for entry in info_entries.values()
    )

    assert is_entry_present, (
        "The extract_app flag should be set to false by default. We need to keep this flag as false until all GMS instances are updated to the latest release."
    )
|