341 lines
13 KiB
Python

from typing import Any, Dict, Optional
from unittest import mock
from freezegun import freeze_time
from datahub.ingestion.run.pipeline import Pipeline
from datahub.testing import mce_helpers
# Timestamp handed to @freeze_time on test_profiling below.
FROZEN_TIME = "2022-02-23 07:00:00"
def scan_init_response(request, context):
    """Return the canned scan-initiation payload for the requested workspace.

    Registered as the JSON callback for the ``admin/workspaces/getInfo`` POST
    mock. ``context`` is accepted to match the callback signature but unused.

    Args:
        request: Object whose ``text`` attribute holds the POST body.
        context: Unused.

    Returns:
        A dict carrying the scan ``id`` for the workspace named in the body.

    Raises:
        IndexError: If the body contains no ``=`` separator.
        KeyError: If the workspace id has no canned response.
    """
    # Request mock is passing POST input in the form of workspaces=<workspace_id>
    workspace_id = request.text.split("=")[1]

    w_id_vs_response: Dict[str, Any] = {
        "64ED5CAD-7C10-4684-8180-826122881108": {
            "id": "4674efd1-603c-4129-8d82-03cf2be05aff"
        }
    }
    return w_id_vs_response[workspace_id]
def admin_datasets_response(request, context):
    """Return the canned dataset listing for the demo workspace.

    Registered as the JSON callback for the workspace ``datasets`` GET mock.
    Both arguments are accepted to match the callback signature and ignored.

    Returns:
        A dict with a single-entry ``value`` list describing ``library-dataset``.
    """
    return {
        "value": [
            {
                "id": "05169CD2-E713-41E6-9600-1D8066D95445",
                "name": "library-dataset",
                "webUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445",
            }
        ]
    }
def execute_queries_response(request, context):
    """Return a canned ``executeQueries`` payload chosen by the query text.

    The first query in the request's JSON body is inspected for a marker
    substring. Markers are checked in the order ``unique_count``,
    ``COUNTROWS``, ``TOPN``; the first match wins.

    Args:
        request: Object whose ``json()`` returns a body shaped like
            ``{"queries": [{"query": <str>}, ...]}``.
        context: Unused; accepted to match the callback signature.

    Returns:
        A ``{"results": [{"tables": [{"rows": [...]}]}]}`` dict for a
        recognised query, or ``None`` when no marker matches.
    """
    query = request.json()["queries"][0]["query"]

    if "unique_count" in query:
        rows = [
            {
                "[min]": 3,
                "[max]": 34333,
                "[unique_count]": 15,
            },
        ]
    elif "COUNTROWS" in query:
        rows = [
            {
                "[count]": 542300,
            },
        ]
    elif "TOPN" in query:
        rows = [
            {
                "[link]": "http://example.org",
                "[description]": "this is a sample",
                "[topic]": "urgent matters",
                "[view_count]": 123455,
            },
            {
                "[link]": "http://example.org/111/22/foo",
                "[description]": "this describes content",
                "[topic]": "urgent matters",
                "[view_count]": 123455,
            },
            {
                "[link]": "http://example.org/111/22",
                "[description]": "sample, this is",
                "[topic]": "normal matters",
                "[view_count]": 123455,
            },
        ]
    else:
        # Unrecognised queries get no payload, made explicit here rather
        # than falling off the end of the function.
        return None

    # All recognised queries share the same single-result, single-table envelope.
    return {
        "results": [
            {
                "tables": [
                    {
                        "rows": rows,
                    }
                ]
            }
        ],
    }
def register_mock_admin_api(
    request_mock: Any, override_data: Optional[dict] = None
) -> None:
    """Register the canned PowerBI REST responses on ``request_mock``.

    Args:
        request_mock: Object exposing ``register_uri(method, url, json=...,
            status_code=...)``; the test passes the ``requests_mock`` fixture.
        override_data: Optional mapping of URL to a response spec with
            ``method``, ``status_code`` and ``json`` keys. Entries replace the
            default for the same URL or add a new URL.
    """
    if override_data is None:
        override_data = {}

    # URL -> response spec. A "json" value is either a literal payload or one
    # of this module's callbacks taking (request, context).
    api_vs_response: Dict[str, Dict[str, Any]] = {
        "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets": {
            "method": "GET",
            "status_code": 200,
            "json": admin_datasets_response,
        },
        "https://api.powerbi.com/v1.0/myorg/groups?%24skip=0&%24top=1000": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "value": [
                    {
                        "id": "64ED5CAD-7C10-4684-8180-826122881108",
                        "isReadOnly": True,
                        "name": "demo-workspace",
                        "type": "Workspace",
                        "state": "Active",
                    }
                ],
            },
        },
        # Second page of the workspace listing is empty.
        "https://api.powerbi.com/v1.0/myorg/groups?%24skip=1000&%24top=1000": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "value": [],
            },
        },
        "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/dashboards": {
            "method": "GET",
            "status_code": 200,
            "json": {"value": []},
        },
        "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/datasources": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "value": [
                    {
                        "datasourceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3",
                        "datasourceType": "PostgreSql",
                        "connectionDetails": {
                            "database": "library_db",
                            "server": "foo",
                        },
                    },
                ]
            },
        },
        "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/executeQueries": {
            "method": "POST",
            "status_code": 200,
            "json": execute_queries_response,
        },
        "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanStatus/4674efd1-603c-4129-8d82-03cf2be05aff": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "status": "SUCCEEDED",
            },
        },
        "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanStatus/a674efd1-603c-4129-8d82-03cf2be05aff": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "status": "SUCCEEDED",
            },
        },
        "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanResult/4674efd1-603c-4129-8d82-03cf2be05aff": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "workspaces": [
                    {
                        "id": "64ED5CAD-7C10-4684-8180-826122881108",
                        "name": "demo-workspace",
                        "state": "Active",
                        "type": "Workspace",
                        "datasets": [
                            {
                                "id": "05169CD2-E713-41E6-9600-1D8066D95445",
                                "endorsementDetails": {"endorsement": "Promoted"},
                                "name": "test_sf_pbi_test",
                                "tables": [
                                    {
                                        "name": "articles",
                                        "source": [
                                            {
                                                "expression": 'let\n Source = PostgreSQL.Database("localhost" , "mics" ),\n public_order_date = Source{[Schema="public",Item="order_date"]}[Data] \n in \n public_order_date',
                                            }
                                        ],
                                        "datasourceUsages": [
                                            {
                                                "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3",
                                            }
                                        ],
                                        "columns": [
                                            {
                                                "name": "link",
                                                "description": "column description",
                                                "dataType": "String",
                                                "columnType": "DATA",
                                                "isHidden": False,
                                            },
                                            {
                                                "name": "description",
                                                "description": "column description",
                                                "dataType": "String",
                                                "columnType": "DATA",
                                                "isHidden": False,
                                            },
                                            {
                                                "name": "topic",
                                                "description": "column description",
                                                "dataType": "String",
                                                "columnType": "DATA",
                                                "isHidden": False,
                                            },
                                        ],
                                        "measures": [
                                            {
                                                "name": "view_count",
                                                "description": "column description",
                                                "expression": "let\n x",
                                                "isHidden": False,
                                            }
                                        ],
                                    },
                                ],
                            },
                        ],
                        "dashboards": [],
                        "reports": [],
                    },
                ]
            },
        },
        "https://api.powerbi.com/v1.0/myorg/admin/workspaces/getInfo": {
            "method": "POST",
            "status_code": 200,
            "json": scan_init_response,
        },
        "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445": {
            "method": "GET",
            "status_code": 200,
            "json": {
                "id": "05169CD2-E713-41E6-9600-1D8066D95445",
                "name": "library-dataset",
                "description": "Library Dataset",
                "webUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445",
            },
        },
    }

    # Caller-supplied specs win over the defaults for the same URL.
    api_vs_response.update(override_data)

    for url, response in api_vs_response.items():
        request_mock.register_uri(
            response["method"],
            url,
            json=response["json"],
            status_code=response["status_code"],
        )
def mock_msal_cca(*args, **kwargs):
    """Return a stand-in for ``msal.ConfidentialClientApplication``.

    Used as the ``side_effect`` of the ``mock.patch`` on ``test_profiling``.
    All constructor arguments are accepted and ignored.

    Returns:
        An object whose ``acquire_token_for_client`` always yields a dummy
        access token.
    """

    class MsalClient:
        def acquire_token_for_client(self, *args, **kwargs):
            """Return a fixed token payload regardless of the arguments."""
            return {
                "access_token": "dummy",
            }

    return MsalClient()
def default_source_config():
    """Return a fresh PowerBI source config dict with profiling enabled.

    A new dict is built on every call, so callers may mutate the result
    without affecting later calls.
    """
    return {
        "client_id": "foo",
        "client_secret": "bar",
        "tenant_id": "0B0C960B-FCDF-4D0F-8C45-2E03BB59DDEB",
        "workspace_id": "64ED5CAD-7C10-4684-8180-826122881108",
        "extract_lineage": True,
        "extract_reports": False,
        "admin_apis_only": False,
        "extract_ownership": True,
        "convert_lineage_urns_to_lowercase": False,
        "extract_independent_datasets": True,
        "workspace_id_pattern": {"allow": ["64ED5CAD-7C10-4684-8180-826122881108"]},
        "extract_workspaces_to_containers": False,
        "profiling": {
            "enabled": True,
        },
        "profile_pattern": {"allow": [".*"]},
    }
@freeze_time(FROZEN_TIME)
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
def test_profiling(mock_msal, pytestconfig, tmp_path, mock_time, requests_mock):
    """Run the PowerBI source with profiling on and compare to the golden file.

    The pipeline reads from the mocked REST endpoints, writes to a file sink
    under ``tmp_path``, and that output is checked against
    ``golden_test_profiling.json``.

    NOTE(review): ``mock_time`` is requested but never used in the body;
    presumably a fixture defined elsewhere with side effects (confirm).
    """
    test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
    # One path for both the sink and the golden-file check.
    output_path = f"{tmp_path}/powerbi_profiling.json"

    register_mock_admin_api(request_mock=requests_mock)

    pipeline = Pipeline.create(
        {
            "run_id": "powerbi-test",
            "source": {
                "type": "powerbi",
                "config": default_source_config(),
            },
            "sink": {
                "type": "file",
                "config": {
                    "filename": output_path,
                },
            },
        }
    )

    pipeline.run()
    pipeline.raise_from_status()

    golden_file = "golden_test_profiling.json"
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_path,
        golden_path=f"{test_resources_dir}/{golden_file}",
    )