295 lines
11 KiB
Python

import json
import pathlib
from typing import List
from unittest.mock import patch
from freezegun import freeze_time
from datahub.ingestion.run.pipeline import Pipeline
from datahub.ingestion.source.identity.azure_ad import AzureADConfig
from tests.test_helpers import mce_helpers
FROZEN_TIME = "2021-08-24 09:00:00"
def test_azure_ad_config():
config = AzureADConfig.parse_obj(
dict(
client_id="00000000-0000-0000-0000-000000000000",
tenant_id="00000000-0000-0000-0000-000000000000",
client_secret="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
redirect="https://login.microsoftonline.com/common/oauth2/nativeclient",
authority="https://login.microsoftonline.com/00000000-0000-0000-0000-000000000000",
token_url="https://login.microsoftonline.com/00000000-0000-0000-0000-000000000000/oauth2/token",
graph_url="https://graph.microsoft.com/v1.0",
ingest_users=True,
ingest_groups=True,
ingest_group_membership=True,
)
)
# Sanity on required configurations
assert config.client_id == "00000000-0000-0000-0000-000000000000"
assert config.tenant_id == "00000000-0000-0000-0000-000000000000"
assert config.client_secret == "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
assert (
config.redirect
== "https://login.microsoftonline.com/common/oauth2/nativeclient"
)
assert (
config.authority
== "https://login.microsoftonline.com/00000000-0000-0000-0000-000000000000"
)
assert (
config.token_url
== "https://login.microsoftonline.com/00000000-0000-0000-0000-000000000000/oauth2/token"
)
assert config.graph_url == "https://graph.microsoft.com/v1.0"
# assert on defaults
assert config.ingest_users
assert config.ingest_groups
assert config.ingest_group_membership
@freeze_time(FROZEN_TIME)
def test_azure_ad_source_default_configs(pytestconfig, tmp_path):
test_resources_dir: pathlib.Path = (
pytestconfig.rootpath / "tests/integration/azure_ad"
)
with patch(
"datahub.ingestion.source.identity.azure_ad.AzureADSource.get_token"
) as mock_token, patch(
"datahub.ingestion.source.identity.azure_ad.AzureADSource._get_azure_ad_users"
) as mock_users, patch(
"datahub.ingestion.source.identity.azure_ad.AzureADSource._get_azure_ad_groups"
) as mock_groups, patch(
"datahub.ingestion.source.identity.azure_ad.AzureADSource._get_azure_ad_group_members"
) as mock_group_users:
mocked_functions(
test_resources_dir, mock_token, mock_users, mock_groups, mock_group_users
)
# Run an azure usage ingestion run.
pipeline = Pipeline.create(
{
"run_id": "test-azure-ad",
"source": {
"type": "azure-ad",
"config": {
"client_id": "00000000-0000-0000-0000-000000000000",
"tenant_id": "00000000-0000-0000-0000-000000000000",
"client_secret": "client_secret",
"redirect": "https://login.microsoftonline.com/common/oauth2/nativeclient",
"authority": "https://login.microsoftonline.com/00000000-0000-0000-0000-000000000000",
"token_url": "https://login.microsoftonline.com/00000000-0000-0000-0000-000000000000/oauth2/token",
"graph_url": "https://graph.microsoft.com/v1.0",
"ingest_group_membership": True,
"ingest_groups": True,
"ingest_users": True,
},
},
"sink": {
"type": "file",
"config": {
"filename": f"{tmp_path}/azure_ad_mces_default_config.json",
},
},
}
)
pipeline.run()
pipeline.raise_from_status()
mce_helpers.check_golden_file(
pytestconfig,
output_path=tmp_path / "azure_ad_mces_default_config.json",
golden_path=test_resources_dir / "azure_ad_mces_golden_default_config.json",
)
@freeze_time(FROZEN_TIME)
def test_azure_ad_source_nested_groups(pytestconfig, tmp_path):
test_resources_dir: pathlib.Path = (
pytestconfig.rootpath / "tests/integration/azure_ad"
)
with patch(
"datahub.ingestion.source.identity.azure_ad.AzureADSource.get_token"
) as mock_token, patch(
"datahub.ingestion.source.identity.azure_ad.AzureADSource._get_azure_ad_users"
) as mock_users, patch(
"datahub.ingestion.source.identity.azure_ad.AzureADSource._get_azure_ad_groups"
) as mock_groups, patch(
"datahub.ingestion.source.identity.azure_ad.AzureADSource._get_azure_ad_group_members"
) as mock_group_users:
mocked_functions(
test_resources_dir,
mock_token,
mock_users,
mock_groups,
mock_group_users,
True,
)
# Run an azure usage ingestion run.
pipeline = Pipeline.create(
{
"run_id": "test-azure-ad",
"source": {
"type": "azure-ad",
"config": {
"client_id": "00000000-0000-0000-0000-000000000000",
"tenant_id": "00000000-0000-0000-0000-000000000000",
"client_secret": "client_secret",
"redirect": "https://login.microsoftonline.com/common/oauth2/nativeclient",
"authority": "https://login.microsoftonline.com/00000000-0000-0000-0000-000000000000",
"token_url": "https://login.microsoftonline.com/00000000-0000-0000-0000-000000000000/oauth2/token",
"graph_url": "https://graph.microsoft.com/v1.0",
"ingest_group_membership": True,
"ingest_groups": True,
"ingest_users": False,
},
},
"sink": {
"type": "file",
"config": {
"filename": f"{tmp_path}/azure_ad_mces_nested_groups.json",
},
},
}
)
pipeline.run()
pipeline.raise_from_status()
mce_helpers.check_golden_file(
pytestconfig,
output_path=tmp_path / "azure_ad_mces_nested_groups.json",
golden_path=test_resources_dir / "azure_ad_mces_golden_nested_groups.json",
)
@freeze_time(FROZEN_TIME)
def test_azure_source_ingestion_disabled(pytestconfig, tmp_path):
test_resources_dir: pathlib.Path = (
pytestconfig.rootpath / "tests/integration/azure_ad"
)
with patch(
"datahub.ingestion.source.identity.azure_ad.AzureADSource.get_token"
) as mock_token, patch(
"datahub.ingestion.source.identity.azure_ad.AzureADSource._get_azure_ad_users"
) as mock_users, patch(
"datahub.ingestion.source.identity.azure_ad.AzureADSource._get_azure_ad_groups"
) as mock_groups, patch(
"datahub.ingestion.source.identity.azure_ad.AzureADSource._get_azure_ad_group_members"
) as mock_group_users:
mocked_functions(
test_resources_dir, mock_token, mock_users, mock_groups, mock_group_users
)
# Run an Azure usage ingestion run.
pipeline = Pipeline.create(
{
"run_id": "test-azure-ad",
"source": {
"type": "azure-ad",
"config": {
"client_id": "00000000-0000-0000-0000-000000000000",
"tenant_id": "00000000-0000-0000-0000-000000000000",
"client_secret": "client_secret",
"redirect": "https://login.microsoftonline.com/common/oauth2/nativeclient",
"authority": "https://login.microsoftonline.com/00000000-0000-0000-0000-000000000000",
"token_url": "https://login.microsoftonline.com/00000000-0000-0000-0000-000000000000/oauth2/token",
"graph_url": "https://graph.microsoft.com/v1.0",
"ingest_group_membership": "False",
"ingest_groups": "False",
"ingest_users": "False",
},
},
"sink": {
"type": "file",
"config": {
"filename": f"{tmp_path}/azure_ad_mces_ingestion_disabled.json",
},
},
}
)
pipeline.run()
pipeline.raise_from_status()
mce_helpers.check_golden_file(
pytestconfig,
output_path=tmp_path / "azure_ad_mces_ingestion_disabled.json",
golden_path=test_resources_dir / "azure_ad_mces_golden_ingestion_disabled.json",
)
def load_test_resources(test_resources_dir):
azure_ad_users_json_file = test_resources_dir / "azure_ad_users.json"
azure_ad_groups_json_file = test_resources_dir / "azure_ad_groups.json"
azure_ad_nested_group_json_file = test_resources_dir / "azure_ad_nested_group.json"
azure_ad_nested_groups_members_json_file = (
test_resources_dir / "azure_ad_nested_groups_members.json"
)
with azure_ad_users_json_file.open() as azure_ad_users_json:
reference_users = json.loads(azure_ad_users_json.read())
with azure_ad_groups_json_file.open() as azure_ad_groups_json:
reference_groups = json.loads(azure_ad_groups_json.read())
with azure_ad_nested_group_json_file.open() as azure_ad_nested_group_json:
reference_nested_group = json.loads(azure_ad_nested_group_json.read())
with azure_ad_nested_groups_members_json_file.open() as azure_ad_nested_groups_users_json:
reference_nested_groups_users = json.loads(
azure_ad_nested_groups_users_json.read()
)
return (
reference_users,
reference_groups,
reference_nested_group,
reference_nested_groups_users,
)
#
# Azure offers a tool called 'graph-explorer' that you can use to generate fake data,
# or use your tenant's Azure AD to generate real data:
# https://developer.microsoft.com/graph/graph-explorer
#
def mocked_functions(
test_resources_dir,
mock_token,
mock_users,
mock_groups,
mock_groups_users,
return_nested_group=False,
):
# mock token response
mock_token.return_value = "xxxxxxxx"
# mock users and groups response
users, groups, nested_group, nested_group_members = load_test_resources(
test_resources_dir
)
mock_users.return_value = iter(list([users]))
mock_groups.return_value = (
iter(list([nested_group])) if return_nested_group else iter(list([groups]))
)
# For simplicity, each user is placed in ALL groups.
# Create a separate response mock for each group in our sample data.
# mock_groups_users.return_value = [users]
def mocked_group_members(azure_ad_group: dict) -> List:
group_id = azure_ad_group.get("id")
if group_id == "00000000-0000-0000-0000-000000000000":
return [users]
if group_id == "00000000-0000-0000-0000-0000000000001":
return [users]
if group_id == "99999999-9999-9999-9999-999999999999":
return [nested_group_members]
raise ValueError(f"Unexpected Azure AD group ID {group_id}")
mock_groups_users.side_effect = mocked_group_members