diff --git a/metadata-ingestion/src/datahub/ingestion/source/identity/azure_ad.py b/metadata-ingestion/src/datahub/ingestion/source/identity/azure_ad.py index 12f6b83d36..5675cd195d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/identity/azure_ad.py +++ b/metadata-ingestion/src/datahub/ingestion/source/identity/azure_ad.py @@ -419,18 +419,14 @@ class AzureADSource(Source): for user_count, datahub_corp_user_snapshot in enumerate( datahub_corp_user_snapshots ): - # Add GroupMembership if applicable - if ( - datahub_corp_user_snapshot.urn - in datahub_corp_user_urn_to_group_membership.keys() - ): - datahub_group_membership = ( - datahub_corp_user_urn_to_group_membership.get( - datahub_corp_user_snapshot.urn - ) - ) - assert datahub_group_membership - datahub_corp_user_snapshot.aspects.append(datahub_group_membership) + # TODO: Refactor common code between this and Okta to a common base class or utils + # Add group membership aspect + datahub_group_membership: GroupMembershipClass = ( + datahub_corp_user_urn_to_group_membership[ + datahub_corp_user_snapshot.urn + ] + ) + datahub_corp_user_snapshot.aspects.append(datahub_group_membership) mce = MetadataChangeEvent(proposedSnapshot=datahub_corp_user_snapshot) wu_id = f"user-snapshot-{user_count + 1 if self.config.mask_user_id else datahub_corp_user_snapshot.urn}" wu = MetadataWorkUnit(id=wu_id, mce=mce) diff --git a/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py b/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py index 239d8a3077..3b8bf4b595 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py +++ b/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py @@ -2,6 +2,7 @@ import asyncio import logging import re import urllib +from collections import defaultdict from dataclasses import dataclass, field from time import sleep from typing import Dict, Iterable, List, Optional, Union @@ -31,7 +32,7 @@ from 
datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import ( CorpUserSnapshot, ) from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent -from datahub.metadata.schema_classes import ( # GroupMembershipClass, +from datahub.metadata.schema_classes import ( ChangeTypeClass, CorpGroupInfoClass, CorpUserInfoClass, @@ -316,7 +317,9 @@ class OktaSource(Source): yield group_status_wu # Step 2: Populate GroupMembership Aspects for CorpUsers - datahub_corp_user_urn_to_group_membership: Dict[str, GroupMembershipClass] = {} + datahub_corp_user_urn_to_group_membership: Dict[ + str, GroupMembershipClass + ] = defaultdict(lambda: GroupMembershipClass(groups=[])) if self.config.ingest_group_membership and okta_groups is not None: # Fetch membership for each group. for okta_group in okta_groups: @@ -341,20 +344,10 @@ class OktaSource(Source): self.report.report_failure("okta_user_mapping", error_str) continue - # Either update or create the GroupMembership aspect for this group member. - # TODO: Production of the GroupMembership aspect will overwrite the existing - # group membership for the DataHub user. - if ( + # Update the GroupMembership aspect for this group member. + datahub_corp_user_urn_to_group_membership[ datahub_corp_user_urn - in datahub_corp_user_urn_to_group_membership - ): - datahub_corp_user_urn_to_group_membership[ - datahub_corp_user_urn - ].groups.append(datahub_corp_group_urn) - else: - datahub_corp_user_urn_to_group_membership[ - datahub_corp_user_urn - ] = GroupMembershipClass(groups=[datahub_corp_group_urn]) + ].groups.append(datahub_corp_group_urn) # Step 3: Produce MetadataWorkUnits for CorpUsers. if self.config.ingest_users: @@ -364,18 +357,15 @@ class OktaSource(Source): for user_count, datahub_corp_user_snapshot in enumerate( datahub_corp_user_snapshots ): - # Add GroupMembership aspect populated in Step 2 if applicable. 
- if ( - datahub_corp_user_snapshot.urn - in datahub_corp_user_urn_to_group_membership - ): - datahub_group_membership = ( - datahub_corp_user_urn_to_group_membership.get( - datahub_corp_user_snapshot.urn - ) - ) - assert datahub_group_membership is not None - datahub_corp_user_snapshot.aspects.append(datahub_group_membership) + # TODO: Refactor common code between this and Azure AD to a common base class or utils + # Add GroupMembership aspect populated in Step 2. + datahub_group_membership: GroupMembershipClass = ( + datahub_corp_user_urn_to_group_membership[ + datahub_corp_user_snapshot.urn + ] + ) + assert datahub_group_membership is not None + datahub_corp_user_snapshot.aspects.append(datahub_group_membership) mce = MetadataChangeEvent(proposedSnapshot=datahub_corp_user_snapshot) wu_id = f"user-snapshot-{user_count + 1 if self.config.mask_user_id else datahub_corp_user_snapshot.urn}" wu = MetadataWorkUnit(id=wu_id, mce=mce) diff --git a/metadata-ingestion/src/datahub/ingestion/source/ldap.py b/metadata-ingestion/src/datahub/ingestion/source/ldap.py index c7b2f75d60..82e5abc5ea 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ldap.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ldap.py @@ -412,8 +412,7 @@ class LDAPSource(StatefulIngestionSourceBase): ], ) - if groups: - user_snapshot.aspects.append(GroupMembershipClass(groups=groups)) + user_snapshot.aspects.append(GroupMembershipClass(groups=groups)) return MetadataChangeEvent(proposedSnapshot=user_snapshot) diff --git a/metadata-ingestion/tests/integration/azure_ad/azure_ad_groups.json b/metadata-ingestion/tests/integration/azure_ad/azure_ad_groups.json index e4b9ecfa75..01b2aa1319 100644 --- a/metadata-ingestion/tests/integration/azure_ad/azure_ad_groups.json +++ b/metadata-ingestion/tests/integration/azure_ad/azure_ad_groups.json @@ -66,5 +66,39 @@ "theme": null, "visibility": null, "onPremisesProvisioningErrors": [] + }, + { + "id": "00000000-0000-0000-0000-0000000000002", 
"deletedDateTime": null, + "classification": null, + "createdDateTime": "2021-08-20 11: 00: 00", + "creationOptions": [], + "description": "This is an interesting description", + "displayName": "groupDisplayName3", + "expirationDateTime": null, + "groupTypes": [], + "isAssignableToRole": null, + "mail": "groupDisplayName3@onmicrosoft.com", + "mailEnabled": false, + "mailNickname": "groupDisplayName3", + "membershipRule": null, + "membershipRuleProcessingState": null, + "onPremisesDomainName": null, + "onPremisesLastSyncDateTime": null, + "onPremisesNetBiosName": null, + "onPremisesSamAccountName": null, + "onPremisesSecurityIdentifier": null, + "onPremisesSyncEnabled": null, + "preferredDataLocation": null, + "preferredLanguage": null, + "proxyAddresses": [], + "renewedDateTime": "2021-08-20 11:00:00", + "resourceBehaviorOptions": [], + "resourceProvisioningOptions": [], + "securityEnabled": true, + "securityIdentifier": "xxxxx", + "theme": null, + "visibility": null, + "onPremisesProvisioningErrors": [] } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/azure_ad/azure_ad_mces_golden_default_config.json b/metadata-ingestion/tests/integration/azure_ad/azure_ad_mces_golden_default_config.json index 1801d5669a..504eb7f543 100644 --- a/metadata-ingestion/tests/integration/azure_ad/azure_ad_mces_golden_default_config.json +++ b/metadata-ingestion/tests/integration/azure_ad/azure_ad_mces_golden_default_config.json @@ -1,6 +1,5 @@ [ { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpGroupSnapshot": { "urn": "urn:li:corpGroup:groupDisplayName1", @@ -8,66 +7,51 @@ { "com.linkedin.pegasus2avro.identity.CorpGroupInfo": { "displayName": "groupDisplayName1", - "email": null, "admins": [], "members": [], - "groups": [], - "description": null, - "slack": null + "groups": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1629795600000, - "runId": "test-azure-ad", - "registryName": 
null, - "registryVersion": null, - "properties": null + "runId": "test-azure-ad" } }, { - "auditHeader": null, "entityType": "corpGroup", "entityUrn": "urn:li:corpGroup:groupDisplayName1", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "origin", "aspect": { - "value": "{\"type\": \"EXTERNAL\", \"externalType\": \"AZURE_AD\"}", - "contentType": "application/json" + "json": { + "type": "EXTERNAL", + "externalType": "AZURE_AD" + } }, "systemMetadata": { "lastObserved": 1629795600000, - "runId": "test-azure-ad", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-azure-ad" } }, { - "auditHeader": null, "entityType": "corpGroup", "entityUrn": "urn:li:corpGroup:groupDisplayName1", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1629795600000, - "runId": "test-azure-ad", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-azure-ad" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpGroupSnapshot": { "urn": "urn:li:corpGroup:groupDisplayName2", @@ -79,62 +63,103 @@ "admins": [], "members": [], "groups": [], - "description": "This is an interesting description", - "slack": null + "description": "This is an interesting description" } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1629795600000, - "runId": "test-azure-ad", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-azure-ad" } }, { - "auditHeader": null, "entityType": "corpGroup", "entityUrn": "urn:li:corpGroup:groupDisplayName2", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "origin", "aspect": { - "value": "{\"type\": \"EXTERNAL\", \"externalType\": \"AZURE_AD\"}", - "contentType": "application/json" + "json": { + 
"type": "EXTERNAL", + "externalType": "AZURE_AD" + } }, "systemMetadata": { "lastObserved": 1629795600000, - "runId": "test-azure-ad", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-azure-ad" } }, { - "auditHeader": null, "entityType": "corpGroup", "entityUrn": "urn:li:corpGroup:groupDisplayName2", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1629795600000, - "runId": "test-azure-ad", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-azure-ad" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.CorpGroupSnapshot": { + "urn": "urn:li:corpGroup:groupDisplayName3", + "aspects": [ + { + "com.linkedin.pegasus2avro.identity.CorpGroupInfo": { + "displayName": "groupDisplayName3", + "email": "groupDisplayName3@onmicrosoft.com", + "admins": [], + "members": [], + "groups": [], + "description": "This is an interesting description" + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1629795600000, + "runId": "test-azure-ad" + } +}, +{ + "entityType": "corpGroup", + "entityUrn": "urn:li:corpGroup:groupDisplayName3", + "changeType": "UPSERT", + "aspectName": "origin", + "aspect": { + "json": { + "type": "EXTERNAL", + "externalType": "AZURE_AD" + } + }, + "systemMetadata": { + "lastObserved": 1629795600000, + "runId": "test-azure-ad" + } +}, +{ + "entityType": "corpGroup", + "entityUrn": "urn:li:corpGroup:groupDisplayName3", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1629795600000, + "runId": "test-azure-ad" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": { "urn": "urn:li:corpuser:johngreen@acryl.io", @@ 
-144,76 +169,60 @@ "active": true, "displayName": "John Green", "email": "johngreen@acryl.io", - "title": null, - "managerUrn": null, - "departmentId": null, - "departmentName": null, "firstName": "John", "lastName": "Green", - "fullName": "John Green", - "countryCode": null + "fullName": "John Green" } }, { "com.linkedin.pegasus2avro.identity.GroupMembership": { "groups": [ "urn:li:corpGroup:groupDisplayName1", - "urn:li:corpGroup:groupDisplayName2" + "urn:li:corpGroup:groupDisplayName2", + "urn:li:corpGroup:groupDisplayName3" ] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1629795600000, - "runId": "test-azure-ad", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-azure-ad" } }, { - "auditHeader": null, "entityType": "corpuser", "entityUrn": "urn:li:corpuser:johngreen@acryl.io", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "origin", "aspect": { - "value": "{\"type\": \"EXTERNAL\", \"externalType\": \"AZURE_AD\"}", - "contentType": "application/json" + "json": { + "type": "EXTERNAL", + "externalType": "AZURE_AD" + } }, "systemMetadata": { "lastObserved": 1629795600000, - "runId": "test-azure-ad", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-azure-ad" } }, { - "auditHeader": null, "entityType": "corpuser", "entityUrn": "urn:li:corpuser:johngreen@acryl.io", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1629795600000, - "runId": "test-azure-ad", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-azure-ad" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": { "urn": "urn:li:corpuser:adamhall@acryl.io", @@ -223,14 +232,9 @@ "active": true, "displayName": "Adam 
Hall", "email": "adamhall@acryl.io", - "title": null, - "managerUrn": null, - "departmentId": null, - "departmentName": null, "firstName": "Adam", "lastName": "Hall", - "fullName": "Adam Hall", - "countryCode": null + "fullName": "Adam Hall" } }, { @@ -244,51 +248,40 @@ ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1629795600000, - "runId": "test-azure-ad", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-azure-ad" } }, { - "auditHeader": null, "entityType": "corpuser", "entityUrn": "urn:li:corpuser:adamhall@acryl.io", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "origin", "aspect": { - "value": "{\"type\": \"EXTERNAL\", \"externalType\": \"AZURE_AD\"}", - "contentType": "application/json" + "json": { + "type": "EXTERNAL", + "externalType": "AZURE_AD" + } }, "systemMetadata": { "lastObserved": 1629795600000, - "runId": "test-azure-ad", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-azure-ad" } }, { - "auditHeader": null, "entityType": "corpuser", "entityUrn": "urn:li:corpuser:adamhall@acryl.io", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1629795600000, - "runId": "test-azure-ad", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-azure-ad" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/azure_ad/azure_ad_mces_golden_empty_group_membership.json b/metadata-ingestion/tests/integration/azure_ad/azure_ad_mces_golden_empty_group_membership.json new file mode 100644 index 0000000000..5a12de1a06 --- /dev/null +++ b/metadata-ingestion/tests/integration/azure_ad/azure_ad_mces_golden_empty_group_membership.json @@ -0,0 +1,285 @@ +[ + { + "proposedSnapshot": { + 
"com.linkedin.pegasus2avro.metadata.snapshot.CorpGroupSnapshot": { + "urn": "urn:li:corpGroup:groupDisplayName1", + "aspects": [ + { + "com.linkedin.pegasus2avro.identity.CorpGroupInfo": { + "displayName": "groupDisplayName1", + "admins": [], + "members": [], + "groups": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1629795600000, + "runId": "test-azure-ad" + } + }, + { + "entityType": "corpGroup", + "entityUrn": "urn:li:corpGroup:groupDisplayName1", + "changeType": "UPSERT", + "aspectName": "origin", + "aspect": { + "json": { + "type": "EXTERNAL", + "externalType": "AZURE_AD" + } + }, + "systemMetadata": { + "lastObserved": 1629795600000, + "runId": "test-azure-ad" + } + }, + { + "entityType": "corpGroup", + "entityUrn": "urn:li:corpGroup:groupDisplayName1", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1629795600000, + "runId": "test-azure-ad" + } + }, + { + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.CorpGroupSnapshot": { + "urn": "urn:li:corpGroup:groupDisplayName2", + "aspects": [ + { + "com.linkedin.pegasus2avro.identity.CorpGroupInfo": { + "displayName": "groupDisplayName2", + "email": "groupDisplayName2@onmicrosoft.com", + "admins": [], + "members": [], + "groups": [], + "description": "This is an interesting description" + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1629795600000, + "runId": "test-azure-ad" + } + }, + { + "entityType": "corpGroup", + "entityUrn": "urn:li:corpGroup:groupDisplayName2", + "changeType": "UPSERT", + "aspectName": "origin", + "aspect": { + "json": { + "type": "EXTERNAL", + "externalType": "AZURE_AD" + } + }, + "systemMetadata": { + "lastObserved": 1629795600000, + "runId": "test-azure-ad" + } + }, + { + "entityType": "corpGroup", + "entityUrn": "urn:li:corpGroup:groupDisplayName2", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": 
false + } + }, + "systemMetadata": { + "lastObserved": 1629795600000, + "runId": "test-azure-ad" + } + }, + { + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.CorpGroupSnapshot": { + "urn": "urn:li:corpGroup:groupDisplayName3", + "aspects": [ + { + "com.linkedin.pegasus2avro.identity.CorpGroupInfo": { + "displayName": "groupDisplayName3", + "email": "groupDisplayName3@onmicrosoft.com", + "admins": [], + "members": [], + "groups": [], + "description": "This is an interesting description" + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1629795600000, + "runId": "test-azure-ad" + } + }, + { + "entityType": "corpGroup", + "entityUrn": "urn:li:corpGroup:groupDisplayName3", + "changeType": "UPSERT", + "aspectName": "origin", + "aspect": { + "json": { + "type": "EXTERNAL", + "externalType": "AZURE_AD" + } + }, + "systemMetadata": { + "lastObserved": 1629795600000, + "runId": "test-azure-ad" + } + }, + { + "entityType": "corpGroup", + "entityUrn": "urn:li:corpGroup:groupDisplayName3", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1629795600000, + "runId": "test-azure-ad" + } + }, + { + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": { + "urn": "urn:li:corpuser:johngreen@acryl.io", + "aspects": [ + { + "com.linkedin.pegasus2avro.identity.CorpUserInfo": { + "active": true, + "displayName": "John Green", + "email": "johngreen@acryl.io", + "firstName": "John", + "lastName": "Green", + "fullName": "John Green" + } + }, + { + "com.linkedin.pegasus2avro.identity.GroupMembership": { + "groups": [ + "urn:li:corpGroup:groupDisplayName1", + "urn:li:corpGroup:groupDisplayName2", + "urn:li:corpGroup:groupDisplayName3" + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1629795600000, + "runId": "test-azure-ad" + } + }, + { + "entityType": "corpuser", + "entityUrn": 
"urn:li:corpuser:johngreen@acryl.io", + "changeType": "UPSERT", + "aspectName": "origin", + "aspect": { + "json": { + "type": "EXTERNAL", + "externalType": "AZURE_AD" + } + }, + "systemMetadata": { + "lastObserved": 1629795600000, + "runId": "test-azure-ad" + } + }, + { + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:johngreen@acryl.io", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1629795600000, + "runId": "test-azure-ad" + } + }, + { + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": { + "urn": "urn:li:corpuser:adamhall@acryl.io", + "aspects": [ + { + "com.linkedin.pegasus2avro.identity.CorpUserInfo": { + "active": true, + "displayName": "Adam Hall", + "email": "adamhall@acryl.io", + "firstName": "Adam", + "lastName": "Hall", + "fullName": "Adam Hall" + } + }, + { + "com.linkedin.pegasus2avro.identity.GroupMembership": { + "groups": [ + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1629795600000, + "runId": "test-azure-ad" + } + }, + { + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:adamhall@acryl.io", + "changeType": "UPSERT", + "aspectName": "origin", + "aspect": { + "json": { + "type": "EXTERNAL", + "externalType": "AZURE_AD" + } + }, + "systemMetadata": { + "lastObserved": 1629795600000, + "runId": "test-azure-ad" + } + }, + { + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:adamhall@acryl.io", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1629795600000, + "runId": "test-azure-ad" + } + } +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/azure_ad/test_azure_ad.py b/metadata-ingestion/tests/integration/azure_ad/test_azure_ad.py index cf82d7c5b4..43133f6bd3 100644 --- a/metadata-ingestion/tests/integration/azure_ad/test_azure_ad.py +++ 
b/metadata-ingestion/tests/integration/azure_ad/test_azure_ad.py
@@ -107,6 +107,76 @@ def test_azure_ad_source_default_configs(pytestconfig, tmp_path):
     )
 
 
+@freeze_time(FROZEN_TIME)
+def test_azure_ad_source_empty_group_membership(pytestconfig, tmp_path):
+    """Users that belong to no group still get an empty GroupMembership aspect."""
+    test_resources_dir: pathlib.Path = (
+        pytestconfig.rootpath / "tests/integration/azure_ad"
+    )
+    # Dedicated output file so this test cannot pass on another test's output.
+    output_path = tmp_path / "azure_ad_mces_empty_group_membership.json"
+
+    with patch(
+        "datahub.ingestion.source.identity.azure_ad.AzureADSource.get_token"
+    ) as mock_token, patch(
+        "datahub.ingestion.source.identity.azure_ad.AzureADSource._get_azure_ad_users"
+    ) as mock_users, patch(
+        "datahub.ingestion.source.identity.azure_ad.AzureADSource._get_azure_ad_groups"
+    ) as mock_groups, patch(
+        "datahub.ingestion.source.identity.azure_ad.AzureADSource._get_azure_ad_group_members"
+    ) as mock_group_users:
+        mocked_functions(
+            test_resources_dir, mock_token, mock_users, mock_groups, mock_group_users
+        )
+        # Trim every mocked group to its first member so that the remaining user
+        # belongs to no group and must be emitted with `"groups": []`.
+        # NOTE(review): assumes mocked_functions() installs a callable
+        # side_effect that returns pages (lists) of members -- confirm.
+        all_group_members = mock_group_users.side_effect
+
+        def first_member_only(*args, **kwargs):
+            return [page[:1] for page in all_group_members(*args, **kwargs)]
+
+        mock_group_users.side_effect = first_member_only
+
+        # Run an Azure AD ingestion run.
+        pipeline = Pipeline.create(
+            {
+                "run_id": "test-azure-ad",
+                "source": {
+                    "type": "azure-ad",
+                    "config": {
+                        "client_id": "00000000-0000-0000-0000-000000000002",
+                        "tenant_id": "00000000-0000-0000-0000-000000000002",
+                        "client_secret": "client_secret",
+                        "redirect": "https://login.microsoftonline.com/common/oauth2/nativeclient",
+                        "authority": "https://login.microsoftonline.com/00000000-0000-0000-0000-000000000000",
+                        "token_url": "https://login.microsoftonline.com/00000000-0000-0000-0000-000000000000/oauth2/token",
+                        "graph_url": "https://graph.microsoft.com/v1.0",
+                        "ingest_group_membership": True,
+                        "ingest_groups": True,
+                        "ingest_users": True,
+                    },
+                },
+                "sink": {
+                    "type": "file",
+                    "config": {
+                        "filename": str(output_path),
+                    },
+                },
+            }
+        )
+        pipeline.run()
+        pipeline.raise_from_status()
+
+    # Compare against the golden file added for this scenario, not the default one.
+    mce_helpers.check_golden_file(
+        pytestconfig,
+        output_path=output_path,
+        golden_path=test_resources_dir
+        / "azure_ad_mces_golden_empty_group_membership.json",
+    )
+
+
@freeze_time(FROZEN_TIME) def test_azure_ad_source_nested_groups(pytestconfig, tmp_path): test_resources_dir: pathlib.Path = ( @@ -287,6 +342,8 @@ def mocked_functions( return [users] if group_id == "00000000-0000-0000-0000-0000000000001": return [users] + if group_id == "00000000-0000-0000-0000-0000000000002": + return [users[0:1]] if group_id == "99999999-9999-9999-9999-999999999999": return [nested_group_members] raise ValueError(f"Unexpected Azure AD group ID {group_id}") diff --git a/metadata-ingestion/tests/integration/ldap/ldap_mces_golden.json b/metadata-ingestion/tests/integration/ldap/ldap_mces_golden.json index 04d5642225..90b3f0119f 100644 --- a/metadata-ingestion/tests/integration/ldap/ldap_mces_golden.json +++ b/metadata-ingestion/tests/integration/ldap/ldap_mces_golden.json @@ -41,6 +41,11 @@ "lastName": "Simpson", "fullName": "Bart Simpson" } + }, + { + "com.linkedin.pegasus2avro.identity.GroupMembership": { + "groups": [] + } } ] } @@ -71,6 +76,11 @@ "lastName": "Simpson", "fullName": "Homer Simpson" } + }, + { + "com.linkedin.pegasus2avro.identity.GroupMembership": { + "groups": [] + } } ] } @@ -97,6 +107,11 @@ "lastName": "Simpson", "fullName": "Lisa Simpson" } + }, + { + "com.linkedin.pegasus2avro.identity.GroupMembership": { + "groups": [] + } } ] } @@ -123,6 +138,11 @@ "lastName": "Simpson", "fullName": "Maggie Simpson" } + }, + { + "com.linkedin.pegasus2avro.identity.GroupMembership": { + "groups": [] + } } ] } @@ -149,6 +169,11 @@ "lastName": "Bevan", "fullName": "Hester Bevan" } + }, + { + "com.linkedin.pegasus2avro.identity.GroupMembership": { + "groups": [] + } } ] } @@ -175,6 +200,11 @@ "lastName": "Haas", "fullName": "Evalyn Haas" } + }, + { + "com.linkedin.pegasus2avro.identity.GroupMembership": { + "groups": [] + } } ] } @@ -232,8 +262,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { 
"lastObserved": 1615443388097, @@ -246,8 +277,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1615443388097, @@ -260,8 +292,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1615443388097, @@ -274,8 +307,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1615443388097, @@ -288,8 +322,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1615443388097, @@ -302,8 +337,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1615443388097, @@ -316,8 +352,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1615443388097, @@ -330,8 +367,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1615443388097, @@ -344,8 +382,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1615443388097, diff --git 
a/metadata-ingestion/tests/integration/ldap/ldap_mces_golden_deleted_stateful.json b/metadata-ingestion/tests/integration/ldap/ldap_mces_golden_deleted_stateful.json index 3c03500e8b..7462ea1458 100644 --- a/metadata-ingestion/tests/integration/ldap/ldap_mces_golden_deleted_stateful.json +++ b/metadata-ingestion/tests/integration/ldap/ldap_mces_golden_deleted_stateful.json @@ -15,6 +15,11 @@ "lastName": "Simpson", "fullName": "Bart Simpson" } + }, + { + "com.linkedin.pegasus2avro.identity.GroupMembership": { + "groups": [] + } } ] } @@ -30,8 +35,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1660460400000, @@ -44,8 +50,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": true}", - "contentType": "application/json" + "json": { + "removed": true + } }, "systemMetadata": { "lastObserved": 1660460400000, diff --git a/metadata-ingestion/tests/integration/ldap/ldap_mces_golden_stateful.json b/metadata-ingestion/tests/integration/ldap/ldap_mces_golden_stateful.json index eea0426516..d79bb56f1a 100644 --- a/metadata-ingestion/tests/integration/ldap/ldap_mces_golden_stateful.json +++ b/metadata-ingestion/tests/integration/ldap/ldap_mces_golden_stateful.json @@ -15,6 +15,11 @@ "lastName": "Simpson", "fullName": "Bart Simpson" } + }, + { + "com.linkedin.pegasus2avro.identity.GroupMembership": { + "groups": [] + } } ] } @@ -43,6 +48,11 @@ "lastName": "Simpson", "fullName": "Homer Simpson" } + }, + { + "com.linkedin.pegasus2avro.identity.GroupMembership": { + "groups": [] + } } ] } @@ -58,8 +68,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1660460400000, @@ -72,8 +83,9 @@ "changeType": "UPSERT", "aspectName": "status", 
"aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1660460400000, diff --git a/metadata-ingestion/tests/integration/okta/okta_mces_golden_custom_user_name_regex.json b/metadata-ingestion/tests/integration/okta/okta_mces_golden_custom_user_name_regex.json index 158a762439..784d79ff5b 100644 --- a/metadata-ingestion/tests/integration/okta/okta_mces_golden_custom_user_name_regex.json +++ b/metadata-ingestion/tests/integration/okta/okta_mces_golden_custom_user_name_regex.json @@ -1,6 +1,5 @@ [ { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpGroupSnapshot": { "urn": "urn:li:corpGroup:All%20Employees", @@ -8,66 +7,52 @@ { "com.linkedin.pegasus2avro.identity.CorpGroupInfo": { "displayName": "All Employees", - "email": null, "admins": [], "members": [], "groups": [], - "description": "All Employees in the Test Company.", - "slack": null + "description": "All Employees in the Test Company." 
} } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpGroup", "entityUrn": "urn:li:corpGroup:All%20Employees", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "origin", "aspect": { - "value": "{\"type\": \"EXTERNAL\", \"externalType\": \"OKTA\"}", - "contentType": "application/json" + "json": { + "type": "EXTERNAL", + "externalType": "OKTA" + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpGroup", "entityUrn": "urn:li:corpGroup:All%20Employees", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpGroupSnapshot": { "urn": "urn:li:corpGroup:Engineering", @@ -75,66 +60,52 @@ { "com.linkedin.pegasus2avro.identity.CorpGroupInfo": { "displayName": "Engineering", - "email": null, "admins": [], "members": [], "groups": [], - "description": "Engineering team!", - "slack": null + "description": "Engineering team!" 
} } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpGroup", "entityUrn": "urn:li:corpGroup:Engineering", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "origin", "aspect": { - "value": "{\"type\": \"EXTERNAL\", \"externalType\": \"OKTA\"}", - "contentType": "application/json" + "json": { + "type": "EXTERNAL", + "externalType": "OKTA" + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpGroup", "entityUrn": "urn:li:corpGroup:Engineering", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": { "urn": "urn:li:corpuser:john.doe@test.com", @@ -144,14 +115,9 @@ "active": true, "displayName": "JDoe", "email": "john.doe@test.com", - "title": null, - "managerUrn": null, - "departmentId": null, - "departmentName": null, "firstName": "John", "lastName": "Doe", - "fullName": "John Doe", - "countryCode": null + "fullName": "John Doe" } }, { @@ -165,55 +131,43 @@ ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpuser", 
"entityUrn": "urn:li:corpuser:john.doe@test.com", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "origin", "aspect": { - "value": "{\"type\": \"EXTERNAL\", \"externalType\": \"OKTA\"}", - "contentType": "application/json" + "json": { + "type": "EXTERNAL", + "externalType": "OKTA" + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpuser", "entityUrn": "urn:li:corpuser:john.doe@test.com", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": { "urn": "urn:li:corpuser:mary.jane@test.com", @@ -224,8 +178,6 @@ "displayName": "Mary Jane", "email": "mary.jane@test.com", "title": "Software Engineer", - "managerUrn": null, - "departmentId": null, "departmentName": "Engineering", "firstName": "Mary", "lastName": "Jane", @@ -246,51 +198,102 @@ ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpuser", "entityUrn": "urn:li:corpuser:mary.jane@test.com", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "origin", "aspect": { - "value": "{\"type\": \"EXTERNAL\", \"externalType\": \"OKTA\"}", - "contentType": "application/json" + "json": { + "type": "EXTERNAL", + "externalType": "OKTA" + } }, "systemMetadata": { "lastObserved": 
1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpuser", "entityUrn": "urn:li:corpuser:mary.jane@test.com", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": { + "urn": "urn:li:corpuser:good.test@test.com", + "aspects": [ + { + "com.linkedin.pegasus2avro.identity.CorpUserInfo": { + "active": true, + "displayName": "Good Test", + "email": "good.test@test.com", + "title": "Manager", + "departmentName": "Marketing", + "firstName": "Good", + "lastName": "Test", + "fullName": "Good Test", + "countryCode": "eu" + } + }, + { + "com.linkedin.pegasus2avro.identity.GroupMembership": { + "groups": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "test-okta-usage" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:good.test@test.com", + "changeType": "UPSERT", + "aspectName": "origin", + "aspect": { + "json": { + "type": "EXTERNAL", + "externalType": "OKTA" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "test-okta-usage" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:good.test@test.com", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "test-okta-usage" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/okta/okta_mces_golden_default_config.json 
b/metadata-ingestion/tests/integration/okta/okta_mces_golden_default_config.json index 570be49b20..d871550689 100644 --- a/metadata-ingestion/tests/integration/okta/okta_mces_golden_default_config.json +++ b/metadata-ingestion/tests/integration/okta/okta_mces_golden_default_config.json @@ -1,6 +1,5 @@ [ { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpGroupSnapshot": { "urn": "urn:li:corpGroup:All%20Employees", @@ -8,66 +7,52 @@ { "com.linkedin.pegasus2avro.identity.CorpGroupInfo": { "displayName": "All Employees", - "email": null, "admins": [], "members": [], "groups": [], - "description": "All Employees in the Test Company.", - "slack": null + "description": "All Employees in the Test Company." } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpGroup", "entityUrn": "urn:li:corpGroup:All%20Employees", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "origin", "aspect": { - "value": "{\"type\": \"EXTERNAL\", \"externalType\": \"OKTA\"}", - "contentType": "application/json" + "json": { + "type": "EXTERNAL", + "externalType": "OKTA" + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpGroup", "entityUrn": "urn:li:corpGroup:All%20Employees", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" 
} }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpGroupSnapshot": { "urn": "urn:li:corpGroup:Engineering", @@ -75,66 +60,52 @@ { "com.linkedin.pegasus2avro.identity.CorpGroupInfo": { "displayName": "Engineering", - "email": null, "admins": [], "members": [], "groups": [], - "description": "Engineering team!", - "slack": null + "description": "Engineering team!" } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpGroup", "entityUrn": "urn:li:corpGroup:Engineering", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "origin", "aspect": { - "value": "{\"type\": \"EXTERNAL\", \"externalType\": \"OKTA\"}", - "contentType": "application/json" + "json": { + "type": "EXTERNAL", + "externalType": "OKTA" + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpGroup", "entityUrn": "urn:li:corpGroup:Engineering", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": { "urn": "urn:li:corpuser:john.doe", @@ -144,14 +115,9 @@ "active": true, "displayName": "JDoe", "email": "john.doe@test.com", - "title": null, - "managerUrn": null, - "departmentId": null, - "departmentName": null, "firstName": 
"John", "lastName": "Doe", - "fullName": "John Doe", - "countryCode": null + "fullName": "John Doe" } }, { @@ -165,55 +131,43 @@ ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpuser", "entityUrn": "urn:li:corpuser:john.doe", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "origin", "aspect": { - "value": "{\"type\": \"EXTERNAL\", \"externalType\": \"OKTA\"}", - "contentType": "application/json" + "json": { + "type": "EXTERNAL", + "externalType": "OKTA" + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpuser", "entityUrn": "urn:li:corpuser:john.doe", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": { "urn": "urn:li:corpuser:mary.jane", @@ -224,8 +178,6 @@ "displayName": "Mary Jane", "email": "mary.jane@test.com", "title": "Software Engineer", - "managerUrn": null, - "departmentId": null, "departmentName": "Engineering", "firstName": "Mary", "lastName": "Jane", @@ -246,51 +198,102 @@ ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": 
null, "entityType": "corpuser", "entityUrn": "urn:li:corpuser:mary.jane", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "origin", "aspect": { - "value": "{\"type\": \"EXTERNAL\", \"externalType\": \"OKTA\"}", - "contentType": "application/json" + "json": { + "type": "EXTERNAL", + "externalType": "OKTA" + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpuser", "entityUrn": "urn:li:corpuser:mary.jane", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": { + "urn": "urn:li:corpuser:good.test", + "aspects": [ + { + "com.linkedin.pegasus2avro.identity.CorpUserInfo": { + "active": true, + "displayName": "Good Test", + "email": "good.test@test.com", + "title": "Manager", + "departmentName": "Marketing", + "firstName": "Good", + "lastName": "Test", + "fullName": "Good Test", + "countryCode": "eu" + } + }, + { + "com.linkedin.pegasus2avro.identity.GroupMembership": { + "groups": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "test-okta-usage" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:good.test", + "changeType": "UPSERT", + "aspectName": "origin", + "aspect": { + "json": { + "type": "EXTERNAL", + "externalType": "OKTA" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "test-okta-usage" + } +}, +{ + "entityType": "corpuser", + "entityUrn": 
"urn:li:corpuser:good.test", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "test-okta-usage" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/okta/okta_mces_golden_include_deprovisioned_suspended_users.json b/metadata-ingestion/tests/integration/okta/okta_mces_golden_include_deprovisioned_suspended_users.json index 94ddd99f20..0107d8c2a1 100644 --- a/metadata-ingestion/tests/integration/okta/okta_mces_golden_include_deprovisioned_suspended_users.json +++ b/metadata-ingestion/tests/integration/okta/okta_mces_golden_include_deprovisioned_suspended_users.json @@ -1,6 +1,5 @@ [ { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpGroupSnapshot": { "urn": "urn:li:corpGroup:All%20Employees", @@ -8,66 +7,52 @@ { "com.linkedin.pegasus2avro.identity.CorpGroupInfo": { "displayName": "All Employees", - "email": null, "admins": [], "members": [], "groups": [], - "description": "All Employees in the Test Company.", - "slack": null + "description": "All Employees in the Test Company." 
} } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpGroup", "entityUrn": "urn:li:corpGroup:All%20Employees", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "origin", "aspect": { - "value": "{\"type\": \"EXTERNAL\", \"externalType\": \"OKTA\"}", - "contentType": "application/json" + "json": { + "type": "EXTERNAL", + "externalType": "OKTA" + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpGroup", "entityUrn": "urn:li:corpGroup:All%20Employees", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpGroupSnapshot": { "urn": "urn:li:corpGroup:Engineering", @@ -75,66 +60,52 @@ { "com.linkedin.pegasus2avro.identity.CorpGroupInfo": { "displayName": "Engineering", - "email": null, "admins": [], "members": [], "groups": [], - "description": "Engineering team!", - "slack": null + "description": "Engineering team!" 
} } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpGroup", "entityUrn": "urn:li:corpGroup:Engineering", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "origin", "aspect": { - "value": "{\"type\": \"EXTERNAL\", \"externalType\": \"OKTA\"}", - "contentType": "application/json" + "json": { + "type": "EXTERNAL", + "externalType": "OKTA" + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpGroup", "entityUrn": "urn:li:corpGroup:Engineering", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": { "urn": "urn:li:corpuser:john.doe", @@ -144,14 +115,9 @@ "active": true, "displayName": "JDoe", "email": "john.doe@test.com", - "title": null, - "managerUrn": null, - "departmentId": null, - "departmentName": null, "firstName": "John", "lastName": "Doe", - "fullName": "John Doe", - "countryCode": null + "fullName": "John Doe" } }, { @@ -165,55 +131,43 @@ ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpuser", "entityUrn": 
"urn:li:corpuser:john.doe", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "origin", "aspect": { - "value": "{\"type\": \"EXTERNAL\", \"externalType\": \"OKTA\"}", - "contentType": "application/json" + "json": { + "type": "EXTERNAL", + "externalType": "OKTA" + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpuser", "entityUrn": "urn:li:corpuser:john.doe", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": { "urn": "urn:li:corpuser:mary.jane", @@ -224,8 +178,6 @@ "displayName": "Mary Jane", "email": "mary.jane@test.com", "title": "Software Engineer", - "managerUrn": null, - "departmentId": null, "departmentName": "Engineering", "firstName": "Mary", "lastName": "Jane", @@ -246,55 +198,105 @@ ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpuser", "entityUrn": "urn:li:corpuser:mary.jane", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "origin", "aspect": { - "value": "{\"type\": \"EXTERNAL\", \"externalType\": \"OKTA\"}", - "contentType": "application/json" + "json": { + "type": "EXTERNAL", + "externalType": "OKTA" + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - 
"registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpuser", "entityUrn": "urn:li:corpuser:mary.jane", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": { + "urn": "urn:li:corpuser:good.test", + "aspects": [ + { + "com.linkedin.pegasus2avro.identity.CorpUserInfo": { + "active": true, + "displayName": "Good Test", + "email": "good.test@test.com", + "title": "Manager", + "departmentName": "Marketing", + "firstName": "Good", + "lastName": "Test", + "fullName": "Good Test", + "countryCode": "eu" + } + }, + { + "com.linkedin.pegasus2avro.identity.GroupMembership": { + "groups": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "test-okta-usage" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:good.test", + "changeType": "UPSERT", + "aspectName": "origin", + "aspect": { + "json": { + "type": "EXTERNAL", + "externalType": "OKTA" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "test-okta-usage" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:good.test", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "test-okta-usage" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": { "urn": "urn:li:corpuser:mary.jane", @@ -305,8 +307,6 @@ "displayName": "Mary Jane", "email": 
"mary.jane@test.com", "title": "Software Engineer II", - "managerUrn": null, - "departmentId": null, "departmentName": "Engineering", "firstName": "Mary", "lastName": "Jane", @@ -327,55 +327,43 @@ ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpuser", "entityUrn": "urn:li:corpuser:mary.jane", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "origin", "aspect": { - "value": "{\"type\": \"EXTERNAL\", \"externalType\": \"OKTA\"}", - "contentType": "application/json" + "json": { + "type": "EXTERNAL", + "externalType": "OKTA" + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpuser", "entityUrn": "urn:li:corpuser:mary.jane", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": { "urn": "urn:li:corpuser:bad.boyjones", @@ -385,14 +373,9 @@ "active": true, "displayName": "Bad Boy Jones", "email": "bad.boyjones@test.com", - "title": null, - "managerUrn": null, - "departmentId": null, - "departmentName": null, "firstName": "Bad", "lastName": "Boy Jones", - "fullName": "Bad Boy Jones", - "countryCode": null + "fullName": "Bad Boy Jones" } }, { @@ -406,55 +389,43 @@ ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 
1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpuser", "entityUrn": "urn:li:corpuser:bad.boyjones", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "origin", "aspect": { - "value": "{\"type\": \"EXTERNAL\", \"externalType\": \"OKTA\"}", - "contentType": "application/json" + "json": { + "type": "EXTERNAL", + "externalType": "OKTA" + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpuser", "entityUrn": "urn:li:corpuser:bad.boyjones", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": { "urn": "urn:li:corpuser:bad.girlriri", @@ -465,8 +436,6 @@ "displayName": "Bad Girl Riri", "email": "bad.girlriri@test.com", "title": "Manager", - "managerUrn": null, - "departmentId": null, "departmentName": "Marketing", "firstName": "Bad", "lastName": "Girl Riri", @@ -478,6 +447,8 @@ "com.linkedin.pegasus2avro.identity.GroupMembership": { "groups": [ "urn:li:corpGroup:All%20Employees", + "urn:li:corpGroup:All%20Employees", + "urn:li:corpGroup:Engineering", "urn:li:corpGroup:Engineering" ] } @@ -485,51 +456,40 @@ ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": 
"test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpuser", "entityUrn": "urn:li:corpuser:bad.girlriri", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "origin", "aspect": { - "value": "{\"type\": \"EXTERNAL\", \"externalType\": \"OKTA\"}", - "contentType": "application/json" + "json": { + "type": "EXTERNAL", + "externalType": "OKTA" + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } }, { - "auditHeader": null, "entityType": "corpuser", "entityUrn": "urn:li:corpuser:bad.girlriri", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-okta-usage" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/okta/okta_users.json b/metadata-ingestion/tests/integration/okta/okta_users.json index d558b228c4..85fef8becf 100644 --- a/metadata-ingestion/tests/integration/okta/okta_users.json +++ b/metadata-ingestion/tests/integration/okta/okta_users.json @@ -37,6 +37,26 @@ "countryCode": "us" } }, + { + "id": "5", + "status": "ACTIVE", + "created": "2013-07-02T21:37:25.344Z", + "activated": null, + "statusChanged": null, + "lastLogin": null, + "lastUpdated": "2013-07-02T21:37:25.344Z", + "passwordChanged": null, + "profile": { + "firstName": "Good", + "lastName": "Test", + "email": "good.test@test.com", + "login": "good.test@test.com", + "mobilePhone": "666-415-1337", + "title": "Manager", + "department": "Marketing", + "countryCode": "eu" + } + }, { "id": "2", "status": "DEPROVISIONED", diff --git a/metadata-ingestion/tests/integration/okta/test_okta.py 
b/metadata-ingestion/tests/integration/okta/test_okta.py index c34a03abaf..2ef86c874e 100644 --- a/metadata-ingestion/tests/integration/okta/test_okta.py +++ b/metadata-ingestion/tests/integration/okta/test_okta.py @@ -12,6 +12,7 @@ from datahub.ingestion.source.identity.okta import OktaConfig from tests.test_helpers import mce_helpers FROZEN_TIME = "2020-04-14 07:00:00" +USER_ID_NOT_IN_GROUPS = "5" def test_okta_config(): @@ -244,8 +245,6 @@ def _init_mock_okta_client(test_resources_dir, MockClient): # Create groups from JSON dicts groups = list(map(lambda groupJson: Group(groupJson), reference_groups)) - # For simplicity, each user is placed in ALL groups. - # Mock Client List response. users_resp_mock = Mock() users_resp_mock.has_next.side_effect = [True, False] @@ -281,7 +280,7 @@ def _init_mock_okta_client(test_resources_dir, MockClient): # Create a separate response mock for each group in our sample data. list_group_users_result_values = [] - for group in groups: + for _ in groups: # Mock Get Group Membership group_users_resp_mock = Mock() group_users_resp_mock.has_next.side_effect = [True, False] @@ -293,7 +292,11 @@ def _init_mock_okta_client(test_resources_dir, MockClient): group_users_resp_mock.next.return_value = group_users_next_future # users, resp, err list_group_users_future = asyncio.Future() # type: asyncio.Future - list_group_users_future.set_result((users[0:-1], group_users_resp_mock, None)) + # Exclude last user from being in any groups + filtered_users = [user for user in users if user.id != USER_ID_NOT_IN_GROUPS] + list_group_users_future.set_result( + (filtered_users, group_users_resp_mock, None) + ) list_group_users_result_values.append(list_group_users_future) MockClient().list_group_users.side_effect = list_group_users_result_values