mirror of
https://github.com/datahub-project/datahub.git
synced 2025-06-27 05:03:31 +00:00
feat(okta): adds ingest_groups_users config parameter (#12371)
Co-authored-by: Harshal Sheth <hsheth2@gmail.com>
This commit is contained in:
parent
08a5183267
commit
b20211536c
@ -5,7 +5,7 @@ import urllib
|
|||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from time import sleep
|
from time import sleep
|
||||||
from typing import Dict, Iterable, List, Optional, Union
|
from typing import Dict, Iterable, List, Optional, Set, Union
|
||||||
|
|
||||||
import nest_asyncio
|
import nest_asyncio
|
||||||
from okta.client import Client as OktaClient
|
from okta.client import Client as OktaClient
|
||||||
@ -77,6 +77,10 @@ class OktaConfig(StatefulIngestionConfigBase, ConfigModel):
|
|||||||
default=True,
|
default=True,
|
||||||
description="Whether group membership should be ingested into DataHub. ingest_groups must be True if this is True.",
|
description="Whether group membership should be ingested into DataHub. ingest_groups must be True if this is True.",
|
||||||
)
|
)
|
||||||
|
ingest_groups_users: bool = Field(
|
||||||
|
default=True,
|
||||||
|
description="Only ingest users belonging to the selected groups. This option is only useful when `ingest_users` is set to False and `ingest_group_membership` to True.",
|
||||||
|
)
|
||||||
|
|
||||||
# Optional: Customize the mapping to DataHub Username from an attribute appearing in the Okta User
|
# Optional: Customize the mapping to DataHub Username from an attribute appearing in the Okta User
|
||||||
# profile. Reference: https://developer.okta.com/docs/reference/api/users/
|
# profile. Reference: https://developer.okta.com/docs/reference/api/users/
|
||||||
@ -344,6 +348,7 @@ class OktaSource(StatefulIngestionSourceBase):
|
|||||||
aspect=StatusClass(removed=False),
|
aspect=StatusClass(removed=False),
|
||||||
).as_workunit()
|
).as_workunit()
|
||||||
|
|
||||||
|
okta_users: Set[User] = set()
|
||||||
# Step 2: Populate GroupMembership Aspects for CorpUsers
|
# Step 2: Populate GroupMembership Aspects for CorpUsers
|
||||||
datahub_corp_user_urn_to_group_membership: Dict[str, GroupMembershipClass] = (
|
datahub_corp_user_urn_to_group_membership: Dict[str, GroupMembershipClass] = (
|
||||||
defaultdict(lambda: GroupMembershipClass(groups=[]))
|
defaultdict(lambda: GroupMembershipClass(groups=[]))
|
||||||
@ -372,6 +377,9 @@ class OktaSource(StatefulIngestionSourceBase):
|
|||||||
self.report.report_failure("okta_user_mapping", error_str)
|
self.report.report_failure("okta_user_mapping", error_str)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
if self.config.ingest_groups_users:
|
||||||
|
okta_users.add(okta_user)
|
||||||
|
|
||||||
# Update the GroupMembership aspect for this group member.
|
# Update the GroupMembership aspect for this group member.
|
||||||
datahub_corp_user_urn_to_group_membership[
|
datahub_corp_user_urn_to_group_membership[
|
||||||
datahub_corp_user_urn
|
datahub_corp_user_urn
|
||||||
@ -379,7 +387,10 @@ class OktaSource(StatefulIngestionSourceBase):
|
|||||||
|
|
||||||
# Step 3: Produce MetadataWorkUnits for CorpUsers.
|
# Step 3: Produce MetadataWorkUnits for CorpUsers.
|
||||||
if self.config.ingest_users:
|
if self.config.ingest_users:
|
||||||
okta_users = self._get_okta_users(event_loop)
|
# we can just throw away collected okta users so far and fetch them all
|
||||||
|
okta_users = set(self._get_okta_users(event_loop))
|
||||||
|
|
||||||
|
if okta_users:
|
||||||
filtered_okta_users = filter(self._filter_okta_user, okta_users)
|
filtered_okta_users = filter(self._filter_okta_user, okta_users)
|
||||||
datahub_corp_user_snapshots = self._map_okta_users(filtered_okta_users)
|
datahub_corp_user_snapshots = self._map_okta_users(filtered_okta_users)
|
||||||
for user_count, datahub_corp_user_snapshot in enumerate(
|
for user_count, datahub_corp_user_snapshot in enumerate(
|
||||||
|
@ -0,0 +1,251 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"proposedSnapshot": {
|
||||||
|
"com.linkedin.pegasus2avro.metadata.snapshot.CorpGroupSnapshot": {
|
||||||
|
"urn": "urn:li:corpGroup:All%20Employees",
|
||||||
|
"aspects": [
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.identity.CorpGroupInfo": {
|
||||||
|
"displayName": "All Employees",
|
||||||
|
"admins": [],
|
||||||
|
"members": [],
|
||||||
|
"groups": [],
|
||||||
|
"description": "All Employees in the Test Company."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1586847600000,
|
||||||
|
"runId": "test-okta-usage",
|
||||||
|
"lastRunId": "no-run-id-provided"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "corpGroup",
|
||||||
|
"entityUrn": "urn:li:corpGroup:All%20Employees",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "origin",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"type": "EXTERNAL",
|
||||||
|
"externalType": "OKTA"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1586847600000,
|
||||||
|
"runId": "test-okta-usage",
|
||||||
|
"lastRunId": "no-run-id-provided"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "corpGroup",
|
||||||
|
"entityUrn": "urn:li:corpGroup:All%20Employees",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "status",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"removed": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1586847600000,
|
||||||
|
"runId": "test-okta-usage",
|
||||||
|
"lastRunId": "no-run-id-provided"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"proposedSnapshot": {
|
||||||
|
"com.linkedin.pegasus2avro.metadata.snapshot.CorpGroupSnapshot": {
|
||||||
|
"urn": "urn:li:corpGroup:Engineering",
|
||||||
|
"aspects": [
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.identity.CorpGroupInfo": {
|
||||||
|
"displayName": "Engineering",
|
||||||
|
"admins": [],
|
||||||
|
"members": [],
|
||||||
|
"groups": [],
|
||||||
|
"description": "Engineering team!"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1586847600000,
|
||||||
|
"runId": "test-okta-usage",
|
||||||
|
"lastRunId": "no-run-id-provided"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "corpGroup",
|
||||||
|
"entityUrn": "urn:li:corpGroup:Engineering",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "origin",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"type": "EXTERNAL",
|
||||||
|
"externalType": "OKTA"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1586847600000,
|
||||||
|
"runId": "test-okta-usage",
|
||||||
|
"lastRunId": "no-run-id-provided"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "corpGroup",
|
||||||
|
"entityUrn": "urn:li:corpGroup:Engineering",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "status",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"removed": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1586847600000,
|
||||||
|
"runId": "test-okta-usage",
|
||||||
|
"lastRunId": "no-run-id-provided"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"proposedSnapshot": {
|
||||||
|
"com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": {
|
||||||
|
"urn": "urn:li:corpuser:mary.jane",
|
||||||
|
"aspects": [
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.identity.CorpUserInfo": {
|
||||||
|
"customProperties": {},
|
||||||
|
"active": true,
|
||||||
|
"displayName": "Mary Jane",
|
||||||
|
"email": "mary.jane@test.com",
|
||||||
|
"title": "Software Engineer",
|
||||||
|
"departmentName": "Engineering",
|
||||||
|
"firstName": "Mary",
|
||||||
|
"lastName": "Jane",
|
||||||
|
"fullName": "Mary Jane",
|
||||||
|
"countryCode": "us"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.identity.GroupMembership": {
|
||||||
|
"groups": [
|
||||||
|
"urn:li:corpGroup:All%20Employees",
|
||||||
|
"urn:li:corpGroup:All%20Employees",
|
||||||
|
"urn:li:corpGroup:Engineering",
|
||||||
|
"urn:li:corpGroup:Engineering"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1586847600000,
|
||||||
|
"runId": "test-okta-usage",
|
||||||
|
"lastRunId": "no-run-id-provided"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "corpuser",
|
||||||
|
"entityUrn": "urn:li:corpuser:mary.jane",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "origin",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"type": "EXTERNAL",
|
||||||
|
"externalType": "OKTA"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1586847600000,
|
||||||
|
"runId": "test-okta-usage",
|
||||||
|
"lastRunId": "no-run-id-provided"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "corpuser",
|
||||||
|
"entityUrn": "urn:li:corpuser:mary.jane",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "status",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"removed": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1586847600000,
|
||||||
|
"runId": "test-okta-usage",
|
||||||
|
"lastRunId": "no-run-id-provided"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"proposedSnapshot": {
|
||||||
|
"com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": {
|
||||||
|
"urn": "urn:li:corpuser:john.doe",
|
||||||
|
"aspects": [
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.identity.CorpUserInfo": {
|
||||||
|
"customProperties": {},
|
||||||
|
"active": true,
|
||||||
|
"displayName": "JDoe",
|
||||||
|
"email": "john.doe@test.com",
|
||||||
|
"firstName": "John",
|
||||||
|
"lastName": "Doe",
|
||||||
|
"fullName": "John Doe"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.identity.GroupMembership": {
|
||||||
|
"groups": [
|
||||||
|
"urn:li:corpGroup:All%20Employees",
|
||||||
|
"urn:li:corpGroup:Engineering"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1586847600000,
|
||||||
|
"runId": "test-okta-usage",
|
||||||
|
"lastRunId": "no-run-id-provided"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "corpuser",
|
||||||
|
"entityUrn": "urn:li:corpuser:john.doe",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "origin",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"type": "EXTERNAL",
|
||||||
|
"externalType": "OKTA"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1586847600000,
|
||||||
|
"runId": "test-okta-usage",
|
||||||
|
"lastRunId": "no-run-id-provided"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "corpuser",
|
||||||
|
"entityUrn": "urn:li:corpuser:john.doe",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "status",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"removed": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1586847600000,
|
||||||
|
"runId": "test-okta-usage",
|
||||||
|
"lastRunId": "no-run-id-provided"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
@ -120,6 +120,32 @@ def test_okta_source_default_configs(pytestconfig, mock_datahub_graph, tmp_path)
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@freeze_time(FROZEN_TIME)
|
||||||
|
def test_okta_source_ingest_groups_users(pytestconfig, mock_datahub_graph, tmp_path):
|
||||||
|
test_resources_dir: pathlib.Path = pytestconfig.rootpath / "tests/integration/okta"
|
||||||
|
|
||||||
|
output_file_path = f"{tmp_path}/okta_mces_ingest_groups_users.json"
|
||||||
|
|
||||||
|
new_recipe = default_recipe(output_file_path)
|
||||||
|
new_recipe["source"]["config"]["ingest_users"] = False
|
||||||
|
new_recipe["source"]["config"]["ingest_groups"] = True
|
||||||
|
new_recipe["source"]["config"]["ingest_groups_users"] = True
|
||||||
|
|
||||||
|
run_ingest(
|
||||||
|
mock_datahub_graph=mock_datahub_graph,
|
||||||
|
mocked_functions_reference=partial(
|
||||||
|
_init_mock_okta_client, test_resources_dir=test_resources_dir
|
||||||
|
),
|
||||||
|
recipe=new_recipe,
|
||||||
|
)
|
||||||
|
|
||||||
|
mce_helpers.check_golden_file(
|
||||||
|
pytestconfig,
|
||||||
|
output_path=output_file_path,
|
||||||
|
golden_path=f"{test_resources_dir}/okta_mces_golden_ingest_groups_users.json",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@freeze_time(FROZEN_TIME)
|
@freeze_time(FROZEN_TIME)
|
||||||
def test_okta_source_ingestion_disabled(pytestconfig, mock_datahub_graph, tmp_path):
|
def test_okta_source_ingestion_disabled(pytestconfig, mock_datahub_graph, tmp_path):
|
||||||
test_resources_dir: pathlib.Path = pytestconfig.rootpath / "tests/integration/okta"
|
test_resources_dir: pathlib.Path = pytestconfig.rootpath / "tests/integration/okta"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user