feat(ingest) - update identity sources to add flags for masking sensitive work units (#4711)

This commit is contained in:
Aditya Radhakrishnan 2022-04-20 14:21:08 -07:00 committed by GitHub
parent c91d70f1ba
commit 15e90f6dd0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 75 additions and 35 deletions

View File

@ -113,14 +113,14 @@ For general pointers on writing and running a recipe, see our [main recipe guide
Note that a `.` is used to denote nested fields in the YAML configuration block.
| Field | Type | Required | Default | Description |
|------------------------------------|--------|----------|-------------|-----------------------------------------------------------------------------------------------------------------|
|----------------------------------------|-----------------|----------|-----------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `client_id` | string | ✅ | | Application ID. Found in your app registration on Azure AD Portal |
| `tenant_id` | string | ✅ | | Directory ID. Found in your app registration on Azure AD Portal |
| `client_secret` | string | ✅ | | Client secret. Found in your app registration on Azure AD Portal |
| `redirect` | string | ✅ | | Redirect URI. Found in your app registration on Azure AD Portal |
| `authority` | string | ✅ | | The [authority](https://docs.microsoft.com/en-us/azure/active-directory/develop/msal-client-application-configuration) is a URL that indicates a directory that MSAL can request tokens from. |
| `token_url` | string | ✅ | | The token URL that acquires a token from Azure AD for authorizing requests. This source will only work with the v1.0 endpoint. |
| `graph_url` | string | ✅ | | [Microsoft Graph API endpoint](https://docs.microsoft.com/en-us/graph/use-the-api)
| `graph_url` | string | ✅ | | [Microsoft Graph API endpoint](https://docs.microsoft.com/en-us/graph/use-the-api) |
| `ingest_users` | bool | | `True` | Whether users should be ingested into DataHub. |
| `ingest_groups` | bool | | `True` | Whether groups should be ingested into DataHub. |
| `ingest_group_membership` | bool | | `True` | Whether group membership should be ingested into DataHub. ingest_groups must be True if this is True. |
@ -133,6 +133,8 @@ Note that a `.` is used to denote nested fields in the YAML configuration block.
| `groups_pattern.allow` | list of strings | | | List of regex patterns for groups to include in ingestion. The patterns are matched against the DataHub group name, i.e. the name resulting from applying `azure_ad_response_to_groupname_attr` and `azure_ad_response_to_groupname_regex`. |
| `groups_pattern.deny` | list of strings | | | As above, but for excluding groups from ingestion. |
| `ingest_groups_users` | bool | | `True` | This option is useful only when `ingest_users` is set to False and `ingest_group_membership` to True. In that case, only the users who belong to the selected groups will be ingested. |
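| `mask_group_id` | bool | | `True` | Whether workunit IDs for groups should be masked. When enabled, each group workunit ID is a sequential `group-<n>` label instead of the group URN. |
| `mask_user_id` | bool | | `True` | Whether workunit IDs for users should be masked. When enabled, each user workunit ID is a sequential `user-<n>` label instead of the user URN. |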
## Questions

View File

@ -123,6 +123,8 @@ Note that a `.` is used to denote nested fields in the YAML configuration block.
| `okta_users_search` | string | | `None` | Okta search expression (not regex) for ingesting users. Only one of `okta_users_filter` and `okta_users_search` can be set. See the [Okta API docs](https://developer.okta.com/docs/reference/api/users/#list-users-with-search) for more info. |
| `okta_groups_filter` | string | | `None` | Okta filter expression (not regex) for ingesting groups. Only one of `okta_groups_filter` and `okta_groups_search` can be set. See the [Okta API docs](https://developer.okta.com/docs/reference/api/groups/#filters) for more info. |
| `okta_groups_search` | string | | `None` | Okta search expression (not regex) for ingesting groups. Only one of `okta_groups_filter` and `okta_groups_search` can be set. See the [Okta API docs](https://developer.okta.com/docs/reference/api/groups/#list-groups-with-search) for more info. |
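| `mask_group_id` | bool | | `True` | Whether workunit IDs for groups should be masked. When enabled, each group workunit ID is a sequential `group-<n>` label instead of the group URN. |
| `mask_user_id` | bool | | `True` | Whether workunit IDs for users should be masked. When enabled, each user workunit ID is a sequential `user-<n>` label instead of the user URN. |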
## Compatibility

View File

@ -63,6 +63,10 @@ class AzureADConfig(ConfigModel):
# If enabled, report will contain names of filtered users and groups.
filtered_tracking: bool = True
# Optional: Whether to mask sensitive information from workunit IDs. On by default.
mask_group_id: bool = True
mask_user_id: bool = True
@dataclass
class AzureADSourceReport(SourceReport):
@ -134,11 +138,18 @@ class AzureADSource(Source):
datahub_corp_group_snapshots = self._map_azure_ad_groups(
azure_ad_groups
)
for datahub_corp_group_snapshot in datahub_corp_group_snapshots:
for group_count, datahub_corp_group_snapshot in enumerate(
datahub_corp_group_snapshots
):
mce = MetadataChangeEvent(
proposedSnapshot=datahub_corp_group_snapshot
)
wu = MetadataWorkUnit(id=datahub_corp_group_snapshot.urn, mce=mce)
wu_id = (
f"group-{group_count + 1}"
if self.config.mask_group_id
else datahub_corp_group_snapshot.urn
)
wu = MetadataWorkUnit(id=wu_id, mce=mce)
self.report.report_workunit(wu)
yield wu
@ -241,7 +252,9 @@ class AzureADSource(Source):
datahub_corp_user_snapshots: Generator[CorpUserSnapshot, Any, None],
datahub_corp_user_urn_to_group_membership: dict,
) -> Generator[MetadataWorkUnit, Any, None]:
for datahub_corp_user_snapshot in datahub_corp_user_snapshots:
for user_count, datahub_corp_user_snapshot in enumerate(
datahub_corp_user_snapshots
):
# Add GroupMembership if applicable
if (
datahub_corp_user_snapshot.urn
@ -255,7 +268,12 @@ class AzureADSource(Source):
assert datahub_group_membership
datahub_corp_user_snapshot.aspects.append(datahub_group_membership)
mce = MetadataChangeEvent(proposedSnapshot=datahub_corp_user_snapshot)
wu = MetadataWorkUnit(id=datahub_corp_user_snapshot.urn, mce=mce)
wu_id = (
f"user-{user_count + 1}"
if self.config.mask_user_id
else datahub_corp_user_snapshot.urn
)
wu = MetadataWorkUnit(id=wu_id, mce=mce)
self.report.report_workunit(wu)
yield wu

View File

@ -70,6 +70,10 @@ class OktaConfig(ConfigModel):
okta_groups_filter: Optional[str] = None
okta_groups_search: Optional[str] = None
# Optional: Whether to mask sensitive information from workunit IDs. On by default.
mask_group_id: bool = True
mask_user_id: bool = True
@validator("okta_users_search")
def okta_users_one_of_filter_or_search(cls, v, values):
if v and values["okta_users_filter"]:
@ -130,9 +134,16 @@ class OktaSource(Source):
if self.config.ingest_groups:
okta_groups = list(self._get_okta_groups(event_loop))
datahub_corp_group_snapshots = self._map_okta_groups(okta_groups)
for datahub_corp_group_snapshot in datahub_corp_group_snapshots:
for group_count, datahub_corp_group_snapshot in enumerate(
datahub_corp_group_snapshots
):
mce = MetadataChangeEvent(proposedSnapshot=datahub_corp_group_snapshot)
wu = MetadataWorkUnit(id=datahub_corp_group_snapshot.urn, mce=mce)
wu_id = (
f"group-{group_count + 1}"
if self.config.mask_group_id
else datahub_corp_group_snapshot.urn
)
wu = MetadataWorkUnit(id=wu_id, mce=mce)
self.report.report_workunit(wu)
yield wu
@ -183,7 +194,9 @@ class OktaSource(Source):
okta_users = self._get_okta_users(event_loop)
filtered_okta_users = filter(self._filter_okta_user, okta_users)
datahub_corp_user_snapshots = self._map_okta_users(filtered_okta_users)
for datahub_corp_user_snapshot in datahub_corp_user_snapshots:
for user_count, datahub_corp_user_snapshot in enumerate(
datahub_corp_user_snapshots
):
# Add GroupMembership aspect populated in Step 2 if applicable.
if (
@ -198,7 +211,12 @@ class OktaSource(Source):
assert datahub_group_membership is not None
datahub_corp_user_snapshot.aspects.append(datahub_group_membership)
mce = MetadataChangeEvent(proposedSnapshot=datahub_corp_user_snapshot)
wu = MetadataWorkUnit(id=datahub_corp_user_snapshot.urn, mce=mce)
wu_id = (
f"user-{user_count + 1}"
if self.config.mask_user_id
else datahub_corp_user_snapshot.urn
)
wu = MetadataWorkUnit(id=wu_id, mce=mce)
self.report.report_workunit(wu)
yield wu