mirror of
https://github.com/datahub-project/datahub.git
synced 2025-07-30 04:45:28 +00:00
feat(okta) - add support for filtering/searching when ingesting Okta groups and users (#4586)
This commit is contained in:
parent
32349bf405
commit
aeafa7e63f
@ -38,7 +38,7 @@ and mapped to the DataHub `CorpUserInfo` aspect:
|
||||
- email
|
||||
- title
|
||||
- department
|
||||
- country code
|
||||
- country code
|
||||
|
||||
### Extracting DataHub Groups
|
||||
|
||||
@ -69,6 +69,12 @@ this should not matter.
|
||||
|
||||
This is a known limitation in our data model that is being tracked by [this ticket](https://github.com/datahub-project/datahub/issues/3065).
|
||||
|
||||
### Filtering and Searching
|
||||
You can also choose to ingest only a subset of users or groups into DataHub by adding flags for filtering or searching. For
|
||||
users, set either the `okta_users_filter` or `okta_users_search` flag (only one can be set at a time). For groups, set
|
||||
either the `okta_groups_filter` or `okta_groups_search` flag (again, only one can be set at a time). Note that these values are Okta filter/search expressions, not regular expressions. See [below](#config-details) for full configuration
|
||||
options.
|
||||
|
||||
|
||||
## Quickstart recipe
|
||||
|
||||
@ -98,21 +104,25 @@ For general pointers on writing and running a recipe, see our [main recipe guide
|
||||
|
||||
Note that a `.` is used to denote nested fields in the YAML configuration block.
|
||||
|
||||
| Field | Type | Required | Default | Description |
|
||||
|------------------------------------|--------|----------|-------------|-----------------------------------------------------------------------------------------------------------------|
|
||||
| `okta_domain` | string | ✅ | | The location of your Okta Domain, without a protocol. Can be found in Okta Developer console. |
|
||||
| `okta_api_token` | string | ✅ | | An API token generated for the DataHub application inside your Okta Developer Console. |
|
||||
| `ingest_users` | bool | | `True` | Whether users should be ingested into DataHub. |
|
||||
| `ingest_groups` | bool | | `True` | Whether groups should be ingested into DataHub. |
|
||||
| `ingest_group_membership` | bool | | `True` | Whether group membership should be ingested into DataHub. ingest_groups must be True if this is True. |
|
||||
| `okta_profile_to_username_attr` | string | | `"login"` | Which Okta User Profile attribute to use as input to DataHub username mapping. |
|
||||
| `okta_profile_to_username_regex` | string | | `"([^@]+)"` | A regex used to parse the DataHub username from the attribute specified in `okta_profile_to_username_attr`. |
|
||||
| `okta_profile_to_group_name_attr` | string | | `"name"` | Which Okta Group Profile attribute to use as input to DataHub group name mapping. |
|
||||
| `okta_profile_to_group_name_regex` | string | | `"(.*)"` | A regex used to parse the DataHub group name from the attribute specified in `okta_profile_to_group_name_attr`. |
|
||||
| `include_deprovisioned_users` | bool | | `False` | Whether to ingest users in the DEPROVISIONED state from Okta. |
|
||||
| `include_suspended_users` | bool | | `False` | Whether to ingest users in the SUSPENDED state from Okta. |
|
||||
| `page_size` | number | | `100` | The number of entities requested from Okta's REST APIs in one request. |
|
||||
| `delay_seconds` | number | | `0.01` | Number of seconds to wait between calls to Okta's REST APIs. (Okta rate limits). Defaults to 10ms. |
|
||||
| Field | Type | Required | Default | Description |
|
||||
|------------------------------------|--------|----------|-------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `okta_domain` | string | ✅ | | The location of your Okta Domain, without a protocol. Can be found in Okta Developer console. |
|
||||
| `okta_api_token` | string | ✅ | | An API token generated for the DataHub application inside your Okta Developer Console. |
|
||||
| `ingest_users` | bool | | `True` | Whether users should be ingested into DataHub. |
|
||||
| `ingest_groups` | bool | | `True` | Whether groups should be ingested into DataHub. |
|
||||
| `ingest_group_membership` | bool | | `True` | Whether group membership should be ingested into DataHub. ingest_groups must be True if this is True. |
|
||||
| `okta_profile_to_username_attr` | string | | `"login"` | Which Okta User Profile attribute to use as input to DataHub username mapping. |
|
||||
| `okta_profile_to_username_regex` | string | | `"([^@]+)"` | A regex used to parse the DataHub username from the attribute specified in `okta_profile_to_username_attr`. |
|
||||
| `okta_profile_to_group_name_attr` | string | | `"name"` | Which Okta Group Profile attribute to use as input to DataHub group name mapping. |
|
||||
| `okta_profile_to_group_name_regex` | string | | `"(.*)"` | A regex used to parse the DataHub group name from the attribute specified in `okta_profile_to_group_name_attr`. |
|
||||
| `include_deprovisioned_users` | bool | | `False` | Whether to ingest users in the DEPROVISIONED state from Okta. |
|
||||
| `include_suspended_users` | bool | | `False` | Whether to ingest users in the SUSPENDED state from Okta. |
|
||||
| `page_size` | number | | `100` | The number of entities requested from Okta's REST APIs in one request. |
|
||||
| `delay_seconds` | number | | `0.01` | Number of seconds to wait between calls to Okta's REST APIs. (Okta rate limits). Defaults to 10ms. |
|
||||
| `okta_users_filter` | string | | `None` | Okta filter expression (not regex) for ingesting users. Only one of `okta_users_filter` and `okta_users_search` can be set. See the [Okta API docs](https://developer.okta.com/docs/reference/api/users/#list-users-with-a-filter) for more info. |
|
||||
| `okta_users_search` | string | | `None` | Okta search expression (not regex) for ingesting users. Only one of `okta_users_filter` and `okta_users_search` can be set. See the [Okta API docs](https://developer.okta.com/docs/reference/api/users/#list-users-with-search) for more info. |
|
||||
| `okta_groups_filter` | string | | `None` | Okta filter expression (not regex) for ingesting groups. Only one of `okta_groups_filter` and `okta_groups_search` can be set. See the [Okta API docs](https://developer.okta.com/docs/reference/api/groups/#filters) for more info. |
|
||||
| `okta_groups_search` | string | | `None` | Okta search expression (not regex) for ingesting groups. Only one of `okta_groups_filter` and `okta_groups_search` can be set. See the [Okta API docs](https://developer.okta.com/docs/reference/api/groups/#list-groups-with-search) for more info. |
|
||||
|
||||
## Compatibility
|
||||
|
||||
|
@ -4,13 +4,15 @@ import re
|
||||
import urllib
|
||||
from dataclasses import dataclass, field
|
||||
from time import sleep
|
||||
from typing import Dict, Iterable, List, Union
|
||||
from typing import Dict, Iterable, List, Optional, Union
|
||||
|
||||
from okta.client import Client as OktaClient
|
||||
from okta.exceptions import OktaAPIException
|
||||
from okta.models import Group, GroupProfile, User, UserProfile, UserStatus
|
||||
from pydantic import validator
|
||||
|
||||
from datahub.configuration import ConfigModel
|
||||
from datahub.configuration.common import ConfigurationError
|
||||
from datahub.ingestion.api.common import PipelineContext
|
||||
from datahub.ingestion.api.source import Source, SourceReport
|
||||
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
||||
@ -60,6 +62,30 @@ class OktaConfig(ConfigModel):
|
||||
# Optional: Set the delay for fetching batches of entities from Okta. Okta has rate limiting in place.
|
||||
delay_seconds = 0.01
|
||||
|
||||
# Optional: Filter and search expression for ingesting a subset of users. Only one can be specified at a time.
|
||||
okta_users_filter: Optional[str] = None
|
||||
okta_users_search: Optional[str] = None
|
||||
|
||||
# Optional: Filter and search expression for ingesting a subset of groups. Only one can be specified at a time.
|
||||
okta_groups_filter: Optional[str] = None
|
||||
okta_groups_search: Optional[str] = None
|
||||
|
||||
@validator("okta_users_search")
def okta_users_one_of_filter_or_search(cls, v, values):
    """Reject a config that sets both ``okta_users_filter`` and ``okta_users_search``.

    Args:
        v: The candidate value for ``okta_users_search``.
        values: Fields validated before this one; ``okta_users_filter`` is
            declared earlier in the model, so it is looked up here.

    Returns:
        ``v`` unchanged when the combination is acceptable.

    Raises:
        ConfigurationError: If both the filter and the search expression
            are set to non-empty values.
    """
    # Use .get() rather than indexing: pydantic omits a field from `values`
    # when that field failed its own validation, and a plain index would
    # then surface as an unrelated KeyError instead of the real error.
    if v and values.get("okta_users_filter"):
        raise ConfigurationError(
            "Only one of okta_users_filter or okta_users_search can be set"
        )
    return v
|
||||
|
||||
@validator("okta_groups_search")
def okta_groups_one_of_filter_or_search(cls, v, values):
    """Reject a config that sets both ``okta_groups_filter`` and ``okta_groups_search``.

    Args:
        v: The candidate value for ``okta_groups_search``.
        values: Fields validated before this one; ``okta_groups_filter`` is
            declared earlier in the model, so it is looked up here.

    Returns:
        ``v`` unchanged when the combination is acceptable.

    Raises:
        ConfigurationError: If both the filter and the search expression
            are set to non-empty values.
    """
    # Use .get() rather than indexing: pydantic omits a field from `values`
    # when that field failed its own validation, and a plain index would
    # then surface as an unrelated KeyError instead of the real error.
    if v and values.get("okta_groups_filter"):
        raise ConfigurationError(
            "Only one of okta_groups_filter or okta_groups_search can be set"
        )
    return v
|
||||
|
||||
|
||||
@dataclass
|
||||
class OktaSourceReport(SourceReport):
|
||||
@ -201,7 +227,12 @@ class OktaSource(Source):
|
||||
logger.debug("Extracting all Okta groups")
|
||||
|
||||
# Note that this is not taking full advantage of Python AsyncIO, as we are blocking on calls.
|
||||
query_parameters = {"limit": self.config.page_size}
|
||||
query_parameters: Dict[str, Union[str, int]] = {"limit": self.config.page_size}
|
||||
if self.config.okta_groups_filter:
|
||||
query_parameters.update({"filter": self.config.okta_groups_filter})
|
||||
if self.config.okta_groups_search:
|
||||
query_parameters.update({"search": self.config.okta_groups_search})
|
||||
groups = resp = err = None
|
||||
try:
|
||||
groups, resp, err = event_loop.run_until_complete(
|
||||
self.okta_client.list_groups(query_parameters)
|
||||
@ -238,6 +269,7 @@ class OktaSource(Source):
|
||||
|
||||
# Note that this is not taking full advantage of Python AsyncIO; we are blocking on calls.
|
||||
query_parameters = {"limit": self.config.page_size}
|
||||
users = resp = err = None
|
||||
try:
|
||||
users, resp, err = event_loop.run_until_complete(
|
||||
self.okta_client.list_group_users(group.id, query_parameters)
|
||||
@ -272,7 +304,12 @@ class OktaSource(Source):
|
||||
def _get_okta_users(self, event_loop: asyncio.AbstractEventLoop) -> Iterable[User]:
|
||||
logger.debug("Extracting all Okta users")
|
||||
|
||||
query_parameters = {"limit": self.config.page_size}
|
||||
query_parameters: Dict[str, Union[str, int]] = {"limit": self.config.page_size}
|
||||
if self.config.okta_users_filter:
|
||||
query_parameters.update({"filter": self.config.okta_users_filter})
|
||||
if self.config.okta_users_search:
|
||||
query_parameters.update({"search": self.config.okta_users_search})
|
||||
users = resp = err = None
|
||||
try:
|
||||
users, resp, err = event_loop.run_until_complete(
|
||||
self.okta_client.list_users(query_parameters)
|
||||
|
Loading…
x
Reference in New Issue
Block a user