mirror of
https://github.com/datahub-project/datahub.git
synced 2025-07-26 19:10:15 +00:00
feat(ingestion/okta): okta stateful ingestion (#7736)
Co-authored-by: MohdSiddiqueBagwan <mohdsiddique.bagwan@gslab.com> Co-authored-by: Harshal Sheth <hsheth2@gmail.com> Co-authored-by: John Joyce <john@acryl.io>
This commit is contained in:
parent
5fd7981532
commit
5e145cbb2d
@ -17,11 +17,12 @@ from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|||||||
from datahub.ingestion.api.common import PipelineContext
|
from datahub.ingestion.api.common import PipelineContext
|
||||||
from datahub.ingestion.api.decorators import ( # SourceCapability,; capability,
|
from datahub.ingestion.api.decorators import ( # SourceCapability,; capability,
|
||||||
SupportStatus,
|
SupportStatus,
|
||||||
|
capability,
|
||||||
config_class,
|
config_class,
|
||||||
platform_name,
|
platform_name,
|
||||||
support_status,
|
support_status,
|
||||||
)
|
)
|
||||||
from datahub.ingestion.api.source import SourceReport
|
from datahub.ingestion.api.source import SourceCapability, SourceReport
|
||||||
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
||||||
from datahub.ingestion.source.state.sql_common_state import (
|
from datahub.ingestion.source.state.sql_common_state import (
|
||||||
BaseSQLAlchemyCheckpointState,
|
BaseSQLAlchemyCheckpointState,
|
||||||
@ -152,7 +153,7 @@ class AzureADConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin):
|
|||||||
|
|
||||||
# Configuration for stateful ingestion
|
# Configuration for stateful ingestion
|
||||||
stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = Field(
|
stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = Field(
|
||||||
default=None, description="PowerBI Stateful Ingestion Config."
|
default=None, description="Azure AD Stateful Ingestion Config."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -174,6 +175,9 @@ class AzureADSourceReport(StaleEntityRemovalSourceReport):
|
|||||||
@platform_name("Azure AD")
|
@platform_name("Azure AD")
|
||||||
@config_class(AzureADConfig)
|
@config_class(AzureADConfig)
|
||||||
@support_status(SupportStatus.CERTIFIED)
|
@support_status(SupportStatus.CERTIFIED)
|
||||||
|
@capability(
|
||||||
|
SourceCapability.DELETION_DETECTION, "Optionally enabled via stateful_ingestion"
|
||||||
|
)
|
||||||
class AzureADSource(StatefulIngestionSourceBase):
|
class AzureADSource(StatefulIngestionSourceBase):
|
||||||
"""
|
"""
|
||||||
This plugin extracts the following:
|
This plugin extracts the following:
|
||||||
|
@ -13,7 +13,6 @@ from okta.models import Group, GroupProfile, User, UserProfile, UserStatus
|
|||||||
from pydantic import validator
|
from pydantic import validator
|
||||||
from pydantic.fields import Field
|
from pydantic.fields import Field
|
||||||
|
|
||||||
from datahub.configuration import ConfigModel
|
|
||||||
from datahub.configuration.common import ConfigurationError
|
from datahub.configuration.common import ConfigurationError
|
||||||
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
||||||
from datahub.ingestion.api.common import PipelineContext
|
from datahub.ingestion.api.common import PipelineContext
|
||||||
@ -25,8 +24,19 @@ from datahub.ingestion.api.decorators import (
|
|||||||
platform_name,
|
platform_name,
|
||||||
support_status,
|
support_status,
|
||||||
)
|
)
|
||||||
from datahub.ingestion.api.source import Source, SourceReport
|
|
||||||
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
||||||
|
from datahub.ingestion.source.state.sql_common_state import (
|
||||||
|
BaseSQLAlchemyCheckpointState,
|
||||||
|
)
|
||||||
|
from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
||||||
|
StaleEntityRemovalHandler,
|
||||||
|
StaleEntityRemovalSourceReport,
|
||||||
|
StatefulStaleMetadataRemovalConfig,
|
||||||
|
)
|
||||||
|
from datahub.ingestion.source.state.stateful_ingestion_base import (
|
||||||
|
StatefulIngestionConfigBase,
|
||||||
|
StatefulIngestionSourceBase,
|
||||||
|
)
|
||||||
from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import (
|
from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import (
|
||||||
CorpGroupSnapshot,
|
CorpGroupSnapshot,
|
||||||
CorpUserSnapshot,
|
CorpUserSnapshot,
|
||||||
@ -41,11 +51,15 @@ from datahub.metadata.schema_classes import (
|
|||||||
OriginTypeClass,
|
OriginTypeClass,
|
||||||
StatusClass,
|
StatusClass,
|
||||||
)
|
)
|
||||||
|
from datahub.utilities.source_helpers import (
|
||||||
|
auto_stale_entity_removal,
|
||||||
|
auto_status_aspect,
|
||||||
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class OktaConfig(ConfigModel):
|
class OktaConfig(StatefulIngestionConfigBase):
|
||||||
# Required: Domain of the Okta deployment. Example: dev-33231928.okta.com
|
# Required: Domain of the Okta deployment. Example: dev-33231928.okta.com
|
||||||
okta_domain: str = Field(
|
okta_domain: str = Field(
|
||||||
description="The location of your Okta Domain, without a protocol. Can be found in Okta Developer console. e.g. dev-33231928.okta.com",
|
description="The location of your Okta Domain, without a protocol. Can be found in Okta Developer console. e.g. dev-33231928.okta.com",
|
||||||
@ -131,6 +145,11 @@ class OktaConfig(ConfigModel):
|
|||||||
description="Okta search expression (not regex) for ingesting groups. Only one of `okta_groups_filter` and `okta_groups_search` can be set. See (https://developer.okta.com/docs/reference/api/groups/#list-groups-with-search) for more info.",
|
description="Okta search expression (not regex) for ingesting groups. Only one of `okta_groups_filter` and `okta_groups_search` can be set. See (https://developer.okta.com/docs/reference/api/groups/#list-groups-with-search) for more info.",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Configuration for stateful ingestion
|
||||||
|
stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = Field(
|
||||||
|
default=None, description="Okta Stateful Ingestion Config."
|
||||||
|
)
|
||||||
|
|
||||||
# Optional: Whether to mask sensitive information from workunit ID's. On by default.
|
# Optional: Whether to mask sensitive information from workunit ID's. On by default.
|
||||||
mask_group_id: bool = True
|
mask_group_id: bool = True
|
||||||
mask_user_id: bool = True
|
mask_user_id: bool = True
|
||||||
@ -153,7 +172,7 @@ class OktaConfig(ConfigModel):
|
|||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class OktaSourceReport(SourceReport):
|
class OktaSourceReport(StaleEntityRemovalSourceReport):
|
||||||
filtered: List[str] = field(default_factory=list)
|
filtered: List[str] = field(default_factory=list)
|
||||||
|
|
||||||
def report_filtered(self, name: str) -> None:
|
def report_filtered(self, name: str) -> None:
|
||||||
@ -178,7 +197,10 @@ class OktaSourceReport(SourceReport):
|
|||||||
@config_class(OktaConfig)
|
@config_class(OktaConfig)
|
||||||
@support_status(SupportStatus.CERTIFIED)
|
@support_status(SupportStatus.CERTIFIED)
|
||||||
@capability(SourceCapability.DESCRIPTIONS, "Optionally enabled via configuration")
|
@capability(SourceCapability.DESCRIPTIONS, "Optionally enabled via configuration")
|
||||||
class OktaSource(Source):
|
@capability(
|
||||||
|
SourceCapability.DELETION_DETECTION, "Optionally enabled via stateful_ingestion"
|
||||||
|
)
|
||||||
|
class OktaSource(StatefulIngestionSourceBase):
|
||||||
"""
|
"""
|
||||||
This plugin extracts the following:
|
This plugin extracts the following:
|
||||||
|
|
||||||
@ -256,18 +278,32 @@ class OktaSource(Source):
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
config: OktaConfig
|
||||||
|
report: OktaSourceReport
|
||||||
|
okta_client: OktaClient
|
||||||
|
stale_entity_removal_handler: StaleEntityRemovalHandler
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def create(cls, config_dict, ctx):
|
def create(cls, config_dict, ctx):
|
||||||
config = OktaConfig.parse_obj(config_dict)
|
config = OktaConfig.parse_obj(config_dict)
|
||||||
return cls(config, ctx)
|
return cls(config, ctx)
|
||||||
|
|
||||||
def __init__(self, config: OktaConfig, ctx: PipelineContext):
|
def __init__(self, config: OktaConfig, ctx: PipelineContext):
|
||||||
super().__init__(ctx)
|
super(OktaSource, self).__init__(config, ctx)
|
||||||
self.config = config
|
self.config = config
|
||||||
self.report = OktaSourceReport()
|
self.report = OktaSourceReport()
|
||||||
self.okta_client = self._create_okta_client()
|
self.okta_client = self._create_okta_client()
|
||||||
|
|
||||||
def get_workunits(self) -> Iterable[MetadataWorkUnit]:
|
# Create and register the stateful ingestion use-case handler.
|
||||||
|
self.stale_entity_removal_handler = StaleEntityRemovalHandler(
|
||||||
|
source=self,
|
||||||
|
config=self.config,
|
||||||
|
state_type_class=BaseSQLAlchemyCheckpointState,
|
||||||
|
pipeline_name=ctx.pipeline_name,
|
||||||
|
run_id=ctx.run_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
|
||||||
# Step 0: get or create the event loop
|
# Step 0: get or create the event loop
|
||||||
# This method can be called on the main thread or an async thread, so we must create a new loop if one doesn't exist
|
# This method can be called on the main thread or an async thread, so we must create a new loop if one doesn't exist
|
||||||
# See https://docs.python.org/3/library/asyncio-eventloop.html for more info.
|
# See https://docs.python.org/3/library/asyncio-eventloop.html for more info.
|
||||||
@ -407,6 +443,12 @@ class OktaSource(Source):
|
|||||||
# Step 4: Close the event loop
|
# Step 4: Close the event loop
|
||||||
event_loop.close()
|
event_loop.close()
|
||||||
|
|
||||||
|
def get_workunits(self) -> Iterable[MetadataWorkUnit]:
|
||||||
|
return auto_stale_entity_removal(
|
||||||
|
self.stale_entity_removal_handler,
|
||||||
|
auto_status_aspect(self.get_workunits_internal()),
|
||||||
|
)
|
||||||
|
|
||||||
def get_report(self):
|
def get_report(self):
|
||||||
return self.report
|
return self.report
|
||||||
|
|
||||||
|
@ -0,0 +1,16 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"id": "0",
|
||||||
|
"created": "2015-02-06T10:11:28.000Z",
|
||||||
|
"lastUpdated": "2015-10-05T19:16:43.000Z",
|
||||||
|
"lastMembershipUpdated": "2015-11-28T19:15:32.000Z",
|
||||||
|
"objectClass": [
|
||||||
|
"okta:user_group"
|
||||||
|
],
|
||||||
|
"type": "OKTA_GROUP",
|
||||||
|
"profile": {
|
||||||
|
"name": "All Employees",
|
||||||
|
"description": "All Employees in the Test Company."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
@ -1,5 +1,7 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import pathlib
|
import pathlib
|
||||||
|
from functools import partial
|
||||||
|
from typing import Optional, cast
|
||||||
from unittest.mock import Mock, patch
|
from unittest.mock import Mock, patch
|
||||||
|
|
||||||
import jsonpickle
|
import jsonpickle
|
||||||
@ -8,12 +10,75 @@ from freezegun import freeze_time
|
|||||||
from okta.models import Group, User
|
from okta.models import Group, User
|
||||||
|
|
||||||
from datahub.ingestion.run.pipeline import Pipeline
|
from datahub.ingestion.run.pipeline import Pipeline
|
||||||
from datahub.ingestion.source.identity.okta import OktaConfig
|
from datahub.ingestion.source.identity.okta import OktaConfig, OktaSource
|
||||||
|
from datahub.ingestion.source.state.checkpoint import Checkpoint
|
||||||
|
from datahub.ingestion.source.state.entity_removal_state import GenericCheckpointState
|
||||||
from tests.test_helpers import mce_helpers
|
from tests.test_helpers import mce_helpers
|
||||||
|
from tests.test_helpers.state_helpers import (
|
||||||
|
validate_all_providers_have_committed_successfully,
|
||||||
|
)
|
||||||
|
|
||||||
FROZEN_TIME = "2020-04-14 07:00:00"
|
FROZEN_TIME = "2020-04-14 07:00:00"
|
||||||
USER_ID_NOT_IN_GROUPS = "5"
|
USER_ID_NOT_IN_GROUPS = "5"
|
||||||
|
|
||||||
|
GMS_PORT = 8080
|
||||||
|
GMS_SERVER = f"http://localhost:{GMS_PORT}"
|
||||||
|
|
||||||
|
|
||||||
|
def default_recipe(output_file_path):
|
||||||
|
return {
|
||||||
|
"run_id": "test-okta-usage",
|
||||||
|
"source": {
|
||||||
|
"type": "okta",
|
||||||
|
"config": {
|
||||||
|
"okta_domain": "mock-domain.okta.com",
|
||||||
|
"okta_api_token": "mock-okta-token",
|
||||||
|
"ingest_users": "True",
|
||||||
|
"ingest_groups": "True",
|
||||||
|
"ingest_group_membership": "True",
|
||||||
|
"okta_profile_to_username_attr": "login",
|
||||||
|
"okta_profile_to_username_regex": "([^@]+)",
|
||||||
|
"okta_profile_to_group_name_attr": "name",
|
||||||
|
"okta_profile_to_group_name_regex": "(.*)",
|
||||||
|
"include_deprovisioned_users": "False",
|
||||||
|
"include_suspended_users": "False",
|
||||||
|
"page_size": "2",
|
||||||
|
"delay_seconds": "0.00",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"sink": {
|
||||||
|
"type": "file",
|
||||||
|
"config": {
|
||||||
|
"filename": f"{output_file_path}",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def run_ingest(
|
||||||
|
mock_datahub_graph,
|
||||||
|
mocked_functions_reference,
|
||||||
|
recipe,
|
||||||
|
):
|
||||||
|
|
||||||
|
with patch(
|
||||||
|
"datahub.ingestion.source.identity.okta.OktaClient"
|
||||||
|
) as MockClient, patch(
|
||||||
|
"datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph",
|
||||||
|
mock_datahub_graph,
|
||||||
|
) as mock_checkpoint:
|
||||||
|
|
||||||
|
mock_checkpoint.return_value = mock_datahub_graph
|
||||||
|
|
||||||
|
mocked_functions_reference(MockClient=MockClient)
|
||||||
|
|
||||||
|
# Run an Okta usage ingestion run.
|
||||||
|
pipeline = Pipeline.create(recipe)
|
||||||
|
pipeline.run()
|
||||||
|
pipeline.raise_from_status()
|
||||||
|
|
||||||
|
return pipeline
|
||||||
|
|
||||||
|
|
||||||
def test_okta_config():
|
def test_okta_config():
|
||||||
config = OktaConfig.parse_obj(
|
config = OktaConfig.parse_obj(
|
||||||
@ -39,200 +104,201 @@ def test_okta_config():
|
|||||||
|
|
||||||
|
|
||||||
@freeze_time(FROZEN_TIME)
|
@freeze_time(FROZEN_TIME)
|
||||||
def test_okta_source_default_configs(pytestconfig, tmp_path):
|
def test_okta_source_default_configs(pytestconfig, mock_datahub_graph, tmp_path):
|
||||||
test_resources_dir: pathlib.Path = pytestconfig.rootpath / "tests/integration/okta"
|
test_resources_dir: pathlib.Path = pytestconfig.rootpath / "tests/integration/okta"
|
||||||
|
|
||||||
with patch("datahub.ingestion.source.identity.okta.OktaClient") as MockClient:
|
output_file_path = f"{tmp_path}/okta_mces_default_config.json"
|
||||||
_init_mock_okta_client(test_resources_dir, MockClient)
|
|
||||||
|
|
||||||
# Run an Okta usage ingestion run.
|
run_ingest(
|
||||||
pipeline = Pipeline.create(
|
mock_datahub_graph=mock_datahub_graph,
|
||||||
{
|
mocked_functions_reference=partial(
|
||||||
"run_id": "test-okta-usage",
|
_init_mock_okta_client, test_resources_dir=test_resources_dir
|
||||||
"source": {
|
),
|
||||||
"type": "okta",
|
recipe=default_recipe(output_file_path),
|
||||||
"config": {
|
|
||||||
"okta_domain": "mock-domain.okta.com",
|
|
||||||
"okta_api_token": "mock-okta-token",
|
|
||||||
"ingest_users": "True",
|
|
||||||
"ingest_groups": "True",
|
|
||||||
"ingest_group_membership": "True",
|
|
||||||
"okta_profile_to_username_attr": "login",
|
|
||||||
"okta_profile_to_username_regex": "([^@]+)",
|
|
||||||
"okta_profile_to_group_name_attr": "name",
|
|
||||||
"okta_profile_to_group_name_regex": "(.*)",
|
|
||||||
"include_deprovisioned_users": "False",
|
|
||||||
"include_suspended_users": "False",
|
|
||||||
"page_size": "2",
|
|
||||||
"delay_seconds": "0.00",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"sink": {
|
|
||||||
"type": "file",
|
|
||||||
"config": {
|
|
||||||
"filename": f"{tmp_path}/okta_mces_default_config.json",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
)
|
)
|
||||||
pipeline.run()
|
|
||||||
pipeline.raise_from_status()
|
|
||||||
|
|
||||||
mce_helpers.check_golden_file(
|
mce_helpers.check_golden_file(
|
||||||
pytestconfig,
|
pytestconfig,
|
||||||
output_path=tmp_path / "okta_mces_default_config.json",
|
output_path=output_file_path,
|
||||||
golden_path=test_resources_dir / "okta_mces_golden_default_config.json",
|
golden_path=f"{test_resources_dir}/okta_mces_golden_default_config.json",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@freeze_time(FROZEN_TIME)
|
@freeze_time(FROZEN_TIME)
|
||||||
def test_okta_source_ingestion_disabled(pytestconfig, tmp_path):
|
def test_okta_source_ingestion_disabled(pytestconfig, mock_datahub_graph, tmp_path):
|
||||||
test_resources_dir: pathlib.Path = pytestconfig.rootpath / "tests/integration/okta"
|
test_resources_dir: pathlib.Path = pytestconfig.rootpath / "tests/integration/okta"
|
||||||
|
|
||||||
with patch("datahub.ingestion.source.identity.okta.OktaClient") as MockClient:
|
output_file_path = f"{tmp_path}/okta_mces_ingestion_disabled.json"
|
||||||
_init_mock_okta_client(test_resources_dir, MockClient)
|
new_recipe = default_recipe(output_file_path)
|
||||||
|
new_recipe["source"]["config"]["ingest_users"] = False
|
||||||
|
new_recipe["source"]["config"]["ingest_groups"] = False
|
||||||
|
new_recipe["source"]["config"]["ingest_group_membership"] = False
|
||||||
|
|
||||||
# Run an Okta usage ingestion run.
|
run_ingest(
|
||||||
pipeline = Pipeline.create(
|
mock_datahub_graph=mock_datahub_graph,
|
||||||
{
|
mocked_functions_reference=partial(
|
||||||
"run_id": "test-okta-usage",
|
_init_mock_okta_client, test_resources_dir=test_resources_dir
|
||||||
"source": {
|
),
|
||||||
"type": "okta",
|
recipe=new_recipe,
|
||||||
"config": {
|
|
||||||
"okta_domain": "mock-domain.okta.com",
|
|
||||||
"okta_api_token": "mock-okta-token",
|
|
||||||
"ingest_users": "False",
|
|
||||||
"ingest_groups": "False",
|
|
||||||
"ingest_group_membership": "False",
|
|
||||||
"okta_profile_to_username_attr": "login",
|
|
||||||
"okta_profile_to_username_regex": "([^@]+)",
|
|
||||||
"okta_profile_to_group_name_attr": "name",
|
|
||||||
"okta_profile_to_group_name_regex": "(.*)",
|
|
||||||
"include_deprovisioned_users": "False",
|
|
||||||
"include_suspended_users": "False",
|
|
||||||
"page_size": "2",
|
|
||||||
"delay_seconds": "0.00",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"sink": {
|
|
||||||
"type": "file",
|
|
||||||
"config": {
|
|
||||||
"filename": f"{tmp_path}/okta_mces_ingestion_disabled.json",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
)
|
)
|
||||||
pipeline.run()
|
|
||||||
pipeline.raise_from_status()
|
|
||||||
|
|
||||||
mce_helpers.check_golden_file(
|
mce_helpers.check_golden_file(
|
||||||
pytestconfig,
|
pytestconfig,
|
||||||
output_path=tmp_path / "okta_mces_ingestion_disabled.json",
|
output_path=output_file_path,
|
||||||
golden_path=test_resources_dir / "okta_mces_golden_ingestion_disabled.json",
|
golden_path=f"{test_resources_dir}/okta_mces_golden_ingestion_disabled.json",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@freeze_time(FROZEN_TIME)
|
@freeze_time(FROZEN_TIME)
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
def test_okta_source_include_deprovisioned_suspended_users(pytestconfig, tmp_path):
|
def test_okta_source_include_deprovisioned_suspended_users(
|
||||||
|
pytestconfig, mock_datahub_graph, tmp_path
|
||||||
|
):
|
||||||
test_resources_dir: pathlib.Path = pytestconfig.rootpath / "tests/integration/okta"
|
test_resources_dir: pathlib.Path = pytestconfig.rootpath / "tests/integration/okta"
|
||||||
|
|
||||||
with patch("datahub.ingestion.source.identity.okta.OktaClient") as MockClient:
|
output_file_path = (
|
||||||
_init_mock_okta_client(test_resources_dir, MockClient)
|
f"{tmp_path}/okta_mces_include_deprovisioned_suspended_users.json"
|
||||||
|
)
|
||||||
# Run an Okta usage ingestion run.
|
new_recipe = default_recipe(output_file_path)
|
||||||
pipeline = Pipeline.create(
|
new_recipe["source"]["config"]["include_deprovisioned_users"] = True
|
||||||
{
|
new_recipe["source"]["config"]["include_suspended_users"] = True
|
||||||
"run_id": "test-okta-usage",
|
|
||||||
"source": {
|
run_ingest(
|
||||||
"type": "okta",
|
mock_datahub_graph=mock_datahub_graph,
|
||||||
"config": {
|
mocked_functions_reference=partial(
|
||||||
"okta_domain": "mock-domain.okta.com",
|
_init_mock_okta_client, test_resources_dir=test_resources_dir
|
||||||
"okta_api_token": "mock-okta-token",
|
),
|
||||||
"ingest_users": "True",
|
recipe=new_recipe,
|
||||||
"ingest_groups": "True",
|
|
||||||
"ingest_group_membership": "True",
|
|
||||||
"okta_profile_to_username_attr": "login",
|
|
||||||
"okta_profile_to_username_regex": "([^@]+)",
|
|
||||||
"okta_profile_to_group_name_attr": "name",
|
|
||||||
"okta_profile_to_group_name_regex": "(.*)",
|
|
||||||
"include_deprovisioned_users": "True",
|
|
||||||
"include_suspended_users": "True",
|
|
||||||
"page_size": "2",
|
|
||||||
"delay_seconds": "0.00",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"sink": {
|
|
||||||
"type": "file",
|
|
||||||
"config": {
|
|
||||||
"filename": f"{tmp_path}/okta_mces_include_deprovisioned_suspended_users.json",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
)
|
)
|
||||||
pipeline.run()
|
|
||||||
pipeline.raise_from_status()
|
|
||||||
|
|
||||||
mce_helpers.check_golden_file(
|
mce_helpers.check_golden_file(
|
||||||
pytestconfig,
|
pytestconfig,
|
||||||
output_path=tmp_path / "okta_mces_include_deprovisioned_suspended_users.json",
|
output_path=output_file_path,
|
||||||
golden_path=test_resources_dir
|
golden_path=f"{test_resources_dir}/okta_mces_golden_include_deprovisioned_suspended_users.json",
|
||||||
/ "okta_mces_golden_include_deprovisioned_suspended_users.json",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@freeze_time(FROZEN_TIME)
|
@freeze_time(FROZEN_TIME)
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
def test_okta_source_custom_user_name_regex(pytestconfig, tmp_path):
|
def test_okta_source_custom_user_name_regex(pytestconfig, mock_datahub_graph, tmp_path):
|
||||||
test_resources_dir: pathlib.Path = pytestconfig.rootpath / "tests/integration/okta"
|
test_resources_dir: pathlib.Path = pytestconfig.rootpath / "tests/integration/okta"
|
||||||
|
|
||||||
with patch("datahub.ingestion.source.identity.okta.OktaClient") as MockClient:
|
output_file_path = f"{tmp_path}/okta_mces_custom_user_name_regex.json"
|
||||||
_init_mock_okta_client(test_resources_dir, MockClient)
|
new_recipe = default_recipe(output_file_path)
|
||||||
|
new_recipe["source"]["config"]["okta_profile_to_username_regex"] = "(.*)"
|
||||||
|
new_recipe["source"]["config"]["okta_profile_to_group_name_regex"] = "(.*)"
|
||||||
|
|
||||||
# Run an Okta usage ingestion run.
|
run_ingest(
|
||||||
pipeline = Pipeline.create(
|
mock_datahub_graph=mock_datahub_graph,
|
||||||
{
|
mocked_functions_reference=partial(
|
||||||
"run_id": "test-okta-usage",
|
_init_mock_okta_client, test_resources_dir=test_resources_dir
|
||||||
"source": {
|
),
|
||||||
"type": "okta",
|
recipe=new_recipe,
|
||||||
"config": {
|
|
||||||
"okta_domain": "mock-domain.okta.com",
|
|
||||||
"okta_api_token": "mock-okta-token",
|
|
||||||
"ingest_users": "True",
|
|
||||||
"ingest_groups": "True",
|
|
||||||
"ingest_group_membership": "True",
|
|
||||||
"okta_profile_to_username_attr": "email",
|
|
||||||
"okta_profile_to_username_regex": "(.*)",
|
|
||||||
"okta_profile_to_group_name_attr": "name",
|
|
||||||
"okta_profile_to_group_name_regex": "(.*)",
|
|
||||||
"include_deprovisioned_users": "False",
|
|
||||||
"include_suspended_users": "False",
|
|
||||||
"page_size": "2",
|
|
||||||
"delay_seconds": "0.00",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"sink": {
|
|
||||||
"type": "file",
|
|
||||||
"config": {
|
|
||||||
"filename": f"{tmp_path}/okta_mces_custom_user_name_regex.json",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
)
|
)
|
||||||
pipeline.run()
|
|
||||||
pipeline.raise_from_status()
|
|
||||||
|
|
||||||
mce_helpers.check_golden_file(
|
mce_helpers.check_golden_file(
|
||||||
pytestconfig,
|
pytestconfig,
|
||||||
output_path=tmp_path / "okta_mces_custom_user_name_regex.json",
|
output_path=output_file_path,
|
||||||
golden_path=test_resources_dir / "okta_mces_golden_custom_user_name_regex.json",
|
golden_path=f"{test_resources_dir}/okta_mces_golden_custom_user_name_regex.json",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def get_current_checkpoint_from_pipeline(
|
||||||
|
pipeline: Pipeline,
|
||||||
|
) -> Optional[Checkpoint[GenericCheckpointState]]:
|
||||||
|
azure_ad_source = cast(OktaSource, pipeline.source)
|
||||||
|
return azure_ad_source.get_current_checkpoint(
|
||||||
|
azure_ad_source.stale_entity_removal_handler.job_id
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@freeze_time(FROZEN_TIME)
|
||||||
|
def test_okta_stateful_ingestion(pytestconfig, tmp_path, mock_time, mock_datahub_graph):
|
||||||
|
test_resources_dir: pathlib.Path = pytestconfig.rootpath / "tests/integration/okta"
|
||||||
|
|
||||||
|
output_file_path = f"{tmp_path}/temporary_mces.json"
|
||||||
|
new_recipe = default_recipe(output_file_path)
|
||||||
|
|
||||||
|
new_recipe["pipeline_name"] = "okta_execution"
|
||||||
|
new_recipe["source"]["config"]["stateful_ingestion"] = {
|
||||||
|
"enabled": True,
|
||||||
|
"state_provider": {
|
||||||
|
"type": "datahub",
|
||||||
|
"config": {"datahub_api": {"server": GMS_SERVER}},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
pipeline1 = run_ingest(
|
||||||
|
mock_datahub_graph=mock_datahub_graph,
|
||||||
|
mocked_functions_reference=partial(
|
||||||
|
_init_mock_okta_client, test_resources_dir=test_resources_dir
|
||||||
|
),
|
||||||
|
recipe=new_recipe,
|
||||||
|
)
|
||||||
|
|
||||||
|
checkpoint1 = get_current_checkpoint_from_pipeline(pipeline1)
|
||||||
|
assert checkpoint1
|
||||||
|
assert checkpoint1.state
|
||||||
|
|
||||||
|
# Create new event loop as last one is closed because of previous ingestion run
|
||||||
|
event_loop = asyncio.new_event_loop()
|
||||||
|
asyncio.set_event_loop(event_loop)
|
||||||
|
|
||||||
|
pipeline2 = run_ingest(
|
||||||
|
mock_datahub_graph=mock_datahub_graph,
|
||||||
|
mocked_functions_reference=partial(
|
||||||
|
overwrite_group_in_mocked_data, test_resources_dir=test_resources_dir
|
||||||
|
),
|
||||||
|
recipe=new_recipe,
|
||||||
|
)
|
||||||
|
checkpoint2 = get_current_checkpoint_from_pipeline(pipeline2)
|
||||||
|
assert checkpoint2
|
||||||
|
assert checkpoint2.state
|
||||||
|
#
|
||||||
|
# Validate that all providers have committed successfully.
|
||||||
|
validate_all_providers_have_committed_successfully(
|
||||||
|
pipeline=pipeline1, expected_providers=1
|
||||||
|
)
|
||||||
|
validate_all_providers_have_committed_successfully(
|
||||||
|
pipeline=pipeline2, expected_providers=1
|
||||||
|
)
|
||||||
|
|
||||||
|
# Perform all assertions on the states. The deleted group should not be
|
||||||
|
# part of the second state
|
||||||
|
state1 = checkpoint1.state
|
||||||
|
state2 = checkpoint2.state
|
||||||
|
|
||||||
|
difference_group_urns = list(
|
||||||
|
state1.get_urns_not_in(type="corpGroup", other_checkpoint_state=state2)
|
||||||
|
)
|
||||||
|
|
||||||
|
assert len(difference_group_urns) == 1
|
||||||
|
assert difference_group_urns == ["urn:li:corpGroup:Engineering"]
|
||||||
|
|
||||||
|
|
||||||
|
def overwrite_group_in_mocked_data(test_resources_dir, MockClient):
|
||||||
|
_init_mock_okta_client(
|
||||||
|
test_resources_dir,
|
||||||
|
MockClient,
|
||||||
|
mock_groups_json=test_resources_dir / "okta_deleted_groups.json",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# Initializes a Mock Okta Client to return users from okta_users.json and groups from okta_groups.json.
|
# Initializes a Mock Okta Client to return users from okta_users.json and groups from okta_groups.json.
|
||||||
def _init_mock_okta_client(test_resources_dir, MockClient):
|
def _init_mock_okta_client(
|
||||||
okta_users_json_file = test_resources_dir / "okta_users.json"
|
test_resources_dir, MockClient, mock_users_json=None, mock_groups_json=None
|
||||||
okta_groups_json_file = test_resources_dir / "okta_groups.json"
|
):
|
||||||
|
|
||||||
|
okta_users_json_file = (
|
||||||
|
test_resources_dir / "okta_users.json"
|
||||||
|
if mock_users_json is None
|
||||||
|
else mock_users_json
|
||||||
|
)
|
||||||
|
okta_groups_json_file = (
|
||||||
|
test_resources_dir / "okta_groups.json"
|
||||||
|
if mock_groups_json is None
|
||||||
|
else mock_groups_json
|
||||||
|
)
|
||||||
|
|
||||||
# Add mock Okta API responses.
|
# Add mock Okta API responses.
|
||||||
with okta_users_json_file.open() as okta_users_json:
|
with okta_users_json_file.open() as okta_users_json:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user