mirror of
				https://github.com/datahub-project/datahub.git
				synced 2025-10-31 10:49:00 +00:00 
			
		
		
		
	feat(ingestion/okta): okta stateful ingestion (#7736)
Co-authored-by: MohdSiddiqueBagwan <mohdsiddique.bagwan@gslab.com> Co-authored-by: Harshal Sheth <hsheth2@gmail.com> Co-authored-by: John Joyce <john@acryl.io>
This commit is contained in:
		
							parent
							
								
									5fd7981532
								
							
						
					
					
						commit
						5e145cbb2d
					
				| @ -17,11 +17,12 @@ from datahub.emitter.mcp import MetadataChangeProposalWrapper | |||||||
| from datahub.ingestion.api.common import PipelineContext | from datahub.ingestion.api.common import PipelineContext | ||||||
| from datahub.ingestion.api.decorators import (  # SourceCapability,; capability, | from datahub.ingestion.api.decorators import (  # SourceCapability,; capability, | ||||||
|     SupportStatus, |     SupportStatus, | ||||||
|  |     capability, | ||||||
|     config_class, |     config_class, | ||||||
|     platform_name, |     platform_name, | ||||||
|     support_status, |     support_status, | ||||||
| ) | ) | ||||||
| from datahub.ingestion.api.source import SourceReport | from datahub.ingestion.api.source import SourceCapability, SourceReport | ||||||
| from datahub.ingestion.api.workunit import MetadataWorkUnit | from datahub.ingestion.api.workunit import MetadataWorkUnit | ||||||
| from datahub.ingestion.source.state.sql_common_state import ( | from datahub.ingestion.source.state.sql_common_state import ( | ||||||
|     BaseSQLAlchemyCheckpointState, |     BaseSQLAlchemyCheckpointState, | ||||||
| @ -152,7 +153,7 @@ class AzureADConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin): | |||||||
| 
 | 
 | ||||||
|     # Configuration for stateful ingestion |     # Configuration for stateful ingestion | ||||||
|     stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = Field( |     stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = Field( | ||||||
|         default=None, description="PowerBI Stateful Ingestion Config." |         default=None, description="Azure AD Stateful Ingestion Config." | ||||||
|     ) |     ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @ -174,6 +175,9 @@ class AzureADSourceReport(StaleEntityRemovalSourceReport): | |||||||
| @platform_name("Azure AD") | @platform_name("Azure AD") | ||||||
| @config_class(AzureADConfig) | @config_class(AzureADConfig) | ||||||
| @support_status(SupportStatus.CERTIFIED) | @support_status(SupportStatus.CERTIFIED) | ||||||
|  | @capability( | ||||||
|  |     SourceCapability.DELETION_DETECTION, "Optionally enabled via stateful_ingestion" | ||||||
|  | ) | ||||||
| class AzureADSource(StatefulIngestionSourceBase): | class AzureADSource(StatefulIngestionSourceBase): | ||||||
|     """ |     """ | ||||||
|     This plugin extracts the following: |     This plugin extracts the following: | ||||||
|  | |||||||
| @ -13,7 +13,6 @@ from okta.models import Group, GroupProfile, User, UserProfile, UserStatus | |||||||
| from pydantic import validator | from pydantic import validator | ||||||
| from pydantic.fields import Field | from pydantic.fields import Field | ||||||
| 
 | 
 | ||||||
| from datahub.configuration import ConfigModel |  | ||||||
| from datahub.configuration.common import ConfigurationError | from datahub.configuration.common import ConfigurationError | ||||||
| from datahub.emitter.mcp import MetadataChangeProposalWrapper | from datahub.emitter.mcp import MetadataChangeProposalWrapper | ||||||
| from datahub.ingestion.api.common import PipelineContext | from datahub.ingestion.api.common import PipelineContext | ||||||
| @ -25,8 +24,19 @@ from datahub.ingestion.api.decorators import ( | |||||||
|     platform_name, |     platform_name, | ||||||
|     support_status, |     support_status, | ||||||
| ) | ) | ||||||
| from datahub.ingestion.api.source import Source, SourceReport |  | ||||||
| from datahub.ingestion.api.workunit import MetadataWorkUnit | from datahub.ingestion.api.workunit import MetadataWorkUnit | ||||||
|  | from datahub.ingestion.source.state.sql_common_state import ( | ||||||
|  |     BaseSQLAlchemyCheckpointState, | ||||||
|  | ) | ||||||
|  | from datahub.ingestion.source.state.stale_entity_removal_handler import ( | ||||||
|  |     StaleEntityRemovalHandler, | ||||||
|  |     StaleEntityRemovalSourceReport, | ||||||
|  |     StatefulStaleMetadataRemovalConfig, | ||||||
|  | ) | ||||||
|  | from datahub.ingestion.source.state.stateful_ingestion_base import ( | ||||||
|  |     StatefulIngestionConfigBase, | ||||||
|  |     StatefulIngestionSourceBase, | ||||||
|  | ) | ||||||
| from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import ( | from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import ( | ||||||
|     CorpGroupSnapshot, |     CorpGroupSnapshot, | ||||||
|     CorpUserSnapshot, |     CorpUserSnapshot, | ||||||
| @ -41,11 +51,15 @@ from datahub.metadata.schema_classes import ( | |||||||
|     OriginTypeClass, |     OriginTypeClass, | ||||||
|     StatusClass, |     StatusClass, | ||||||
| ) | ) | ||||||
|  | from datahub.utilities.source_helpers import ( | ||||||
|  |     auto_stale_entity_removal, | ||||||
|  |     auto_status_aspect, | ||||||
|  | ) | ||||||
| 
 | 
 | ||||||
| logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class OktaConfig(ConfigModel): | class OktaConfig(StatefulIngestionConfigBase): | ||||||
|     # Required: Domain of the Okta deployment. Example: dev-33231928.okta.com |     # Required: Domain of the Okta deployment. Example: dev-33231928.okta.com | ||||||
|     okta_domain: str = Field( |     okta_domain: str = Field( | ||||||
|         description="The location of your Okta Domain, without a protocol. Can be found in Okta Developer console. e.g. dev-33231928.okta.com", |         description="The location of your Okta Domain, without a protocol. Can be found in Okta Developer console. e.g. dev-33231928.okta.com", | ||||||
| @ -131,6 +145,11 @@ class OktaConfig(ConfigModel): | |||||||
|         description="Okta search expression (not regex) for ingesting groups. Only one of `okta_groups_filter` and `okta_groups_search` can be set. See (https://developer.okta.com/docs/reference/api/groups/#list-groups-with-search) for more info.", |         description="Okta search expression (not regex) for ingesting groups. Only one of `okta_groups_filter` and `okta_groups_search` can be set. See (https://developer.okta.com/docs/reference/api/groups/#list-groups-with-search) for more info.", | ||||||
|     ) |     ) | ||||||
| 
 | 
 | ||||||
|  |     # Configuration for stateful ingestion | ||||||
|  |     stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = Field( | ||||||
|  |         default=None, description="Okta Stateful Ingestion Config." | ||||||
|  |     ) | ||||||
|  | 
 | ||||||
|     # Optional: Whether to mask sensitive information from workunit ID's. On by default. |     # Optional: Whether to mask sensitive information from workunit ID's. On by default. | ||||||
|     mask_group_id: bool = True |     mask_group_id: bool = True | ||||||
|     mask_user_id: bool = True |     mask_user_id: bool = True | ||||||
| @ -153,7 +172,7 @@ class OktaConfig(ConfigModel): | |||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @dataclass | @dataclass | ||||||
| class OktaSourceReport(SourceReport): | class OktaSourceReport(StaleEntityRemovalSourceReport): | ||||||
|     filtered: List[str] = field(default_factory=list) |     filtered: List[str] = field(default_factory=list) | ||||||
| 
 | 
 | ||||||
|     def report_filtered(self, name: str) -> None: |     def report_filtered(self, name: str) -> None: | ||||||
| @ -178,7 +197,10 @@ class OktaSourceReport(SourceReport): | |||||||
| @config_class(OktaConfig) | @config_class(OktaConfig) | ||||||
| @support_status(SupportStatus.CERTIFIED) | @support_status(SupportStatus.CERTIFIED) | ||||||
| @capability(SourceCapability.DESCRIPTIONS, "Optionally enabled via configuration") | @capability(SourceCapability.DESCRIPTIONS, "Optionally enabled via configuration") | ||||||
| class OktaSource(Source): | @capability( | ||||||
|  |     SourceCapability.DELETION_DETECTION, "Optionally enabled via stateful_ingestion" | ||||||
|  | ) | ||||||
|  | class OktaSource(StatefulIngestionSourceBase): | ||||||
|     """ |     """ | ||||||
|     This plugin extracts the following: |     This plugin extracts the following: | ||||||
| 
 | 
 | ||||||
| @ -256,18 +278,32 @@ class OktaSource(Source): | |||||||
| 
 | 
 | ||||||
|     """ |     """ | ||||||
| 
 | 
 | ||||||
|  |     config: OktaConfig | ||||||
|  |     report: OktaSourceReport | ||||||
|  |     okta_client: OktaClient | ||||||
|  |     stale_entity_removal_handler: StaleEntityRemovalHandler | ||||||
|  | 
 | ||||||
|     @classmethod |     @classmethod | ||||||
|     def create(cls, config_dict, ctx): |     def create(cls, config_dict, ctx): | ||||||
|         config = OktaConfig.parse_obj(config_dict) |         config = OktaConfig.parse_obj(config_dict) | ||||||
|         return cls(config, ctx) |         return cls(config, ctx) | ||||||
| 
 | 
 | ||||||
|     def __init__(self, config: OktaConfig, ctx: PipelineContext): |     def __init__(self, config: OktaConfig, ctx: PipelineContext): | ||||||
|         super().__init__(ctx) |         super(OktaSource, self).__init__(config, ctx) | ||||||
|         self.config = config |         self.config = config | ||||||
|         self.report = OktaSourceReport() |         self.report = OktaSourceReport() | ||||||
|         self.okta_client = self._create_okta_client() |         self.okta_client = self._create_okta_client() | ||||||
| 
 | 
 | ||||||
|     def get_workunits(self) -> Iterable[MetadataWorkUnit]: |         # Create and register the stateful ingestion use-case handler. | ||||||
|  |         self.stale_entity_removal_handler = StaleEntityRemovalHandler( | ||||||
|  |             source=self, | ||||||
|  |             config=self.config, | ||||||
|  |             state_type_class=BaseSQLAlchemyCheckpointState, | ||||||
|  |             pipeline_name=ctx.pipeline_name, | ||||||
|  |             run_id=ctx.run_id, | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |     def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: | ||||||
|         # Step 0: get or create the event loop |         # Step 0: get or create the event loop | ||||||
|         # This method can be called on the main thread or an async thread, so we must create a new loop if one doesn't exist |         # This method can be called on the main thread or an async thread, so we must create a new loop if one doesn't exist | ||||||
|         # See https://docs.python.org/3/library/asyncio-eventloop.html for more info. |         # See https://docs.python.org/3/library/asyncio-eventloop.html for more info. | ||||||
| @ -407,6 +443,12 @@ class OktaSource(Source): | |||||||
|         # Step 4: Close the event loop |         # Step 4: Close the event loop | ||||||
|         event_loop.close() |         event_loop.close() | ||||||
| 
 | 
 | ||||||
|  |     def get_workunits(self) -> Iterable[MetadataWorkUnit]: | ||||||
|  |         return auto_stale_entity_removal( | ||||||
|  |             self.stale_entity_removal_handler, | ||||||
|  |             auto_status_aspect(self.get_workunits_internal()), | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|     def get_report(self): |     def get_report(self): | ||||||
|         return self.report |         return self.report | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -0,0 +1,16 @@ | |||||||
|  | [ | ||||||
|  |     { | ||||||
|  |         "id": "0", | ||||||
|  |         "created": "2015-02-06T10:11:28.000Z", | ||||||
|  |         "lastUpdated": "2015-10-05T19:16:43.000Z", | ||||||
|  |         "lastMembershipUpdated": "2015-11-28T19:15:32.000Z", | ||||||
|  |         "objectClass": [ | ||||||
|  |           "okta:user_group" | ||||||
|  |         ], | ||||||
|  |         "type": "OKTA_GROUP", | ||||||
|  |         "profile": { | ||||||
|  |           "name": "All Employees", | ||||||
|  |           "description": "All Employees in the Test Company." | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | ] | ||||||
| @ -1,5 +1,7 @@ | |||||||
| import asyncio | import asyncio | ||||||
| import pathlib | import pathlib | ||||||
|  | from functools import partial | ||||||
|  | from typing import Optional, cast | ||||||
| from unittest.mock import Mock, patch | from unittest.mock import Mock, patch | ||||||
| 
 | 
 | ||||||
| import jsonpickle | import jsonpickle | ||||||
| @ -8,12 +10,75 @@ from freezegun import freeze_time | |||||||
| from okta.models import Group, User | from okta.models import Group, User | ||||||
| 
 | 
 | ||||||
| from datahub.ingestion.run.pipeline import Pipeline | from datahub.ingestion.run.pipeline import Pipeline | ||||||
| from datahub.ingestion.source.identity.okta import OktaConfig | from datahub.ingestion.source.identity.okta import OktaConfig, OktaSource | ||||||
|  | from datahub.ingestion.source.state.checkpoint import Checkpoint | ||||||
|  | from datahub.ingestion.source.state.entity_removal_state import GenericCheckpointState | ||||||
| from tests.test_helpers import mce_helpers | from tests.test_helpers import mce_helpers | ||||||
|  | from tests.test_helpers.state_helpers import ( | ||||||
|  |     validate_all_providers_have_committed_successfully, | ||||||
|  | ) | ||||||
| 
 | 
 | ||||||
| FROZEN_TIME = "2020-04-14 07:00:00" | FROZEN_TIME = "2020-04-14 07:00:00" | ||||||
| USER_ID_NOT_IN_GROUPS = "5" | USER_ID_NOT_IN_GROUPS = "5" | ||||||
| 
 | 
 | ||||||
|  | GMS_PORT = 8080 | ||||||
|  | GMS_SERVER = f"http://localhost:{GMS_PORT}" | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def default_recipe(output_file_path): | ||||||
|  |     return { | ||||||
|  |         "run_id": "test-okta-usage", | ||||||
|  |         "source": { | ||||||
|  |             "type": "okta", | ||||||
|  |             "config": { | ||||||
|  |                 "okta_domain": "mock-domain.okta.com", | ||||||
|  |                 "okta_api_token": "mock-okta-token", | ||||||
|  |                 "ingest_users": "True", | ||||||
|  |                 "ingest_groups": "True", | ||||||
|  |                 "ingest_group_membership": "True", | ||||||
|  |                 "okta_profile_to_username_attr": "login", | ||||||
|  |                 "okta_profile_to_username_regex": "([^@]+)", | ||||||
|  |                 "okta_profile_to_group_name_attr": "name", | ||||||
|  |                 "okta_profile_to_group_name_regex": "(.*)", | ||||||
|  |                 "include_deprovisioned_users": "False", | ||||||
|  |                 "include_suspended_users": "False", | ||||||
|  |                 "page_size": "2", | ||||||
|  |                 "delay_seconds": "0.00", | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |         "sink": { | ||||||
|  |             "type": "file", | ||||||
|  |             "config": { | ||||||
|  |                 "filename": f"{output_file_path}", | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def run_ingest( | ||||||
|  |     mock_datahub_graph, | ||||||
|  |     mocked_functions_reference, | ||||||
|  |     recipe, | ||||||
|  | ): | ||||||
|  | 
 | ||||||
|  |     with patch( | ||||||
|  |         "datahub.ingestion.source.identity.okta.OktaClient" | ||||||
|  |     ) as MockClient, patch( | ||||||
|  |         "datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph", | ||||||
|  |         mock_datahub_graph, | ||||||
|  |     ) as mock_checkpoint: | ||||||
|  | 
 | ||||||
|  |         mock_checkpoint.return_value = mock_datahub_graph | ||||||
|  | 
 | ||||||
|  |         mocked_functions_reference(MockClient=MockClient) | ||||||
|  | 
 | ||||||
|  |         # Run an Okta usage ingestion run. | ||||||
|  |         pipeline = Pipeline.create(recipe) | ||||||
|  |         pipeline.run() | ||||||
|  |         pipeline.raise_from_status() | ||||||
|  | 
 | ||||||
|  |         return pipeline | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| def test_okta_config(): | def test_okta_config(): | ||||||
|     config = OktaConfig.parse_obj( |     config = OktaConfig.parse_obj( | ||||||
| @ -39,200 +104,201 @@ def test_okta_config(): | |||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @freeze_time(FROZEN_TIME) | @freeze_time(FROZEN_TIME) | ||||||
| def test_okta_source_default_configs(pytestconfig, tmp_path): | def test_okta_source_default_configs(pytestconfig, mock_datahub_graph, tmp_path): | ||||||
|     test_resources_dir: pathlib.Path = pytestconfig.rootpath / "tests/integration/okta" |     test_resources_dir: pathlib.Path = pytestconfig.rootpath / "tests/integration/okta" | ||||||
| 
 | 
 | ||||||
|     with patch("datahub.ingestion.source.identity.okta.OktaClient") as MockClient: |     output_file_path = f"{tmp_path}/okta_mces_default_config.json" | ||||||
|         _init_mock_okta_client(test_resources_dir, MockClient) |  | ||||||
| 
 | 
 | ||||||
|         # Run an Okta usage ingestion run. |     run_ingest( | ||||||
|         pipeline = Pipeline.create( |         mock_datahub_graph=mock_datahub_graph, | ||||||
|             { |         mocked_functions_reference=partial( | ||||||
|                 "run_id": "test-okta-usage", |             _init_mock_okta_client, test_resources_dir=test_resources_dir | ||||||
|                 "source": { |         ), | ||||||
|                     "type": "okta", |         recipe=default_recipe(output_file_path), | ||||||
|                     "config": { |  | ||||||
|                         "okta_domain": "mock-domain.okta.com", |  | ||||||
|                         "okta_api_token": "mock-okta-token", |  | ||||||
|                         "ingest_users": "True", |  | ||||||
|                         "ingest_groups": "True", |  | ||||||
|                         "ingest_group_membership": "True", |  | ||||||
|                         "okta_profile_to_username_attr": "login", |  | ||||||
|                         "okta_profile_to_username_regex": "([^@]+)", |  | ||||||
|                         "okta_profile_to_group_name_attr": "name", |  | ||||||
|                         "okta_profile_to_group_name_regex": "(.*)", |  | ||||||
|                         "include_deprovisioned_users": "False", |  | ||||||
|                         "include_suspended_users": "False", |  | ||||||
|                         "page_size": "2", |  | ||||||
|                         "delay_seconds": "0.00", |  | ||||||
|                     }, |  | ||||||
|                 }, |  | ||||||
|                 "sink": { |  | ||||||
|                     "type": "file", |  | ||||||
|                     "config": { |  | ||||||
|                         "filename": f"{tmp_path}/okta_mces_default_config.json", |  | ||||||
|                     }, |  | ||||||
|                 }, |  | ||||||
|             } |  | ||||||
|     ) |     ) | ||||||
|         pipeline.run() |  | ||||||
|         pipeline.raise_from_status() |  | ||||||
| 
 | 
 | ||||||
|     mce_helpers.check_golden_file( |     mce_helpers.check_golden_file( | ||||||
|         pytestconfig, |         pytestconfig, | ||||||
|         output_path=tmp_path / "okta_mces_default_config.json", |         output_path=output_file_path, | ||||||
|         golden_path=test_resources_dir / "okta_mces_golden_default_config.json", |         golden_path=f"{test_resources_dir}/okta_mces_golden_default_config.json", | ||||||
|     ) |     ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @freeze_time(FROZEN_TIME) | @freeze_time(FROZEN_TIME) | ||||||
| def test_okta_source_ingestion_disabled(pytestconfig, tmp_path): | def test_okta_source_ingestion_disabled(pytestconfig, mock_datahub_graph, tmp_path): | ||||||
|     test_resources_dir: pathlib.Path = pytestconfig.rootpath / "tests/integration/okta" |     test_resources_dir: pathlib.Path = pytestconfig.rootpath / "tests/integration/okta" | ||||||
| 
 | 
 | ||||||
|     with patch("datahub.ingestion.source.identity.okta.OktaClient") as MockClient: |     output_file_path = f"{tmp_path}/okta_mces_ingestion_disabled.json" | ||||||
|         _init_mock_okta_client(test_resources_dir, MockClient) |     new_recipe = default_recipe(output_file_path) | ||||||
|  |     new_recipe["source"]["config"]["ingest_users"] = False | ||||||
|  |     new_recipe["source"]["config"]["ingest_groups"] = False | ||||||
|  |     new_recipe["source"]["config"]["ingest_group_membership"] = False | ||||||
| 
 | 
 | ||||||
|         # Run an Okta usage ingestion run. |     run_ingest( | ||||||
|         pipeline = Pipeline.create( |         mock_datahub_graph=mock_datahub_graph, | ||||||
|             { |         mocked_functions_reference=partial( | ||||||
|                 "run_id": "test-okta-usage", |             _init_mock_okta_client, test_resources_dir=test_resources_dir | ||||||
|                 "source": { |         ), | ||||||
|                     "type": "okta", |         recipe=new_recipe, | ||||||
|                     "config": { |  | ||||||
|                         "okta_domain": "mock-domain.okta.com", |  | ||||||
|                         "okta_api_token": "mock-okta-token", |  | ||||||
|                         "ingest_users": "False", |  | ||||||
|                         "ingest_groups": "False", |  | ||||||
|                         "ingest_group_membership": "False", |  | ||||||
|                         "okta_profile_to_username_attr": "login", |  | ||||||
|                         "okta_profile_to_username_regex": "([^@]+)", |  | ||||||
|                         "okta_profile_to_group_name_attr": "name", |  | ||||||
|                         "okta_profile_to_group_name_regex": "(.*)", |  | ||||||
|                         "include_deprovisioned_users": "False", |  | ||||||
|                         "include_suspended_users": "False", |  | ||||||
|                         "page_size": "2", |  | ||||||
|                         "delay_seconds": "0.00", |  | ||||||
|                     }, |  | ||||||
|                 }, |  | ||||||
|                 "sink": { |  | ||||||
|                     "type": "file", |  | ||||||
|                     "config": { |  | ||||||
|                         "filename": f"{tmp_path}/okta_mces_ingestion_disabled.json", |  | ||||||
|                     }, |  | ||||||
|                 }, |  | ||||||
|             } |  | ||||||
|     ) |     ) | ||||||
|         pipeline.run() |  | ||||||
|         pipeline.raise_from_status() |  | ||||||
| 
 | 
 | ||||||
|     mce_helpers.check_golden_file( |     mce_helpers.check_golden_file( | ||||||
|         pytestconfig, |         pytestconfig, | ||||||
|         output_path=tmp_path / "okta_mces_ingestion_disabled.json", |         output_path=output_file_path, | ||||||
|         golden_path=test_resources_dir / "okta_mces_golden_ingestion_disabled.json", |         golden_path=f"{test_resources_dir}/okta_mces_golden_ingestion_disabled.json", | ||||||
|     ) |     ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @freeze_time(FROZEN_TIME) | @freeze_time(FROZEN_TIME) | ||||||
| @pytest.mark.asyncio | @pytest.mark.asyncio | ||||||
| def test_okta_source_include_deprovisioned_suspended_users(pytestconfig, tmp_path): | def test_okta_source_include_deprovisioned_suspended_users( | ||||||
|  |     pytestconfig, mock_datahub_graph, tmp_path | ||||||
|  | ): | ||||||
|     test_resources_dir: pathlib.Path = pytestconfig.rootpath / "tests/integration/okta" |     test_resources_dir: pathlib.Path = pytestconfig.rootpath / "tests/integration/okta" | ||||||
| 
 | 
 | ||||||
|     with patch("datahub.ingestion.source.identity.okta.OktaClient") as MockClient: |     output_file_path = ( | ||||||
|         _init_mock_okta_client(test_resources_dir, MockClient) |         f"{tmp_path}/okta_mces_include_deprovisioned_suspended_users.json" | ||||||
| 
 |     ) | ||||||
|         # Run an Okta usage ingestion run. |     new_recipe = default_recipe(output_file_path) | ||||||
|         pipeline = Pipeline.create( |     new_recipe["source"]["config"]["include_deprovisioned_users"] = True | ||||||
|             { |     new_recipe["source"]["config"]["include_suspended_users"] = True | ||||||
|                 "run_id": "test-okta-usage", | 
 | ||||||
|                 "source": { |     run_ingest( | ||||||
|                     "type": "okta", |         mock_datahub_graph=mock_datahub_graph, | ||||||
|                     "config": { |         mocked_functions_reference=partial( | ||||||
|                         "okta_domain": "mock-domain.okta.com", |             _init_mock_okta_client, test_resources_dir=test_resources_dir | ||||||
|                         "okta_api_token": "mock-okta-token", |         ), | ||||||
|                         "ingest_users": "True", |         recipe=new_recipe, | ||||||
|                         "ingest_groups": "True", |  | ||||||
|                         "ingest_group_membership": "True", |  | ||||||
|                         "okta_profile_to_username_attr": "login", |  | ||||||
|                         "okta_profile_to_username_regex": "([^@]+)", |  | ||||||
|                         "okta_profile_to_group_name_attr": "name", |  | ||||||
|                         "okta_profile_to_group_name_regex": "(.*)", |  | ||||||
|                         "include_deprovisioned_users": "True", |  | ||||||
|                         "include_suspended_users": "True", |  | ||||||
|                         "page_size": "2", |  | ||||||
|                         "delay_seconds": "0.00", |  | ||||||
|                     }, |  | ||||||
|                 }, |  | ||||||
|                 "sink": { |  | ||||||
|                     "type": "file", |  | ||||||
|                     "config": { |  | ||||||
|                         "filename": f"{tmp_path}/okta_mces_include_deprovisioned_suspended_users.json", |  | ||||||
|                     }, |  | ||||||
|                 }, |  | ||||||
|             } |  | ||||||
|     ) |     ) | ||||||
|         pipeline.run() |  | ||||||
|         pipeline.raise_from_status() |  | ||||||
| 
 | 
 | ||||||
|     mce_helpers.check_golden_file( |     mce_helpers.check_golden_file( | ||||||
|         pytestconfig, |         pytestconfig, | ||||||
|         output_path=tmp_path / "okta_mces_include_deprovisioned_suspended_users.json", |         output_path=output_file_path, | ||||||
|         golden_path=test_resources_dir |         golden_path=f"{test_resources_dir}/okta_mces_golden_include_deprovisioned_suspended_users.json", | ||||||
|         / "okta_mces_golden_include_deprovisioned_suspended_users.json", |  | ||||||
|     ) |     ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @freeze_time(FROZEN_TIME) | @freeze_time(FROZEN_TIME) | ||||||
| @pytest.mark.asyncio | @pytest.mark.asyncio | ||||||
| def test_okta_source_custom_user_name_regex(pytestconfig, tmp_path): | def test_okta_source_custom_user_name_regex(pytestconfig, mock_datahub_graph, tmp_path): | ||||||
|     test_resources_dir: pathlib.Path = pytestconfig.rootpath / "tests/integration/okta" |     test_resources_dir: pathlib.Path = pytestconfig.rootpath / "tests/integration/okta" | ||||||
| 
 | 
 | ||||||
|     with patch("datahub.ingestion.source.identity.okta.OktaClient") as MockClient: |     output_file_path = f"{tmp_path}/okta_mces_custom_user_name_regex.json" | ||||||
|         _init_mock_okta_client(test_resources_dir, MockClient) |     new_recipe = default_recipe(output_file_path) | ||||||
|  |     new_recipe["source"]["config"]["okta_profile_to_username_regex"] = "(.*)" | ||||||
|  |     new_recipe["source"]["config"]["okta_profile_to_group_name_regex"] = "(.*)" | ||||||
| 
 | 
 | ||||||
|         # Run an Okta usage ingestion run. |     run_ingest( | ||||||
|         pipeline = Pipeline.create( |         mock_datahub_graph=mock_datahub_graph, | ||||||
|             { |         mocked_functions_reference=partial( | ||||||
|                 "run_id": "test-okta-usage", |             _init_mock_okta_client, test_resources_dir=test_resources_dir | ||||||
|                 "source": { |         ), | ||||||
|                     "type": "okta", |         recipe=new_recipe, | ||||||
|                     "config": { |  | ||||||
|                         "okta_domain": "mock-domain.okta.com", |  | ||||||
|                         "okta_api_token": "mock-okta-token", |  | ||||||
|                         "ingest_users": "True", |  | ||||||
|                         "ingest_groups": "True", |  | ||||||
|                         "ingest_group_membership": "True", |  | ||||||
|                         "okta_profile_to_username_attr": "email", |  | ||||||
|                         "okta_profile_to_username_regex": "(.*)", |  | ||||||
|                         "okta_profile_to_group_name_attr": "name", |  | ||||||
|                         "okta_profile_to_group_name_regex": "(.*)", |  | ||||||
|                         "include_deprovisioned_users": "False", |  | ||||||
|                         "include_suspended_users": "False", |  | ||||||
|                         "page_size": "2", |  | ||||||
|                         "delay_seconds": "0.00", |  | ||||||
|                     }, |  | ||||||
|                 }, |  | ||||||
|                 "sink": { |  | ||||||
|                     "type": "file", |  | ||||||
|                     "config": { |  | ||||||
|                         "filename": f"{tmp_path}/okta_mces_custom_user_name_regex.json", |  | ||||||
|                     }, |  | ||||||
|                 }, |  | ||||||
|             } |  | ||||||
|     ) |     ) | ||||||
|         pipeline.run() |  | ||||||
|         pipeline.raise_from_status() |  | ||||||
| 
 | 
 | ||||||
|     mce_helpers.check_golden_file( |     mce_helpers.check_golden_file( | ||||||
|         pytestconfig, |         pytestconfig, | ||||||
|         output_path=tmp_path / "okta_mces_custom_user_name_regex.json", |         output_path=output_file_path, | ||||||
|         golden_path=test_resources_dir / "okta_mces_golden_custom_user_name_regex.json", |         golden_path=f"{test_resources_dir}/okta_mces_golden_custom_user_name_regex.json", | ||||||
|  |     ) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def get_current_checkpoint_from_pipeline( | ||||||
|  |     pipeline: Pipeline, | ||||||
|  | ) -> Optional[Checkpoint[GenericCheckpointState]]: | ||||||
|  |     azure_ad_source = cast(OktaSource, pipeline.source) | ||||||
|  |     return azure_ad_source.get_current_checkpoint( | ||||||
|  |         azure_ad_source.stale_entity_removal_handler.job_id | ||||||
|  |     ) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @freeze_time(FROZEN_TIME) | ||||||
|  | def test_okta_stateful_ingestion(pytestconfig, tmp_path, mock_time, mock_datahub_graph): | ||||||
|  |     test_resources_dir: pathlib.Path = pytestconfig.rootpath / "tests/integration/okta" | ||||||
|  | 
 | ||||||
|  |     output_file_path = f"{tmp_path}/temporary_mces.json" | ||||||
|  |     new_recipe = default_recipe(output_file_path) | ||||||
|  | 
 | ||||||
|  |     new_recipe["pipeline_name"] = "okta_execution" | ||||||
|  |     new_recipe["source"]["config"]["stateful_ingestion"] = { | ||||||
|  |         "enabled": True, | ||||||
|  |         "state_provider": { | ||||||
|  |             "type": "datahub", | ||||||
|  |             "config": {"datahub_api": {"server": GMS_SERVER}}, | ||||||
|  |         }, | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     pipeline1 = run_ingest( | ||||||
|  |         mock_datahub_graph=mock_datahub_graph, | ||||||
|  |         mocked_functions_reference=partial( | ||||||
|  |             _init_mock_okta_client, test_resources_dir=test_resources_dir | ||||||
|  |         ), | ||||||
|  |         recipe=new_recipe, | ||||||
|  |     ) | ||||||
|  | 
 | ||||||
|  |     checkpoint1 = get_current_checkpoint_from_pipeline(pipeline1) | ||||||
|  |     assert checkpoint1 | ||||||
|  |     assert checkpoint1.state | ||||||
|  | 
 | ||||||
|  |     # Create new event loop as last one is closed because of previous ingestion run | ||||||
|  |     event_loop = asyncio.new_event_loop() | ||||||
|  |     asyncio.set_event_loop(event_loop) | ||||||
|  | 
 | ||||||
|  |     pipeline2 = run_ingest( | ||||||
|  |         mock_datahub_graph=mock_datahub_graph, | ||||||
|  |         mocked_functions_reference=partial( | ||||||
|  |             overwrite_group_in_mocked_data, test_resources_dir=test_resources_dir | ||||||
|  |         ), | ||||||
|  |         recipe=new_recipe, | ||||||
|  |     ) | ||||||
|  |     checkpoint2 = get_current_checkpoint_from_pipeline(pipeline2) | ||||||
|  |     assert checkpoint2 | ||||||
|  |     assert checkpoint2.state | ||||||
|  |     # | ||||||
|  |     # Validate that all providers have committed successfully. | ||||||
|  |     validate_all_providers_have_committed_successfully( | ||||||
|  |         pipeline=pipeline1, expected_providers=1 | ||||||
|  |     ) | ||||||
|  |     validate_all_providers_have_committed_successfully( | ||||||
|  |         pipeline=pipeline2, expected_providers=1 | ||||||
|  |     ) | ||||||
|  | 
 | ||||||
|  |     # Perform all assertions on the states. The deleted group should not be | ||||||
|  |     # part of the second state | ||||||
|  |     state1 = checkpoint1.state | ||||||
|  |     state2 = checkpoint2.state | ||||||
|  | 
 | ||||||
|  |     difference_group_urns = list( | ||||||
|  |         state1.get_urns_not_in(type="corpGroup", other_checkpoint_state=state2) | ||||||
|  |     ) | ||||||
|  | 
 | ||||||
|  |     assert len(difference_group_urns) == 1 | ||||||
|  |     assert difference_group_urns == ["urn:li:corpGroup:Engineering"] | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def overwrite_group_in_mocked_data(test_resources_dir, MockClient): | ||||||
|  |     _init_mock_okta_client( | ||||||
|  |         test_resources_dir, | ||||||
|  |         MockClient, | ||||||
|  |         mock_groups_json=test_resources_dir / "okta_deleted_groups.json", | ||||||
|     ) |     ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # Initializes a Mock Okta Client to return users from okta_users.json and groups from okta_groups.json. | # Initializes a Mock Okta Client to return users from okta_users.json and groups from okta_groups.json. | ||||||
| def _init_mock_okta_client(test_resources_dir, MockClient): | def _init_mock_okta_client( | ||||||
|     okta_users_json_file = test_resources_dir / "okta_users.json" |     test_resources_dir, MockClient, mock_users_json=None, mock_groups_json=None | ||||||
|     okta_groups_json_file = test_resources_dir / "okta_groups.json" | ): | ||||||
|  | 
 | ||||||
|  |     okta_users_json_file = ( | ||||||
|  |         test_resources_dir / "okta_users.json" | ||||||
|  |         if mock_users_json is None | ||||||
|  |         else mock_users_json | ||||||
|  |     ) | ||||||
|  |     okta_groups_json_file = ( | ||||||
|  |         test_resources_dir / "okta_groups.json" | ||||||
|  |         if mock_groups_json is None | ||||||
|  |         else mock_groups_json | ||||||
|  |     ) | ||||||
| 
 | 
 | ||||||
|     # Add mock Okta API responses. |     # Add mock Okta API responses. | ||||||
|     with okta_users_json_file.open() as okta_users_json: |     with okta_users_json_file.open() as okta_users_json: | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 mohdsiddique
						mohdsiddique