mirror of
https://github.com/datahub-project/datahub.git
synced 2025-07-21 00:21:06 +00:00
54 lines
2.4 KiB
Python
54 lines
2.4 KiB
Python
import pytest
|
|
|
|
from datahub.ingestion.source.state.entity_removal_state import GenericCheckpointState
|
|
|
|
|
|
@pytest.fixture
|
|
def other_checkpoint_state():
|
|
state = GenericCheckpointState()
|
|
state.add_checkpoint_urn("corpuser", "urn:li:corpuser:user1")
|
|
state.add_checkpoint_urn("corpuser", "urn:li:corpuser:user2")
|
|
state.add_checkpoint_urn("corpuser", "urn:li:corpuser:user3")
|
|
state.add_checkpoint_urn("corpuser", "urn:li:corpuser:user5")
|
|
state.add_checkpoint_urn("corpGroup", "urn:li:corpGroup:group1")
|
|
state.add_checkpoint_urn("corpGroup", "urn:li:corpGroup:group2")
|
|
state.add_checkpoint_urn("corpGroup", "urn:li:corpGroup:group3")
|
|
state.add_checkpoint_urn("corpGroup", "urn:li:corpGroup:group5")
|
|
return state
|
|
|
|
|
|
def test_add_checkpoint_urn():
|
|
state = GenericCheckpointState()
|
|
assert len(state.urns) == 0
|
|
state.add_checkpoint_urn("corpuser", "urn:li:corpuser:user1")
|
|
assert len(state.urns) == 1
|
|
state.add_checkpoint_urn("corpGroup", "urn:li:corpGroup:group2")
|
|
assert len(state.urns) != 1
|
|
|
|
|
|
def test_get_urns_not_in(other_checkpoint_state):
|
|
oldstate = GenericCheckpointState()
|
|
oldstate.add_checkpoint_urn("corpuser", "urn:li:corpuser:user1")
|
|
oldstate.add_checkpoint_urn("corpuser", "urn:li:corpuser:user2")
|
|
oldstate.add_checkpoint_urn("corpuser", "urn:li:corpuser:user4")
|
|
oldstate.add_checkpoint_urn("corpGroup", "urn:li:corpGroup:group1")
|
|
oldstate.add_checkpoint_urn("corpGroup", "urn:li:corpGroup:group2")
|
|
oldstate.add_checkpoint_urn("corpGroup", "urn:li:corpGroup:group4")
|
|
iterable = oldstate.get_urns_not_in("corpuser", other_checkpoint_state)
|
|
# urn:li:corpuser:user4 has been identified as a user to be deleted
|
|
for item in iterable:
|
|
assert item == "urn:li:corpuser:user4"
|
|
iterable = oldstate.get_urns_not_in("corpGroup", other_checkpoint_state)
|
|
for item in iterable:
|
|
assert item == "urn:li:corpGroup:group4"
|
|
|
|
|
|
def test_get_percent_entities_changed(other_checkpoint_state):
|
|
oldstate = GenericCheckpointState()
|
|
oldstate.add_checkpoint_urn("corpuser", "urn:li:corpuser:user1")
|
|
oldstate.add_checkpoint_urn("corpuser", "urn:li:corpuser:user2")
|
|
oldstate.add_checkpoint_urn("corpuser", "urn:li:corpuser:user4")
|
|
percent = oldstate.get_percent_entities_changed(other_checkpoint_state)
|
|
# two states have 75% of differences (total 8 elements, 6 elements are different (75%) and 2 (25%) are the same)
|
|
assert percent == 75.0
|