feat(ingest): start simplifying stateful ingestion state (#6740)

Harshal Sheth 2022-12-13 04:05:57 -05:00 committed by GitHub
parent 7d63399d00
commit cf3db168ac
16 changed files with 151 additions and 379 deletions

View File

@@ -713,11 +713,9 @@ class DBTSourceBase(StatefulIngestionSourceBase):
         if last_checkpoint is not None and is_conversion_required:
             # Map the BaseSQLAlchemyCheckpointState to DbtCheckpointState
             dbt_checkpoint_state: DbtCheckpointState = DbtCheckpointState()
-            dbt_checkpoint_state.encoded_node_urns = (
+            dbt_checkpoint_state.urns = (
                 cast(BaseSQLAlchemyCheckpointState, last_checkpoint.state)
-            ).encoded_table_urns
-            # Old dbt source was not supporting the assertion
-            dbt_checkpoint_state.encoded_assertion_urns = []
+            ).urns
             last_checkpoint.state = dbt_checkpoint_state
         return last_checkpoint
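Both checkpoint classes now derive from GenericCheckpointState (introduced later in this commit) and store plain URN strings in a single `urns` field, so the conversion above reduces to copying one list. A minimal sketch of the idea, assuming the post-commit module paths:

    from datahub.ingestion.source.state.dbt_state import DbtCheckpointState
    from datahub.ingestion.source.state.sql_common_state import (
        BaseSQLAlchemyCheckpointState,
    )

    old_state = BaseSQLAlchemyCheckpointState(
        urns=["urn:li:dataset:(urn:li:dataPlatform:mysql,db1.t1,PROD)"]
    )
    new_state = DbtCheckpointState(urns=old_state.urns)  # a plain list copy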

View File

@@ -60,7 +60,7 @@ from datahub.ingestion.source.looker.looker_lib_wrapper import (
     LookerAPI,
     LookerAPIConfig,
 )
-from datahub.ingestion.source.state.looker_state import LookerCheckpointState
+from datahub.ingestion.source.state.entity_removal_state import GenericCheckpointState
 from datahub.ingestion.source.state.stale_entity_removal_handler import (
     StaleEntityRemovalHandler,
     StatefulStaleMetadataRemovalConfig,
@@ -234,7 +234,7 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
         self.stale_entity_removal_handler = StaleEntityRemovalHandler(
             source=self,
             config=self.source_config,
-            state_type_class=LookerCheckpointState,
+            state_type_class=GenericCheckpointState,
             pipeline_name=self.ctx.pipeline_name,
             run_id=self.ctx.run_id,
         )

View File

@@ -47,7 +47,7 @@ from datahub.ingestion.source.looker.looker_lib_wrapper import (
     LookerAPIConfig,
     TransportOptionsConfig,
 )
-from datahub.ingestion.source.state.lookml_state import LookMLCheckpointState
+from datahub.ingestion.source.state.entity_removal_state import GenericCheckpointState
 from datahub.ingestion.source.state.stale_entity_removal_handler import (
     StaleEntityRemovalHandler,
     StaleEntityRemovalSourceReport,
@@ -1089,7 +1089,7 @@ class LookMLSource(StatefulIngestionSourceBase):
         self.stale_entity_removal_handler = StaleEntityRemovalHandler(
             source=self,
             config=self.source_config,
-            state_type_class=LookMLCheckpointState,
+            state_type_class=GenericCheckpointState,
             pipeline_name=self.ctx.pipeline_name,
             run_id=self.ctx.run_id,
         )

View File

@@ -1,5 +1,6 @@
 import base64
 import bz2
+import contextlib
 import functools
 import json
 import logging
@@ -128,7 +129,9 @@ class Checkpoint(Generic[StateType]):
             )
         elif checkpoint_aspect.state.serde == "base85":
             state_obj = Checkpoint._from_base85_bytes(
-                checkpoint_aspect, functools.partial(bz2.decompress)
+                checkpoint_aspect,
+                functools.partial(bz2.decompress),
+                state_class,
             )
         elif checkpoint_aspect.state.serde == "base85-bz2-json":
             state_obj = Checkpoint._from_base85_json_bytes(
@@ -177,11 +180,18 @@ class Checkpoint(Generic[StateType]):
     def _from_base85_bytes(
         checkpoint_aspect: DatahubIngestionCheckpointClass,
         decompressor: Callable[[bytes], bytes],
+        state_class: Type[StateType],
     ) -> StateType:
         state: StateType = pickle.loads(
             decompressor(base64.b85decode(checkpoint_aspect.state.payload))  # type: ignore
         )
+
+        with contextlib.suppress(Exception):
+            # When loading from pickle, the pydantic validators don't run.
+            # By re-serializing and re-parsing, we ensure that the state is valid.
+            # However, we also suppress any exceptions to make sure this doesn't blow up.
+            state = state_class.parse_obj(state.dict())
+
         # Because the base85 method is deprecated in favor of base85-bz2-json,
         # we will automatically switch the serde.
         state.serde = "base85-bz2-json"
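The suppressed re-parse above leans on standard pydantic (v1) behavior: `pickle.loads` rebuilds the object without running validators, and round-tripping through `.dict()` / `.parse_obj()` is what re-applies them, including migration root validators like the one added later in this commit. A standalone sketch of the pattern, using a hypothetical `MyState` model:

    import contextlib
    import pickle
    from typing import List

    import pydantic

    class MyState(pydantic.BaseModel):
        urns: List[str] = []

    blob = pickle.dumps(
        MyState(urns=["urn:li:dataset:(urn:li:dataPlatform:mysql,db1.t1,PROD)"])
    )
    state = pickle.loads(blob)  # pydantic validators do NOT run here
    with contextlib.suppress(Exception):
        # re-serialize and re-parse so the validators run; failures stay non-fatal
        state = MyState.parse_obj(state.dict())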

View File

@@ -1,102 +1,21 @@
-import logging
-from typing import Callable, Dict, Iterable, List
-
-import pydantic
-
-from datahub.emitter.mce_builder import make_assertion_urn
-from datahub.ingestion.source.state.stale_entity_removal_handler import (
-    StaleEntityCheckpointStateBase,
+from datahub.ingestion.source.state.entity_removal_state import (
+    GenericCheckpointState,
+    pydantic_state_migrator,
 )
-from datahub.utilities.checkpoint_state_util import CheckpointStateUtil
-from datahub.utilities.urns.urn import Urn
-
-logger = logging.getLogger(__name__)
 
 
-class DbtCheckpointState(StaleEntityCheckpointStateBase["DbtCheckpointState"]):
+class DbtCheckpointState(GenericCheckpointState):
     """
     Class for representing the checkpoint state for DBT sources.
     Stores all nodes and assertions being ingested and is used to remove any stale entities.
     """
 
-    encoded_node_urns: List[str] = pydantic.Field(default_factory=list)
-    encoded_assertion_urns: List[str] = pydantic.Field(default_factory=list)
-
-    @classmethod
-    def get_supported_types(cls) -> List[str]:
-        return ["assertion", "dataset"]
-
-    @staticmethod
-    def _get_assertion_lightweight_repr(assertion_urn: str) -> str:
-        """Reduces the amount of text in the URNs for smaller state footprint."""
-        urn = Urn.create_from_string(assertion_urn)
-        key = urn.get_entity_id_as_string()
-        assert key is not None
-        return key
-
-    def _add_assertion_urn(self, assertion_urn: str) -> None:
-        self.encoded_assertion_urns.append(
-            self._get_assertion_lightweight_repr(assertion_urn)
-        )
-
-    def _get_assertion_urns_not_in(
-        self, checkpoint: "DbtCheckpointState"
-    ) -> Iterable[str]:
-        """
-        Dbt assertion are mapped to DataHub assertion concept
-        """
-        difference = CheckpointStateUtil.get_encoded_urns_not_in(
-            self.encoded_assertion_urns, checkpoint.encoded_assertion_urns
-        )
-        for key in difference:
-            yield make_assertion_urn(key)
-
-    def _get_node_urns_not_in(self, checkpoint: "DbtCheckpointState") -> Iterable[str]:
-        """
-        Dbt node are mapped to DataHub dataset concept
-        """
-        yield from CheckpointStateUtil.get_dataset_urns_not_in(
-            self.encoded_node_urns, checkpoint.encoded_node_urns
-        )
-
-    def _add_node_urn(self, node_urn: str) -> None:
-        self.encoded_node_urns.append(
-            CheckpointStateUtil.get_dataset_lightweight_repr(node_urn)
-        )
-
-    def add_checkpoint_urn(self, type: str, urn: str) -> None:
-        supported_entities_add_handlers: Dict[str, Callable[[str], None]] = {
-            "dataset": self._add_node_urn,
-            "assertion": self._add_assertion_urn,
-        }
+    _migration = pydantic_state_migrator(
+        {
+            "encoded_node_urns": "dataset",
+            "encoded_assertion_urns": "assertion",
+        }
+    )
-        if type not in supported_entities_add_handlers:
-            logger.error(f"Can not save Unknown entity {type} to checkpoint.")
-        supported_entities_add_handlers[type](urn)
-
-    def get_urns_not_in(
-        self, type: str, other_checkpoint_state: "DbtCheckpointState"
-    ) -> Iterable[str]:
-        assert type in self.get_supported_types()
-        if type == "dataset":
-            yield from self._get_node_urns_not_in(other_checkpoint_state)
-        elif type == "assertion":
-            yield from self._get_assertion_urns_not_in(other_checkpoint_state)
-
-    def get_percent_entities_changed(
-        self, old_checkpoint_state: "DbtCheckpointState"
-    ) -> float:
-        return StaleEntityCheckpointStateBase.compute_percent_entities_changed(
-            [
-                (self.encoded_node_urns, old_checkpoint_state.encoded_node_urns),
-                (
-                    self.encoded_assertion_urns,
-                    old_checkpoint_state.encoded_assertion_urns,
-                ),
-            ]
-        )
 
     def prepare_for_commit(self) -> None:
-        self.encoded_node_urns = list(set(self.encoded_node_urns))
-        self.encoded_assertion_urns = list(set(self.encoded_assertion_urns))
+        self.urns = list(set(self.urns))

View File

@@ -0,0 +1,76 @@
+from typing import Dict, Iterable, List, Type
+
+import pydantic
+
+from datahub.emitter.mce_builder import make_assertion_urn, make_container_urn
+from datahub.ingestion.source.state.stale_entity_removal_handler import (
+    StaleEntityCheckpointStateBase,
+)
+from datahub.utilities.checkpoint_state_util import CheckpointStateUtil
+from datahub.utilities.urns.urn import guess_entity_type
+
+
+class GenericCheckpointState(StaleEntityCheckpointStateBase["GenericCheckpointState"]):
+    urns: List[str] = pydantic.Field(default_factory=list)
+
+    @classmethod
+    def get_supported_types(cls) -> List[str]:
+        return ["*"]
+
+    def add_checkpoint_urn(self, type: str, urn: str) -> None:
+        # TODO: dedup
+        self.urns.append(urn)
+
+    def get_urns_not_in(
+        self, type: str, other_checkpoint_state: "GenericCheckpointState"
+    ) -> Iterable[str]:
+        diff = set(self.urns) - set(other_checkpoint_state.urns)
+
+        # To maintain backwards compatibility, we provide this filtering mechanism.
+        if type == "*":
+            yield from diff
+        else:
+            yield from (urn for urn in diff if guess_entity_type(urn) == type)
+
+    def get_percent_entities_changed(
+        self, old_checkpoint_state: "GenericCheckpointState"
+    ) -> float:
+        return StaleEntityCheckpointStateBase.compute_percent_entities_changed(
+            [(self.urns, old_checkpoint_state.urns)]
+        )
+
+
+def pydantic_state_migrator(mapping: Dict[str, str]) -> classmethod:
+    # mapping would be something like:
+    # {
+    #    'encoded_view_urns': 'dataset',
+    #    'encoded_container_urns': 'container',
+    # }
+
+    SUPPORTED_TYPES = [
+        "dataset",
+        "container",
+        "assertion",
+    ]
+    assert set(mapping.values()) <= set(SUPPORTED_TYPES)
+
+    def _validate_field_rename(cls: Type, values: dict) -> dict:
+        values.setdefault("urns", [])
+
+        for old_field, mapped_type in mapping.items():
+            if old_field not in values:
+                continue
+
+            value = values.pop(old_field)
+            if mapped_type == "dataset":
+                values["urns"] += CheckpointStateUtil.get_dataset_urns_not_in(value, [])
+            elif mapped_type == "container":
+                values["urns"] += [make_container_urn(guid) for guid in value]
+            elif mapped_type == "assertion":
+                values["urns"] += [make_assertion_urn(encoded) for encoded in value]
+            else:
+                raise ValueError(f"Unsupported type {mapped_type}")
+
+        return values
+
+    return pydantic.root_validator(pre=True, allow_reuse=True)(_validate_field_rename)
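A state class opts into this migration by attaching the returned root validator as a class attribute; mapping keys are legacy field names and values are the target entity types. Roughly how the dbt and SQLAlchemy states below wire it up, shown here with a hypothetical subclass (the expected output mirrors this commit's test_backward_compat):

    class MyCheckpointState(GenericCheckpointState):
        # legacy encoded fields are rewritten into the flat `urns` list at parse time
        _migration = pydantic_state_migrator({"encoded_node_urns": "dataset"})

    state = MyCheckpointState.parse_obj({"encoded_node_urns": ["mysql||db1.t1||PROD"]})
    assert state.urns == ["urn:li:dataset:(urn:li:dataPlatform:mysql,db1.t1,PROD)"]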

View File

@@ -1,46 +0,0 @@
-import logging
-from typing import Iterable, List
-
-import pydantic
-
-from datahub.ingestion.source.state.stale_entity_removal_handler import (
-    StaleEntityCheckpointStateBase,
-)
-from datahub.utilities.urns.urn import guess_entity_type
-
-logger = logging.getLogger(__name__)
-
-
-class LookerCheckpointState(StaleEntityCheckpointStateBase["LookerCheckpointState"]):
-    """
-    Class for representing the checkpoint state for Looker sources.
-    Stores all datasets, charts and dashboards being ingested and is
-    used to remove any stale entities.
-    """
-
-    urns: List[str] = pydantic.Field(default_factory=list)
-
-    @classmethod
-    def get_supported_types(cls) -> List[str]:
-        return ["*"]
-
-    def add_checkpoint_urn(self, type: str, urn: str) -> None:
-        self.urns.append(urn)
-
-    def get_urns_not_in(
-        self, type: str, other_checkpoint_state: "LookerCheckpointState"
-    ) -> Iterable[str]:
-        diff = set(self.urns) - set(other_checkpoint_state.urns)
-
-        # To maintain backwards compatibility, we provide this filtering mechanism.
-        if type == "*":
-            yield from diff
-        else:
-            yield from (urn for urn in diff if guess_entity_type(urn) == type)
-
-    def get_percent_entities_changed(
-        self, old_checkpoint_state: "LookerCheckpointState"
-    ) -> float:
-        return StaleEntityCheckpointStateBase.compute_percent_entities_changed(
-            [(self.urns, old_checkpoint_state.urns)]
-        )

View File

@@ -1,46 +0,0 @@
-import logging
-from typing import Iterable, List
-
-import pydantic
-
-from datahub.ingestion.source.state.stale_entity_removal_handler import (
-    StaleEntityCheckpointStateBase,
-)
-from datahub.utilities.urns.urn import guess_entity_type
-
-logger = logging.getLogger(__name__)
-
-
-class LookMLCheckpointState(StaleEntityCheckpointStateBase["LookMLCheckpointState"]):
-    """
-    Class for representing the checkpoint state for Looker sources.
-    Stores all datasets, charts and dashboards being ingested and is
-    used to remove any stale entities.
-    """
-
-    urns: List[str] = pydantic.Field(default_factory=list)
-
-    @classmethod
-    def get_supported_types(cls) -> List[str]:
-        return ["*"]
-
-    def add_checkpoint_urn(self, type: str, urn: str) -> None:
-        self.urns.append(urn)
-
-    def get_urns_not_in(
-        self, type: str, other_checkpoint_state: "LookMLCheckpointState"
-    ) -> Iterable[str]:
-        diff = set(self.urns) - set(other_checkpoint_state.urns)
-
-        # To maintain backwards compatibility, we provide this filtering mechanism.
-        if type == "*":
-            yield from diff
-        else:
-            yield from (urn for urn in diff if guess_entity_type(urn) == type)
-
-    def get_percent_entities_changed(
-        self, old_checkpoint_state: "LookMLCheckpointState"
-    ) -> float:
-        return StaleEntityCheckpointStateBase.compute_percent_entities_changed(
-            [(self.urns, old_checkpoint_state.urns)]
-        )

View File

@@ -1,142 +1,21 @@
-from typing import Iterable, List
-
-import pydantic
-
-from datahub.emitter.mce_builder import (
-    assertion_urn_to_key,
-    container_urn_to_key,
-    make_assertion_urn,
-    make_container_urn,
+from datahub.ingestion.source.state.entity_removal_state import (
+    GenericCheckpointState,
+    pydantic_state_migrator,
 )
-from datahub.ingestion.source.state.stale_entity_removal_handler import (
-    StaleEntityCheckpointStateBase,
-)
-from datahub.utilities.checkpoint_state_util import CheckpointStateUtil
 
 
-class BaseSQLAlchemyCheckpointState(
-    StaleEntityCheckpointStateBase["BaseSQLAlchemyCheckpointState"]
-):
+class BaseSQLAlchemyCheckpointState(GenericCheckpointState):
     """
     Base class for representing the checkpoint state for all SQLAlchemy based sources.
     Stores all tables and views being ingested and is used to remove any stale entities.
     Subclasses can define additional state as appropriate.
     """
 
-    encoded_table_urns: List[str] = pydantic.Field(default_factory=list)
-    encoded_view_urns: List[str] = pydantic.Field(default_factory=list)
-    encoded_container_urns: List[str] = pydantic.Field(default_factory=list)
-    encoded_assertion_urns: List[str] = pydantic.Field(default_factory=list)
-
-    @classmethod
-    def get_supported_types(cls) -> List[str]:
-        return ["assertion", "container", "table", "view"]
-
-    @staticmethod
-    def _get_lightweight_repr(dataset_urn: str) -> str:
-        """Reduces the amount of text in the URNs for smaller state footprint."""
-        return CheckpointStateUtil.get_dataset_lightweight_repr(dataset_urn)
-
-    @staticmethod
-    def _get_container_lightweight_repr(container_urn: str) -> str:
-        """Reduces the amount of text in the URNs for smaller state footprint."""
-        key = container_urn_to_key(container_urn)
-        assert key is not None
-        return f"{key.guid}"
-
-    @staticmethod
-    def _get_container_urns_not_in(
-        encoded_urns_1: List[str], encoded_urns_2: List[str]
-    ) -> Iterable[str]:
-        difference = CheckpointStateUtil.get_encoded_urns_not_in(
-            encoded_urns_1, encoded_urns_2
-        )
-        for guid in difference:
-            yield make_container_urn(guid)
-
-    def _get_table_urns_not_in(
-        self, checkpoint: "BaseSQLAlchemyCheckpointState"
-    ) -> Iterable[str]:
-        """Tables are mapped to DataHub dataset concept."""
-        yield from CheckpointStateUtil.get_dataset_urns_not_in(
-            self.encoded_table_urns, checkpoint.encoded_table_urns
-        )
-
-    def _get_view_urns_not_in(
-        self, checkpoint: "BaseSQLAlchemyCheckpointState"
-    ) -> Iterable[str]:
-        """Views are mapped to DataHub dataset concept."""
-        yield from CheckpointStateUtil.get_dataset_urns_not_in(
-            self.encoded_view_urns, checkpoint.encoded_view_urns
-        )
-
-    def _get_assertion_urns_not_in(
-        self, checkpoint: "BaseSQLAlchemyCheckpointState"
-    ) -> Iterable[str]:
-        """Tables are mapped to DataHub dataset concept."""
-        diff = CheckpointStateUtil.get_encoded_urns_not_in(
-            self.encoded_assertion_urns, checkpoint.encoded_assertion_urns
-        )
-        for assertion_id in diff:
-            yield make_assertion_urn(assertion_id)
-
-    def _add_table_urn(self, table_urn: str) -> None:
-        self.encoded_table_urns.append(self._get_lightweight_repr(table_urn))
-
-    def _add_assertion_urn(self, assertion_urn: str) -> None:
-        key = assertion_urn_to_key(assertion_urn)
-        assert key is not None
-        self.encoded_assertion_urns.append(key.assertionId)
-
-    def _add_view_urn(self, view_urn: str) -> None:
-        self.encoded_view_urns.append(self._get_lightweight_repr(view_urn))
-
-    def _add_container_guid(self, container_urn: str) -> None:
-        self.encoded_container_urns.append(
-            self._get_container_lightweight_repr(container_urn)
-        )
-
-    def add_checkpoint_urn(self, type: str, urn: str) -> None:
-        assert type in self.get_supported_types()
-        if type == "assertion":
-            self._add_assertion_urn(urn)
-        elif type == "container":
-            self._add_container_guid(urn)
-        elif type == "table":
-            self._add_table_urn(urn)
-        elif type == "view":
-            self._add_view_urn(urn)
-
-    def get_urns_not_in(
-        self, type: str, other_checkpoint_state: "BaseSQLAlchemyCheckpointState"
-    ) -> Iterable[str]:
-        assert type in self.get_supported_types()
-        if type == "assertion":
-            yield from self._get_assertion_urns_not_in(other_checkpoint_state)
-        if type == "container":
-            yield from self._get_container_urns_not_in(
-                self.encoded_container_urns,
-                other_checkpoint_state.encoded_container_urns,
-            )
-        elif type == "table":
-            yield from self._get_table_urns_not_in(other_checkpoint_state)
-        elif type == "view":
-            yield from self._get_view_urns_not_in(other_checkpoint_state)
-
-    def get_percent_entities_changed(
-        self, old_checkpoint_state: "BaseSQLAlchemyCheckpointState"
-    ) -> float:
-        return StaleEntityCheckpointStateBase.compute_percent_entities_changed(
-            [
-                (
-                    self.encoded_assertion_urns,
-                    old_checkpoint_state.encoded_assertion_urns,
-                ),
-                (
-                    self.encoded_container_urns,
-                    old_checkpoint_state.encoded_container_urns,
-                ),
-                (self.encoded_table_urns, old_checkpoint_state.encoded_table_urns),
-                (self.encoded_view_urns, old_checkpoint_state.encoded_view_urns),
-            ]
-        )
+    _migration = pydantic_state_migrator(
+        {
+            "encoded_table_urns": "dataset",
+            "encoded_view_urns": "dataset",
+            "encoded_container_urns": "container",
+            "encoded_assertion_urns": "assertion",
+        }
+    )

View File

@@ -1,46 +1,9 @@
-import logging
-from typing import Iterable, List
-
-import pydantic
-
-from datahub.ingestion.source.state.stale_entity_removal_handler import (
-    StaleEntityCheckpointStateBase,
-)
-from datahub.utilities.urns.urn import guess_entity_type
-
-logger = logging.getLogger(__name__)
+from datahub.ingestion.source.state.entity_removal_state import GenericCheckpointState
 
 
-class TableauCheckpointState(StaleEntityCheckpointStateBase["TableauCheckpointState"]):
+class TableauCheckpointState(GenericCheckpointState):
     """
     Class for representing the checkpoint state for Tableau sources.
     Stores all datasets, charts and dashboards being ingested and is
     used to remove any stale entities.
     """
-
-    urns: List[str] = pydantic.Field(default_factory=list)
-
-    @classmethod
-    def get_supported_types(cls) -> List[str]:
-        return ["*"]
-
-    def add_checkpoint_urn(self, type: str, urn: str) -> None:
-        self.urns.append(urn)
-
-    def get_urns_not_in(
-        self, type: str, other_checkpoint_state: "TableauCheckpointState"
-    ) -> Iterable[str]:
-        diff = set(self.urns) - set(other_checkpoint_state.urns)
-
-        # To maintain backwards compatibility, we provide this filtering mechanism.
-        if type == "*":
-            yield from diff
-        else:
-            yield from (urn for urn in diff if guess_entity_type(urn) == type)
-
-    def get_percent_entities_changed(
-        self, old_checkpoint_state: "TableauCheckpointState"
-    ) -> float:
-        return StaleEntityCheckpointStateBase.compute_percent_entities_changed(
-            [(self.urns, old_checkpoint_state.urns)]
-        )

View File

@@ -360,7 +360,7 @@ def test_dbt_stateful(pytestconfig, tmp_path, mock_time, mock_datahub_graph):
     state1 = cast(DbtCheckpointState, checkpoint1.state)
    state2 = cast(DbtCheckpointState, checkpoint2.state)
     difference_urns = list(
-        state1.get_urns_not_in(type="dataset", other_checkpoint_state=state2)
+        state1.get_urns_not_in(type="*", other_checkpoint_state=state2)
     )
     assert len(difference_urns) == 2

View File

@@ -29,7 +29,7 @@ from datahub.ingestion.source.looker.looker_query_model import (
 )
 from datahub.ingestion.source.looker.looker_source import LookerDashboardSource
 from datahub.ingestion.source.state.checkpoint import Checkpoint
-from datahub.ingestion.source.state.looker_state import LookerCheckpointState
+from datahub.ingestion.source.state.entity_removal_state import GenericCheckpointState
 from tests.test_helpers import mce_helpers
 from tests.test_helpers.state_helpers import (
     validate_all_providers_have_committed_successfully,
@@ -689,8 +689,8 @@ def test_looker_ingest_stateful(pytestconfig, tmp_path, mock_time, mock_datahub_graph):
     # Perform all assertions on the states. The deleted table should not be
     # part of the second state
-    state1 = cast(LookerCheckpointState, checkpoint1.state)
-    state2 = cast(LookerCheckpointState, checkpoint2.state)
+    state1 = cast(GenericCheckpointState, checkpoint1.state)
+    state2 = cast(GenericCheckpointState, checkpoint2.state)
     difference_dataset_urns = list(
         state1.get_urns_not_in(type="dataset", other_checkpoint_state=state2)

View File

@@ -10,7 +10,7 @@ from datahub.configuration.common import PipelineExecutionError
 from datahub.ingestion.run.pipeline import Pipeline
 from datahub.ingestion.source.looker.lookml_source import LookMLSource
 from datahub.ingestion.source.state.checkpoint import Checkpoint
-from datahub.ingestion.source.state.lookml_state import LookMLCheckpointState
+from datahub.ingestion.source.state.entity_removal_state import GenericCheckpointState
 from datahub.metadata.schema_classes import (
     DatasetSnapshotClass,
     MetadataChangeEventClass,
@@ -624,8 +624,8 @@ def test_lookml_ingest_stateful(pytestconfig, tmp_path, mock_time, mock_datahub_graph):
     # Perform all assertions on the states. The deleted table should not be
     # part of the second state
-    state1 = cast(LookMLCheckpointState, checkpoint1.state)
-    state2 = cast(LookMLCheckpointState, checkpoint2.state)
+    state1 = cast(GenericCheckpointState, checkpoint1.state)
+    state2 = cast(GenericCheckpointState, checkpoint2.state)
     difference_dataset_urns = list(
         state1.get_urns_not_in(type="dataset", other_checkpoint_state=state2)

View File

@@ -16,15 +16,15 @@ def test_sql_common_state() -> None:
     state2 = BaseSQLAlchemyCheckpointState()
 
-    table_urns_diff = list(
-        state1.get_urns_not_in(type="table", other_checkpoint_state=state2)
+    dataset_urns_diff = list(
+        state1.get_urns_not_in(type="dataset", other_checkpoint_state=state2)
     )
-    assert len(table_urns_diff) == 1 and table_urns_diff[0] == test_table_urn
-
-    view_urns_diff = list(
-        state1.get_urns_not_in(type="view", other_checkpoint_state=state2)
-    )
-    assert len(view_urns_diff) == 1 and view_urns_diff[0] == test_view_urn
+    assert len(dataset_urns_diff) == 2 and sorted(dataset_urns_diff) == sorted(
+        [
+            test_table_urn,
+            test_view_urn,
+        ]
+    )
 
     container_urns_diff = list(
         state1.get_urns_not_in(type="container", other_checkpoint_state=state2)
@@ -32,3 +32,22 @@ def test_sql_common_state() -> None:
     assert (
         len(container_urns_diff) == 1 and container_urns_diff[0] == test_container_urn
     )
+
+
+def test_backward_compat() -> None:
+    state = BaseSQLAlchemyCheckpointState.parse_obj(
+        dict(
+            encoded_table_urns=["mysql||db1.t1||PROD"],
+            encoded_view_urns=["mysql||db1.v1||PROD"],
+            encoded_container_urns=["1154d1da73a95376c9f33f47694cf1de"],
+            encoded_assertion_urns=["815963e1332b46a203504ba46ebfab24"],
+        )
+    )
+    assert state == BaseSQLAlchemyCheckpointState(
+        urns=[
+            "urn:li:dataset:(urn:li:dataPlatform:mysql,db1.t1,PROD)",
+            "urn:li:dataset:(urn:li:dataPlatform:mysql,db1.v1,PROD)",
+            "urn:li:container:1154d1da73a95376c9f33f47694cf1de",
+            "urn:li:assertion:815963e1332b46a203504ba46ebfab24",
+        ]
+    )

View File

@@ -327,7 +327,7 @@ def test_glue_stateful(pytestconfig, tmp_path, mock_time, mock_datahub_graph):
     state1 = cast(BaseSQLAlchemyCheckpointState, checkpoint1.state)
     state2 = cast(BaseSQLAlchemyCheckpointState, checkpoint2.state)
     difference_urns = list(
-        state1.get_urns_not_in(type="table", other_checkpoint_state=state2)
+        state1.get_urns_not_in(type="*", other_checkpoint_state=state2)
     )
     assert len(difference_urns) == 1