mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-03 04:10:43 +00:00
feat(ingest): lookml - adding support for only emitting reachable views from explores (#5333)
This commit is contained in:
parent
afc9842e32
commit
e93e4691fb
@ -162,8 +162,8 @@ class LookerDashboardSourceConfig(LookerAPIConfig, LookerCommonConfig):
|
|||||||
@validator("external_base_url", pre=True, always=True)
|
@validator("external_base_url", pre=True, always=True)
|
||||||
def external_url_defaults_to_api_config_base_url(
|
def external_url_defaults_to_api_config_base_url(
|
||||||
cls, v: Optional[str], *, values: Dict[str, Any], **kwargs: Dict[str, Any]
|
cls, v: Optional[str], *, values: Dict[str, Any], **kwargs: Dict[str, Any]
|
||||||
) -> str:
|
) -> Optional[str]:
|
||||||
return v or values["base_url"]
|
return v or values.get("base_url")
|
||||||
|
|
||||||
@validator("platform_instance")
|
@validator("platform_instance")
|
||||||
def platform_instance_not_supported(cls, v: str) -> str:
|
def platform_instance_not_supported(cls, v: str) -> str:
|
||||||
|
|||||||
@ -494,20 +494,23 @@ class LookerExplore:
|
|||||||
return field_match.findall(sql_fragment)
|
return field_match.findall(sql_fragment)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def __from_dict(cls, model_name: str, dict: Dict) -> "LookerExplore":
|
def from_dict(cls, model_name: str, dict: Dict) -> "LookerExplore":
|
||||||
|
view_names = set()
|
||||||
|
joins = None
|
||||||
|
# always add the explore's name or the name from the from clause as the view on which this explore is built
|
||||||
|
view_names.add(dict.get("from", dict.get("name")))
|
||||||
|
|
||||||
if dict.get("joins", {}) != {}:
|
if dict.get("joins", {}) != {}:
|
||||||
|
# additionally for join-based explores, pull in the linked views
|
||||||
assert "joins" in dict
|
assert "joins" in dict
|
||||||
view_names = set()
|
|
||||||
for join in dict["joins"]:
|
for join in dict["joins"]:
|
||||||
|
join_from = join.get("from")
|
||||||
|
view_names.add(join_from or join["name"])
|
||||||
sql_on = join.get("sql_on", None)
|
sql_on = join.get("sql_on", None)
|
||||||
if sql_on is not None:
|
if sql_on is not None:
|
||||||
fields = cls._get_fields_from_sql_equality(sql_on)
|
fields = cls._get_fields_from_sql_equality(sql_on)
|
||||||
joins = fields
|
joins = fields
|
||||||
for f in fields:
|
|
||||||
view_names.add(LookerUtil._extract_view_from_field(f))
|
|
||||||
else:
|
|
||||||
# non-join explore, get view_name from `from` field if possible, default to explore name
|
|
||||||
view_names = set(dict.get("from", dict.get("name")))
|
|
||||||
return LookerExplore(
|
return LookerExplore(
|
||||||
model_name=model_name,
|
model_name=model_name,
|
||||||
name=dict["name"],
|
name=dict["name"],
|
||||||
|
|||||||
@ -27,6 +27,7 @@ from datahub.ingestion.api.decorators import (
|
|||||||
)
|
)
|
||||||
from datahub.ingestion.source.looker_common import (
|
from datahub.ingestion.source.looker_common import (
|
||||||
LookerCommonConfig,
|
LookerCommonConfig,
|
||||||
|
LookerExplore,
|
||||||
LookerUtil,
|
LookerUtil,
|
||||||
LookerViewId,
|
LookerViewId,
|
||||||
ViewField,
|
ViewField,
|
||||||
@ -177,6 +178,10 @@ class LookMLSourceConfig(LookerCommonConfig):
|
|||||||
512000, # 512KB should be plenty
|
512000, # 512KB should be plenty
|
||||||
description="When extracting the view definition from a lookml file, the maximum number of characters to extract.",
|
description="When extracting the view definition from a lookml file, the maximum number of characters to extract.",
|
||||||
)
|
)
|
||||||
|
emit_reachable_views_only: bool = Field(
|
||||||
|
False,
|
||||||
|
description="When enabled, only views that are reachable from explores defined in the model files are emitted",
|
||||||
|
)
|
||||||
|
|
||||||
@validator("platform_instance")
|
@validator("platform_instance")
|
||||||
def platform_instance_not_supported(cls, v: str) -> str:
|
def platform_instance_not_supported(cls, v: str) -> str:
|
||||||
@ -341,7 +346,9 @@ class LookerModel:
|
|||||||
or included_file.endswith(".dashboard.lookml")
|
or included_file.endswith(".dashboard.lookml")
|
||||||
or included_file.endswith(".dashboard.lkml")
|
or included_file.endswith(".dashboard.lkml")
|
||||||
):
|
):
|
||||||
logger.debug(f"include '{inc}' is a dashboard, skipping it")
|
logger.debug(
|
||||||
|
f"include '{included_file}' is a dashboard, skipping it"
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
@ -1081,9 +1088,10 @@ class LookMLSource(Source):
|
|||||||
str(self.source_config.base_folder), self.reporter
|
str(self.source_config.base_folder), self.reporter
|
||||||
)
|
)
|
||||||
|
|
||||||
# some views can be mentioned by multiple 'include' statements, so this set is used to prevent
|
# some views can be mentioned by multiple 'include' statements and can be included via different connections.
|
||||||
# creating duplicate MCE messages
|
# So this set is used to prevent creating duplicate events
|
||||||
processed_view_files: Set[str] = set()
|
processed_view_map: Dict[str, Set[str]] = {}
|
||||||
|
view_connection_map: Dict[str, Tuple[str, str]] = {}
|
||||||
|
|
||||||
# The ** means "this directory and all subdirectories", and hence should
|
# The ** means "this directory and all subdirectories", and hence should
|
||||||
# include all the files we want.
|
# include all the files we want.
|
||||||
@ -1119,20 +1127,43 @@ class LookMLSource(Source):
|
|||||||
self.reporter.report_models_dropped(model_name)
|
self.reporter.report_models_dropped(model_name)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
explore_reachable_views = set()
|
||||||
|
for explore_dict in model.explores:
|
||||||
|
explore: LookerExplore = LookerExplore.from_dict(
|
||||||
|
model_name, explore_dict
|
||||||
|
)
|
||||||
|
if explore.upstream_views:
|
||||||
|
for view_name in explore.upstream_views:
|
||||||
|
explore_reachable_views.add(view_name)
|
||||||
|
|
||||||
|
processed_view_files = processed_view_map.get(model.connection)
|
||||||
|
if processed_view_files is None:
|
||||||
|
processed_view_map[model.connection] = set()
|
||||||
|
processed_view_files = processed_view_map[model.connection]
|
||||||
|
|
||||||
project_name = self.get_project_name(model_name)
|
project_name = self.get_project_name(model_name)
|
||||||
|
logger.debug(f"Model: {model_name}; Includes: {model.resolved_includes}")
|
||||||
|
|
||||||
for include in model.resolved_includes:
|
for include in model.resolved_includes:
|
||||||
logger.debug(f"Considering {include} for model {model_name}")
|
logger.debug(f"Considering {include} for model {model_name}")
|
||||||
if include in processed_view_files:
|
if include in processed_view_files:
|
||||||
logger.debug(f"view '{include}' already processed, skipping it")
|
logger.debug(f"view '{include}' already processed, skipping it")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
logger.debug(f"Attempting to load view file: {include}")
|
logger.debug(f"Attempting to load view file: {include}")
|
||||||
looker_viewfile = viewfile_loader.load_viewfile(
|
looker_viewfile = viewfile_loader.load_viewfile(
|
||||||
include, connectionDefinition, self.reporter
|
include, connectionDefinition, self.reporter
|
||||||
)
|
)
|
||||||
if looker_viewfile is not None:
|
if looker_viewfile is not None:
|
||||||
for raw_view in looker_viewfile.views:
|
for raw_view in looker_viewfile.views:
|
||||||
|
if (
|
||||||
|
self.source_config.emit_reachable_views_only
|
||||||
|
and raw_view["name"] not in explore_reachable_views
|
||||||
|
):
|
||||||
|
logger.debug(
|
||||||
|
f"view {raw_view['name']} is not reachable from an explore, skipping.."
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
self.reporter.report_views_scanned()
|
self.reporter.report_views_scanned()
|
||||||
try:
|
try:
|
||||||
maybe_looker_view = LookerView.from_looker_dict(
|
maybe_looker_view = LookerView.from_looker_dict(
|
||||||
@ -1157,24 +1188,49 @@ class LookMLSource(Source):
|
|||||||
if self.source_config.view_pattern.allowed(
|
if self.source_config.view_pattern.allowed(
|
||||||
maybe_looker_view.id.view_name
|
maybe_looker_view.id.view_name
|
||||||
):
|
):
|
||||||
mce = self._build_dataset_mce(maybe_looker_view)
|
view_connection_mapping = view_connection_map.get(
|
||||||
workunit = MetadataWorkUnit(
|
maybe_looker_view.id.view_name
|
||||||
id=f"lookml-view-{maybe_looker_view.id}",
|
|
||||||
mce=mce,
|
|
||||||
)
|
)
|
||||||
self.reporter.report_workunit(workunit)
|
if not view_connection_mapping:
|
||||||
processed_view_files.add(include)
|
view_connection_map[
|
||||||
yield workunit
|
maybe_looker_view.id.view_name
|
||||||
|
] = (model_name, model.connection)
|
||||||
for mcp in self._build_dataset_mcps(maybe_looker_view):
|
# first time we are discovering this view
|
||||||
# We want to treat mcp aspects as optional, so allowing failures in this aspect to be treated as warnings rather than failures
|
mce = self._build_dataset_mce(maybe_looker_view)
|
||||||
workunit = MetadataWorkUnit(
|
workunit = MetadataWorkUnit(
|
||||||
id=f"lookml-view-{mcp.aspectName}-{maybe_looker_view.id}",
|
id=f"lookml-view-{maybe_looker_view.id}",
|
||||||
mcp=mcp,
|
mce=mce,
|
||||||
treat_errors_as_warnings=True,
|
|
||||||
)
|
)
|
||||||
|
processed_view_files.add(include)
|
||||||
self.reporter.report_workunit(workunit)
|
self.reporter.report_workunit(workunit)
|
||||||
yield workunit
|
yield workunit
|
||||||
|
for mcp in self._build_dataset_mcps(
|
||||||
|
maybe_looker_view
|
||||||
|
):
|
||||||
|
# We want to treat mcp aspects as optional, so allowing failures in this aspect to be treated as warnings rather than failures
|
||||||
|
workunit = MetadataWorkUnit(
|
||||||
|
id=f"lookml-view-{mcp.aspectName}-{maybe_looker_view.id}",
|
||||||
|
mcp=mcp,
|
||||||
|
treat_errors_as_warnings=True,
|
||||||
|
)
|
||||||
|
self.reporter.report_workunit(workunit)
|
||||||
|
yield workunit
|
||||||
|
else:
|
||||||
|
(
|
||||||
|
prev_model_name,
|
||||||
|
prev_model_connection,
|
||||||
|
) = view_connection_mapping
|
||||||
|
if prev_model_connection != model.connection:
|
||||||
|
# this view has previously been discovered and emitted using a different connection
|
||||||
|
logger.warning(
|
||||||
|
f"view {maybe_looker_view.id.view_name} from model {model_name}, connection {model.connection} was previously processed via model {prev_model_name}, connection {prev_model_connection} and will likely lead to incorrect lineage to the underlying tables"
|
||||||
|
)
|
||||||
|
if (
|
||||||
|
not self.source_config.emit_reachable_views_only
|
||||||
|
):
|
||||||
|
logger.warning(
|
||||||
|
"Consider enabling the `emit_reachable_views_only` flag to handle this case."
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
self.reporter.report_views_dropped(
|
self.reporter.report_views_dropped(
|
||||||
str(maybe_looker_view.id)
|
str(maybe_looker_view.id)
|
||||||
|
|||||||
@ -30,4 +30,4 @@ explore: data_model {
|
|||||||
value: "day"
|
value: "day"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -0,0 +1,10 @@
|
|||||||
|
connection: "my_other_connection"
|
||||||
|
include: "**/*.view.lkml"
|
||||||
|
|
||||||
|
explore: aliased_explore2 {
|
||||||
|
from: my_view2
|
||||||
|
}
|
||||||
|
|
||||||
|
explore: duplicate_explore {
|
||||||
|
from: my_view
|
||||||
|
}
|
||||||
@ -0,0 +1,47 @@
|
|||||||
|
view: my_view2 {
|
||||||
|
derived_table: {
|
||||||
|
sql:
|
||||||
|
SELECT
|
||||||
|
is_latest,
|
||||||
|
country,
|
||||||
|
city,
|
||||||
|
timestamp,
|
||||||
|
measurement
|
||||||
|
FROM
|
||||||
|
my_table ;;
|
||||||
|
}
|
||||||
|
|
||||||
|
dimension: country {
|
||||||
|
type: string
|
||||||
|
description: "The country"
|
||||||
|
sql: ${TABLE}.country ;;
|
||||||
|
}
|
||||||
|
|
||||||
|
dimension: city {
|
||||||
|
type: string
|
||||||
|
description: "City"
|
||||||
|
sql: ${TABLE}.city ;;
|
||||||
|
}
|
||||||
|
|
||||||
|
dimension: is_latest {
|
||||||
|
type: yesno
|
||||||
|
description: "Is latest data"
|
||||||
|
sql: ${TABLE}.is_latest ;;
|
||||||
|
}
|
||||||
|
|
||||||
|
dimension_group: timestamp {
|
||||||
|
group_label: "Timestamp"
|
||||||
|
type: time
|
||||||
|
description: "Timestamp of measurement"
|
||||||
|
sql: ${TABLE}.timestamp ;;
|
||||||
|
timeframes: [hour, date, week, day_of_week]
|
||||||
|
}
|
||||||
|
|
||||||
|
measure: average_measurement {
|
||||||
|
group_label: "Measurement"
|
||||||
|
type: average
|
||||||
|
description: "My measurement"
|
||||||
|
sql: ${TABLE}.measurement ;;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@ -0,0 +1,659 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"proposedSnapshot": {
|
||||||
|
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
|
||||||
|
"urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)",
|
||||||
|
"aspects": [
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.common.BrowsePaths": {
|
||||||
|
"paths": [
|
||||||
|
"/prod/looker/lkml_samples/views/my_view"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.common.Status": {
|
||||||
|
"removed": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.dataset.UpstreamLineage": {
|
||||||
|
"upstreams": [
|
||||||
|
{
|
||||||
|
"auditStamp": {
|
||||||
|
"time": 0,
|
||||||
|
"actor": "urn:li:corpuser:unknown",
|
||||||
|
"impersonator": null
|
||||||
|
},
|
||||||
|
"dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,warehouse.default_db.default_schema.my_table,DEV)",
|
||||||
|
"type": "VIEW"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"fineGrainedLineages": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
|
||||||
|
"schemaName": "my_view",
|
||||||
|
"platform": "urn:li:dataPlatform:looker",
|
||||||
|
"version": 0,
|
||||||
|
"created": {
|
||||||
|
"time": 0,
|
||||||
|
"actor": "urn:li:corpuser:unknown",
|
||||||
|
"impersonator": null
|
||||||
|
},
|
||||||
|
"lastModified": {
|
||||||
|
"time": 0,
|
||||||
|
"actor": "urn:li:corpuser:unknown",
|
||||||
|
"impersonator": null
|
||||||
|
},
|
||||||
|
"deleted": null,
|
||||||
|
"dataset": null,
|
||||||
|
"cluster": null,
|
||||||
|
"hash": "",
|
||||||
|
"platformSchema": {
|
||||||
|
"com.linkedin.pegasus2avro.schema.OtherSchema": {
|
||||||
|
"rawSchema": ""
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"fields": [
|
||||||
|
{
|
||||||
|
"fieldPath": "country",
|
||||||
|
"jsonPath": null,
|
||||||
|
"nullable": false,
|
||||||
|
"description": "The country",
|
||||||
|
"created": null,
|
||||||
|
"lastModified": null,
|
||||||
|
"type": {
|
||||||
|
"type": {
|
||||||
|
"com.linkedin.pegasus2avro.schema.StringType": {}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nativeDataType": "string",
|
||||||
|
"recursive": false,
|
||||||
|
"globalTags": {
|
||||||
|
"tags": [
|
||||||
|
{
|
||||||
|
"tag": "urn:li:tag:Dimension",
|
||||||
|
"context": null
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"glossaryTerms": null,
|
||||||
|
"isPartOfKey": false,
|
||||||
|
"isPartitioningKey": null,
|
||||||
|
"jsonProps": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fieldPath": "city",
|
||||||
|
"jsonPath": null,
|
||||||
|
"nullable": false,
|
||||||
|
"description": "City",
|
||||||
|
"created": null,
|
||||||
|
"lastModified": null,
|
||||||
|
"type": {
|
||||||
|
"type": {
|
||||||
|
"com.linkedin.pegasus2avro.schema.StringType": {}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nativeDataType": "string",
|
||||||
|
"recursive": false,
|
||||||
|
"globalTags": {
|
||||||
|
"tags": [
|
||||||
|
{
|
||||||
|
"tag": "urn:li:tag:Dimension",
|
||||||
|
"context": null
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"glossaryTerms": null,
|
||||||
|
"isPartOfKey": false,
|
||||||
|
"isPartitioningKey": null,
|
||||||
|
"jsonProps": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fieldPath": "is_latest",
|
||||||
|
"jsonPath": null,
|
||||||
|
"nullable": false,
|
||||||
|
"description": "Is latest data",
|
||||||
|
"created": null,
|
||||||
|
"lastModified": null,
|
||||||
|
"type": {
|
||||||
|
"type": {
|
||||||
|
"com.linkedin.pegasus2avro.schema.BooleanType": {}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nativeDataType": "yesno",
|
||||||
|
"recursive": false,
|
||||||
|
"globalTags": {
|
||||||
|
"tags": [
|
||||||
|
{
|
||||||
|
"tag": "urn:li:tag:Dimension",
|
||||||
|
"context": null
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"glossaryTerms": null,
|
||||||
|
"isPartOfKey": false,
|
||||||
|
"isPartitioningKey": null,
|
||||||
|
"jsonProps": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fieldPath": "timestamp",
|
||||||
|
"jsonPath": null,
|
||||||
|
"nullable": false,
|
||||||
|
"description": "Timestamp of measurement",
|
||||||
|
"created": null,
|
||||||
|
"lastModified": null,
|
||||||
|
"type": {
|
||||||
|
"type": {
|
||||||
|
"com.linkedin.pegasus2avro.schema.TimeType": {}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nativeDataType": "time",
|
||||||
|
"recursive": false,
|
||||||
|
"globalTags": {
|
||||||
|
"tags": [
|
||||||
|
{
|
||||||
|
"tag": "urn:li:tag:Dimension",
|
||||||
|
"context": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"tag": "urn:li:tag:Temporal",
|
||||||
|
"context": null
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"glossaryTerms": null,
|
||||||
|
"isPartOfKey": false,
|
||||||
|
"isPartitioningKey": null,
|
||||||
|
"jsonProps": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fieldPath": "average_measurement",
|
||||||
|
"jsonPath": null,
|
||||||
|
"nullable": false,
|
||||||
|
"description": "My measurement",
|
||||||
|
"created": null,
|
||||||
|
"lastModified": null,
|
||||||
|
"type": {
|
||||||
|
"type": {
|
||||||
|
"com.linkedin.pegasus2avro.schema.NumberType": {}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nativeDataType": "average",
|
||||||
|
"recursive": false,
|
||||||
|
"globalTags": {
|
||||||
|
"tags": [
|
||||||
|
{
|
||||||
|
"tag": "urn:li:tag:Measure",
|
||||||
|
"context": null
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"glossaryTerms": null,
|
||||||
|
"isPartOfKey": false,
|
||||||
|
"isPartitioningKey": null,
|
||||||
|
"jsonProps": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"primaryKeys": [],
|
||||||
|
"foreignKeysSpecs": null,
|
||||||
|
"foreignKeys": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
|
||||||
|
"customProperties": {
|
||||||
|
"looker.file.path": "foo.view.lkml"
|
||||||
|
},
|
||||||
|
"externalUrl": null,
|
||||||
|
"name": "my_view",
|
||||||
|
"qualifiedName": null,
|
||||||
|
"description": null,
|
||||||
|
"uri": null,
|
||||||
|
"tags": []
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"proposedDelta": null,
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1586847600000,
|
||||||
|
"runId": "lookml-test",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"entityType": "dataset",
|
||||||
|
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)",
|
||||||
|
"entityKeyAspect": null,
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "subTypes",
|
||||||
|
"aspect": {
|
||||||
|
"value": "{\"typeNames\": [\"view\"]}",
|
||||||
|
"contentType": "application/json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1586847600000,
|
||||||
|
"runId": "lookml-test",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"entityType": "dataset",
|
||||||
|
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)",
|
||||||
|
"entityKeyAspect": null,
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "viewProperties",
|
||||||
|
"aspect": {
|
||||||
|
"value": "{\"materialized\": false, \"viewLogic\": \"SELECT\\n is_latest,\\n country,\\n city,\\n timestamp,\\n measurement\\n FROM\\n my_table\", \"viewLanguage\": \"sql\"}",
|
||||||
|
"contentType": "application/json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1586847600000,
|
||||||
|
"runId": "lookml-test",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"proposedSnapshot": {
|
||||||
|
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
|
||||||
|
"urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view2,PROD)",
|
||||||
|
"aspects": [
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.common.BrowsePaths": {
|
||||||
|
"paths": [
|
||||||
|
"/prod/looker/lkml_samples/views/my_view2"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.common.Status": {
|
||||||
|
"removed": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.dataset.UpstreamLineage": {
|
||||||
|
"upstreams": [
|
||||||
|
{
|
||||||
|
"auditStamp": {
|
||||||
|
"time": 0,
|
||||||
|
"actor": "urn:li:corpuser:unknown",
|
||||||
|
"impersonator": null
|
||||||
|
},
|
||||||
|
"dataset": "urn:li:dataset:(urn:li:dataPlatform:redshift,rs_warehouse.default_db.default_schema.my_table,DEV)",
|
||||||
|
"type": "VIEW"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"fineGrainedLineages": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
|
||||||
|
"schemaName": "my_view2",
|
||||||
|
"platform": "urn:li:dataPlatform:looker",
|
||||||
|
"version": 0,
|
||||||
|
"created": {
|
||||||
|
"time": 0,
|
||||||
|
"actor": "urn:li:corpuser:unknown",
|
||||||
|
"impersonator": null
|
||||||
|
},
|
||||||
|
"lastModified": {
|
||||||
|
"time": 0,
|
||||||
|
"actor": "urn:li:corpuser:unknown",
|
||||||
|
"impersonator": null
|
||||||
|
},
|
||||||
|
"deleted": null,
|
||||||
|
"dataset": null,
|
||||||
|
"cluster": null,
|
||||||
|
"hash": "",
|
||||||
|
"platformSchema": {
|
||||||
|
"com.linkedin.pegasus2avro.schema.OtherSchema": {
|
||||||
|
"rawSchema": ""
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"fields": [
|
||||||
|
{
|
||||||
|
"fieldPath": "country",
|
||||||
|
"jsonPath": null,
|
||||||
|
"nullable": false,
|
||||||
|
"description": "The country",
|
||||||
|
"created": null,
|
||||||
|
"lastModified": null,
|
||||||
|
"type": {
|
||||||
|
"type": {
|
||||||
|
"com.linkedin.pegasus2avro.schema.StringType": {}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nativeDataType": "string",
|
||||||
|
"recursive": false,
|
||||||
|
"globalTags": {
|
||||||
|
"tags": [
|
||||||
|
{
|
||||||
|
"tag": "urn:li:tag:Dimension",
|
||||||
|
"context": null
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"glossaryTerms": null,
|
||||||
|
"isPartOfKey": false,
|
||||||
|
"isPartitioningKey": null,
|
||||||
|
"jsonProps": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fieldPath": "city",
|
||||||
|
"jsonPath": null,
|
||||||
|
"nullable": false,
|
||||||
|
"description": "City",
|
||||||
|
"created": null,
|
||||||
|
"lastModified": null,
|
||||||
|
"type": {
|
||||||
|
"type": {
|
||||||
|
"com.linkedin.pegasus2avro.schema.StringType": {}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nativeDataType": "string",
|
||||||
|
"recursive": false,
|
||||||
|
"globalTags": {
|
||||||
|
"tags": [
|
||||||
|
{
|
||||||
|
"tag": "urn:li:tag:Dimension",
|
||||||
|
"context": null
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"glossaryTerms": null,
|
||||||
|
"isPartOfKey": false,
|
||||||
|
"isPartitioningKey": null,
|
||||||
|
"jsonProps": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fieldPath": "is_latest",
|
||||||
|
"jsonPath": null,
|
||||||
|
"nullable": false,
|
||||||
|
"description": "Is latest data",
|
||||||
|
"created": null,
|
||||||
|
"lastModified": null,
|
||||||
|
"type": {
|
||||||
|
"type": {
|
||||||
|
"com.linkedin.pegasus2avro.schema.BooleanType": {}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nativeDataType": "yesno",
|
||||||
|
"recursive": false,
|
||||||
|
"globalTags": {
|
||||||
|
"tags": [
|
||||||
|
{
|
||||||
|
"tag": "urn:li:tag:Dimension",
|
||||||
|
"context": null
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"glossaryTerms": null,
|
||||||
|
"isPartOfKey": false,
|
||||||
|
"isPartitioningKey": null,
|
||||||
|
"jsonProps": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fieldPath": "timestamp",
|
||||||
|
"jsonPath": null,
|
||||||
|
"nullable": false,
|
||||||
|
"description": "Timestamp of measurement",
|
||||||
|
"created": null,
|
||||||
|
"lastModified": null,
|
||||||
|
"type": {
|
||||||
|
"type": {
|
||||||
|
"com.linkedin.pegasus2avro.schema.TimeType": {}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nativeDataType": "time",
|
||||||
|
"recursive": false,
|
||||||
|
"globalTags": {
|
||||||
|
"tags": [
|
||||||
|
{
|
||||||
|
"tag": "urn:li:tag:Dimension",
|
||||||
|
"context": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"tag": "urn:li:tag:Temporal",
|
||||||
|
"context": null
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"glossaryTerms": null,
|
||||||
|
"isPartOfKey": false,
|
||||||
|
"isPartitioningKey": null,
|
||||||
|
"jsonProps": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fieldPath": "average_measurement",
|
||||||
|
"jsonPath": null,
|
||||||
|
"nullable": false,
|
||||||
|
"description": "My measurement",
|
||||||
|
"created": null,
|
||||||
|
"lastModified": null,
|
||||||
|
"type": {
|
||||||
|
"type": {
|
||||||
|
"com.linkedin.pegasus2avro.schema.NumberType": {}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nativeDataType": "average",
|
||||||
|
"recursive": false,
|
||||||
|
"globalTags": {
|
||||||
|
"tags": [
|
||||||
|
{
|
||||||
|
"tag": "urn:li:tag:Measure",
|
||||||
|
"context": null
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"glossaryTerms": null,
|
||||||
|
"isPartOfKey": false,
|
||||||
|
"isPartitioningKey": null,
|
||||||
|
"jsonProps": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"primaryKeys": [],
|
||||||
|
"foreignKeysSpecs": null,
|
||||||
|
"foreignKeys": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
|
||||||
|
"customProperties": {
|
||||||
|
"looker.file.path": "foo2.view.lkml"
|
||||||
|
},
|
||||||
|
"externalUrl": null,
|
||||||
|
"name": "my_view2",
|
||||||
|
"qualifiedName": null,
|
||||||
|
"description": null,
|
||||||
|
"uri": null,
|
||||||
|
"tags": []
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"proposedDelta": null,
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1586847600000,
|
||||||
|
"runId": "lookml-test",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"entityType": "dataset",
|
||||||
|
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view2,PROD)",
|
||||||
|
"entityKeyAspect": null,
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "subTypes",
|
||||||
|
"aspect": {
|
||||||
|
"value": "{\"typeNames\": [\"view\"]}",
|
||||||
|
"contentType": "application/json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1586847600000,
|
||||||
|
"runId": "lookml-test",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"entityType": "dataset",
|
||||||
|
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view2,PROD)",
|
||||||
|
"entityKeyAspect": null,
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "viewProperties",
|
||||||
|
"aspect": {
|
||||||
|
"value": "{\"materialized\": false, \"viewLogic\": \"SELECT\\n is_latest,\\n country,\\n city,\\n timestamp,\\n measurement\\n FROM\\n my_table\", \"viewLanguage\": \"sql\"}",
|
||||||
|
"contentType": "application/json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1586847600000,
|
||||||
|
"runId": "lookml-test",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"proposedSnapshot": {
|
||||||
|
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
|
||||||
|
"urn": "urn:li:tag:Dimension",
|
||||||
|
"aspects": [
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.common.Ownership": {
|
||||||
|
"owners": [
|
||||||
|
{
|
||||||
|
"owner": "urn:li:corpuser:datahub",
|
||||||
|
"type": "DATAOWNER",
|
||||||
|
"source": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"lastModified": {
|
||||||
|
"time": 0,
|
||||||
|
"actor": "urn:li:corpuser:unknown",
|
||||||
|
"impersonator": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.tag.TagProperties": {
|
||||||
|
"name": "Dimension",
|
||||||
|
"description": "A tag that is applied to all dimension fields.",
|
||||||
|
"colorHex": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"proposedDelta": null,
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1586847600000,
|
||||||
|
"runId": "lookml-test",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"proposedSnapshot": {
|
||||||
|
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
|
||||||
|
"urn": "urn:li:tag:Temporal",
|
||||||
|
"aspects": [
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.common.Ownership": {
|
||||||
|
"owners": [
|
||||||
|
{
|
||||||
|
"owner": "urn:li:corpuser:datahub",
|
||||||
|
"type": "DATAOWNER",
|
||||||
|
"source": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"lastModified": {
|
||||||
|
"time": 0,
|
||||||
|
"actor": "urn:li:corpuser:unknown",
|
||||||
|
"impersonator": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.tag.TagProperties": {
|
||||||
|
"name": "Temporal",
|
||||||
|
"description": "A tag that is applied to all time-based (temporal) fields such as timestamps or durations.",
|
||||||
|
"colorHex": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"proposedDelta": null,
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1586847600000,
|
||||||
|
"runId": "lookml-test",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"proposedSnapshot": {
|
||||||
|
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
|
||||||
|
"urn": "urn:li:tag:Measure",
|
||||||
|
"aspects": [
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.common.Ownership": {
|
||||||
|
"owners": [
|
||||||
|
{
|
||||||
|
"owner": "urn:li:corpuser:datahub",
|
||||||
|
"type": "DATAOWNER",
|
||||||
|
"source": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"lastModified": {
|
||||||
|
"time": 0,
|
||||||
|
"actor": "urn:li:corpuser:unknown",
|
||||||
|
"impersonator": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.tag.TagProperties": {
|
||||||
|
"name": "Measure",
|
||||||
|
"description": "A tag that is applied to all measures (metrics). Measures are typically the columns that you aggregate on",
|
||||||
|
"colorHex": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"proposedDelta": null,
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1586847600000,
|
||||||
|
"runId": "lookml-test",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
@ -39,6 +39,7 @@ def test_lookml_ingest(pytestconfig, tmp_path, mock_time):
|
|||||||
"parse_table_names_from_sql": True,
|
"parse_table_names_from_sql": True,
|
||||||
"tag_measures_and_dimensions": False,
|
"tag_measures_and_dimensions": False,
|
||||||
"project_name": "lkml_samples",
|
"project_name": "lkml_samples",
|
||||||
|
"model_pattern": {"deny": ["data2"]},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
"sink": {
|
"sink": {
|
||||||
@ -82,6 +83,7 @@ def test_lookml_ingest_offline(pytestconfig, tmp_path, mock_time):
|
|||||||
},
|
},
|
||||||
"parse_table_names_from_sql": True,
|
"parse_table_names_from_sql": True,
|
||||||
"project_name": "lkml_samples",
|
"project_name": "lkml_samples",
|
||||||
|
"model_pattern": {"deny": ["data2"]},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
"sink": {
|
"sink": {
|
||||||
@ -171,6 +173,7 @@ def test_lookml_ingest_offline_platform_instance(pytestconfig, tmp_path, mock_ti
|
|||||||
},
|
},
|
||||||
"parse_table_names_from_sql": True,
|
"parse_table_names_from_sql": True,
|
||||||
"project_name": "lkml_samples",
|
"project_name": "lkml_samples",
|
||||||
|
"model_pattern": {"deny": ["data2"]},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
"sink": {
|
"sink": {
|
||||||
@ -250,6 +253,7 @@ def ingestion_test(
|
|||||||
"base_url": "fake_account.looker.com",
|
"base_url": "fake_account.looker.com",
|
||||||
},
|
},
|
||||||
"parse_table_names_from_sql": True,
|
"parse_table_names_from_sql": True,
|
||||||
|
"model_pattern": {"deny": ["data2"]},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
"sink": {
|
"sink": {
|
||||||
@ -342,6 +346,7 @@ def test_lookml_github_info(pytestconfig, tmp_path, mock_time):
|
|||||||
},
|
},
|
||||||
"parse_table_names_from_sql": True,
|
"parse_table_names_from_sql": True,
|
||||||
"project_name": "lkml_samples",
|
"project_name": "lkml_samples",
|
||||||
|
"model_pattern": {"deny": ["data2"]},
|
||||||
"github_info": {"repo": "datahub/looker-demo", "branch": "master"},
|
"github_info": {"repo": "datahub/looker-demo", "branch": "master"},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -362,3 +367,68 @@ def test_lookml_github_info(pytestconfig, tmp_path, mock_time):
|
|||||||
output_path=tmp_path / mce_out,
|
output_path=tmp_path / mce_out,
|
||||||
golden_path=test_resources_dir / mce_out,
|
golden_path=test_resources_dir / mce_out,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@freeze_time(FROZEN_TIME)
|
||||||
|
@pytest.mark.skipif(sys.version_info < (3, 7), reason="lkml requires Python 3.7+")
|
||||||
|
def test_reachable_views(pytestconfig, tmp_path, mock_time):
|
||||||
|
"""Test for reachable views"""
|
||||||
|
test_resources_dir = pytestconfig.rootpath / "tests/integration/lookml"
|
||||||
|
mce_out = "lookml_reachable_views.json"
|
||||||
|
pipeline = Pipeline.create(
|
||||||
|
{
|
||||||
|
"run_id": "lookml-test",
|
||||||
|
"source": {
|
||||||
|
"type": "lookml",
|
||||||
|
"config": {
|
||||||
|
"base_folder": str(test_resources_dir / "lkml_samples"),
|
||||||
|
"connection_to_platform_map": {
|
||||||
|
"my_connection": {
|
||||||
|
"platform": "snowflake",
|
||||||
|
"platform_instance": "warehouse",
|
||||||
|
"platform_env": "dev",
|
||||||
|
"default_db": "default_db",
|
||||||
|
"default_schema": "default_schema",
|
||||||
|
},
|
||||||
|
"my_other_connection": {
|
||||||
|
"platform": "redshift",
|
||||||
|
"platform_instance": "rs_warehouse",
|
||||||
|
"platform_env": "dev",
|
||||||
|
"default_db": "default_db",
|
||||||
|
"default_schema": "default_schema",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"parse_table_names_from_sql": True,
|
||||||
|
"project_name": "lkml_samples",
|
||||||
|
"emit_reachable_views_only": True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"sink": {
|
||||||
|
"type": "file",
|
||||||
|
"config": {
|
||||||
|
"filename": f"{tmp_path}/{mce_out}",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
pipeline.run()
|
||||||
|
pipeline.pretty_print_summary()
|
||||||
|
pipeline.raise_from_status(raise_warnings=True)
|
||||||
|
|
||||||
|
mce_helpers.check_golden_file(
|
||||||
|
pytestconfig,
|
||||||
|
output_path=tmp_path / mce_out,
|
||||||
|
golden_path=test_resources_dir / mce_out,
|
||||||
|
)
|
||||||
|
|
||||||
|
entity_urns = mce_helpers.get_entity_urns(tmp_path / mce_out)
|
||||||
|
# we should only have two views discoverable
|
||||||
|
assert len(entity_urns) == 2
|
||||||
|
assert (
|
||||||
|
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)"
|
||||||
|
in entity_urns
|
||||||
|
)
|
||||||
|
assert (
|
||||||
|
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view2,PROD)"
|
||||||
|
in entity_urns
|
||||||
|
)
|
||||||
|
|||||||
@ -3,7 +3,7 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import pprint
|
import pprint
|
||||||
import shutil
|
import shutil
|
||||||
from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
|
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union
|
||||||
|
|
||||||
import deepdiff
|
import deepdiff
|
||||||
|
|
||||||
@ -91,11 +91,23 @@ def assert_mces_equal(
|
|||||||
exclude_regex_paths=ignore_paths,
|
exclude_regex_paths=ignore_paths,
|
||||||
ignore_order=True,
|
ignore_order=True,
|
||||||
)
|
)
|
||||||
if clean_diff != diff:
|
if not clean_diff:
|
||||||
logger.warning(
|
logger.debug(f"MCE-s differ, clean MCE-s are fine\n{pprint.pformat(diff)}")
|
||||||
f"MCE-s differ, clean MCE-s are fine\n{pprint.pformat(diff)}"
|
|
||||||
)
|
|
||||||
diff = clean_diff
|
diff = clean_diff
|
||||||
|
if diff:
|
||||||
|
# do some additional processing to emit helpful messages
|
||||||
|
output_urns = _get_entity_urns(output)
|
||||||
|
golden_urns = _get_entity_urns(golden)
|
||||||
|
in_golden_but_not_in_output = golden_urns - output_urns
|
||||||
|
in_output_but_not_in_golden = output_urns - golden_urns
|
||||||
|
if in_golden_but_not_in_output:
|
||||||
|
logger.info(
|
||||||
|
f"Golden file has {len(in_golden_but_not_in_output)} more urns: {in_golden_but_not_in_output}"
|
||||||
|
)
|
||||||
|
if in_output_but_not_in_golden:
|
||||||
|
logger.info(
|
||||||
|
f"Golden file has {len(in_output_but_not_in_golden)} more urns: {in_output_but_not_in_golden}"
|
||||||
|
)
|
||||||
|
|
||||||
assert (
|
assert (
|
||||||
not diff
|
not diff
|
||||||
@ -192,6 +204,33 @@ def _element_matches_pattern(
|
|||||||
return (True, re.search(pattern, str(element)) is not None)
|
return (True, re.search(pattern, str(element)) is not None)
|
||||||
|
|
||||||
|
|
||||||
|
def get_entity_urns(events_file: str) -> Set[str]:
|
||||||
|
events = load_json_file(events_file)
|
||||||
|
assert isinstance(events, list)
|
||||||
|
return _get_entity_urns(events)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_entity_urns(events_list: List[Dict]) -> Set[str]:
|
||||||
|
entity_type = "dataset"
|
||||||
|
# mce urns
|
||||||
|
mce_urns = set(
|
||||||
|
[
|
||||||
|
_get_element(x, _get_mce_urn_path_spec(entity_type))
|
||||||
|
for x in events_list
|
||||||
|
if _get_filter(mce=True, entity_type=entity_type)(x)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
mcp_urns = set(
|
||||||
|
[
|
||||||
|
_get_element(x, _get_mcp_urn_path_spec())
|
||||||
|
for x in events_list
|
||||||
|
if _get_filter(mcp=True, entity_type=entity_type)(x)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
all_urns = mce_urns.union(mcp_urns)
|
||||||
|
return all_urns
|
||||||
|
|
||||||
|
|
||||||
def assert_mcp_entity_urn(
|
def assert_mcp_entity_urn(
|
||||||
filter: str, entity_type: str, regex_pattern: str, file: str
|
filter: str, entity_type: str, regex_pattern: str, file: str
|
||||||
) -> int:
|
) -> int:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user