feat(ingestion): lookml refinement support (#7781)

Co-authored-by: MohdSiddiqueBagwan <mohdsiddique.bagwan@gslab.com>
Co-authored-by: Harshal Sheth <hsheth2@gmail.com>
This commit is contained in:
mohdsiddique 2023-04-21 23:25:31 +05:30 committed by GitHub
parent fa10256c47
commit f21eeed6e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 4359 additions and 140 deletions

View File

@ -1,3 +1,4 @@
import copy
import glob
import itertools
import logging
@ -6,7 +7,18 @@ import re
import tempfile
from dataclasses import dataclass, field as dataclass_field, replace
from datetime import datetime, timedelta
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Type, Union
from typing import (
Any,
ClassVar,
Dict,
Iterable,
List,
Optional,
Set,
Tuple,
Type,
Union,
)
import lkml
import lkml.simple
@ -237,6 +249,10 @@ class LookMLSourceConfig(
stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = Field(
default=None, description=""
)
process_refinements: bool = Field(
False,
description="When enabled, looker refinement will be processed to adapt an existing view.",
)
@validator("connection_to_platform_map", pre=True)
def convert_string_to_connection_def(cls, conn_map):
@ -669,6 +685,246 @@ class LookerViewFileLoader:
return replace(viewfile, connection=connection)
class LookerRefinementResolver:
    """
    Refinements are a way to "edit" an existing view or explore.
    Refer: https://cloud.google.com/looker/docs/lookml-refinements

    A refinement to an existing view/explore is only applied if its refinement is reachable from include files in a model.
    For refinement applied order please refer: https://cloud.google.com/looker/docs/lookml-refinements#refinements_are_applied_in_order

    The resolver keeps one cache per kind (view / explore), keyed by name, so a
    given view or explore is merged with its refinements at most once per
    resolver instance.
    """

    REFINEMENT_PREFIX: ClassVar[str] = "+"
    DIMENSIONS: ClassVar[str] = "dimensions"
    MEASURES: ClassVar[str] = "measures"
    DIMENSION_GROUPS: ClassVar[str] = "dimension_groups"
    NAME: ClassVar[str] = "name"
    EXTENDS: ClassVar[str] = "extends"
    EXTENDS_ALL: ClassVar[str] = "extends__all"

    looker_model: LookerModel
    looker_viewfile_loader: LookerViewFileLoader
    connection_definition: LookerConnectionDefinition
    source_config: LookMLSourceConfig
    reporter: LookMLSourceReport
    # Map of view-name -> raw view dictionary after the refinements were applied
    view_refinement_cache: Dict[str, dict]
    # Map of explore-name -> raw explore dictionary after the refinements were applied
    explore_refinement_cache: Dict[str, dict]

    def __init__(
        self,
        looker_model: LookerModel,
        looker_viewfile_loader: LookerViewFileLoader,
        connection_definition: LookerConnectionDefinition,
        source_config: LookMLSourceConfig,
        reporter: LookMLSourceReport,
    ):
        """Store the collaborators and start with empty refinement caches."""
        self.looker_model = looker_model
        self.looker_viewfile_loader = looker_viewfile_loader
        self.connection_definition = connection_definition
        self.source_config = source_config
        self.reporter = reporter
        self.view_refinement_cache = {}
        self.explore_refinement_cache = {}

    @staticmethod
    def is_refinement(view_name: str) -> bool:
        """Return True when the name carries the refinement prefix ("+")."""
        return view_name.startswith(LookerRefinementResolver.REFINEMENT_PREFIX)

    @staticmethod
    def merge_column(
        original_dict: dict, refinement_dict: dict, key: str
    ) -> List[dict]:
        """
        Merge a dimension/measure/other column with one from a refinement.
        This follows the process documented at https://help.looker.com/hc/en-us/articles/4419773929107-LookML-refinements

        Columns are matched by name. A column present on both sides keeps its
        position from the original and has its parameters overridden by the
        refinement; columns that only exist in the refinement are appended in
        refinement order.

        Neither input is modified: refined columns are returned as new
        dictionaries and refinement values are deep-copied, so the parsed
        refinement can safely be reused for another merge.

        Args:
            original_dict: raw view whose ``key`` entry holds the current columns.
            refinement_dict: raw refinement view providing overrides/additions.
            key: the column collection to merge (e.g. "dimensions").

        Returns:
            The merged list of column dictionaries (empty when neither side
            defines ``key``).
        """
        name_key = LookerRefinementResolver.NAME
        original_columns: List[dict] = original_dict.get(key, [])
        refinement_columns: List[dict] = refinement_dict.get(key, [])

        # name is a required field, so it is always available as the map key
        original_by_name = {column[name_key]: column for column in original_columns}
        refinement_by_name = {
            column[name_key]: column for column in refinement_columns
        }

        merged_columns: List[dict] = []
        for column_name, existing_column in original_by_name.items():
            override = refinement_by_name.get(column_name)
            if override is None:
                merged_columns.append(existing_column)
            else:
                # Build a fresh dict instead of updating in place: the existing
                # column may itself come from an earlier refinement.
                merged_columns.append({**existing_column, **copy.deepcopy(override)})

        # append the columns that are introduced by the refinement
        merged_columns.extend(
            copy.deepcopy(new_column)
            for column_name, new_column in refinement_by_name.items()
            if column_name not in original_by_name
        )

        return merged_columns

    @staticmethod
    def merge_and_set_column(
        new_raw_view: dict, refinement_view: dict, key: str
    ) -> None:
        """Merge ``key`` columns of the refinement into ``new_raw_view`` in place.

        ``new_raw_view[key]`` is only assigned when the merge produced at least
        one column, so an absent key stays absent.
        """
        merged_column = LookerRefinementResolver.merge_column(
            new_raw_view, refinement_view, key
        )
        if merged_column:
            new_raw_view[key] = merged_column

    @staticmethod
    def merge_refinements(raw_view: dict, refinement_views: List[dict]) -> dict:
        """
        Iterate over refinement_views and merge parameter of each view with raw_view.
        Detail of merging order can be found at https://cloud.google.com/looker/docs/lookml-refinements

        ``raw_view`` is deep-copied first; the copy is what gets refined and
        returned. Refinements are applied in the order given.
        """
        new_raw_view: dict = copy.deepcopy(raw_view)

        # TODO: low priority: handle additive parameters
        # https://cloud.google.com/looker/docs/lookml-refinements#some_parameters_are_additive
        mergeable_keys = (
            LookerRefinementResolver.DIMENSIONS,
            LookerRefinementResolver.MEASURES,
            LookerRefinementResolver.DIMENSION_GROUPS,
        )
        for refinement_view in refinement_views:
            for column_key in mergeable_keys:
                LookerRefinementResolver.merge_and_set_column(
                    new_raw_view, refinement_view, column_key
                )

        return new_raw_view

    def get_refinements(self, views: List[dict], view_name: str) -> List[dict]:
        """
        Refinement syntax for view and explore are same.
        This function can be used to filter out view/explore refinement from raw dictionary list

        Returns the entries of ``views`` whose name equals "+" + ``view_name``,
        in their original order.
        """
        view_refinement_name: str = self.REFINEMENT_PREFIX + view_name
        return [
            raw_view
            for raw_view in views
            if raw_view[LookerRefinementResolver.NAME] == view_refinement_name
        ]

    def get_refinement_from_model_includes(self, view_name: str) -> List[dict]:
        """Collect the refinements of ``view_name`` from every include of the model.

        Includes are visited in the order of ``looker_model.resolved_includes``;
        includes whose view file cannot be loaded are skipped.
        """
        refined_views: List[dict] = []

        for include in self.looker_model.resolved_includes:
            included_looker_viewfile = self.looker_viewfile_loader.load_viewfile(
                include.include,
                include.project,
                self.connection_definition,
                self.reporter,
            )

            if not included_looker_viewfile:
                continue

            refined_views.extend(
                self.get_refinements(included_looker_viewfile.views, view_name)
            )

        return refined_views

    def should_skip_processing(self, raw_view_name: str) -> bool:
        """Return True when no refinement merge should happen for this name.

        That is the case when the name itself is a refinement, or when
        ``process_refinements`` is disabled in the source config.
        """
        if LookerRefinementResolver.is_refinement(raw_view_name):
            return True

        if self.source_config.process_refinements is False:
            return True

        return False

    def apply_view_refinement(self, raw_view: dict) -> dict:
        """
        Looker process the lkml file in include order and merge the all refinement to original view.

        Returns ``raw_view`` untouched when processing is skipped; otherwise
        returns the refined copy, which is cached by view name.
        """
        assert raw_view.get(LookerRefinementResolver.NAME) is not None

        raw_view_name: str = raw_view[LookerRefinementResolver.NAME]

        if self.should_skip_processing(raw_view_name):
            return raw_view

        if raw_view_name in self.view_refinement_cache:
            logger.debug(f"Returning applied refined view {raw_view_name} from cache")
            return self.view_refinement_cache[raw_view_name]

        logger.debug(f"Processing refinement for view {raw_view_name}")

        refinement_views: List[dict] = self.get_refinement_from_model_includes(
            raw_view_name
        )

        self.view_refinement_cache[raw_view_name] = self.merge_refinements(
            raw_view, refinement_views
        )

        return self.view_refinement_cache[raw_view_name]

    @staticmethod
    def add_extended_explore(
        raw_explore: dict, refinement_explores: List[Dict]
    ) -> None:
        """Set ``raw_explore["extends"]`` from the explores named by the refinements.

        Each refinement contributes its ``extends`` value (or ``extends__all``
        when ``extends`` is absent), flattened one level. Names are
        de-duplicated while keeping first-seen order so the result is
        deterministic. ``raw_explore`` is modified in place and left unchanged
        when no refinement extends anything.
        """
        # dict keys act as an insertion-ordered set
        extended_explores: Dict[str, None] = {}
        for refinement in refinement_explores:
            extends_value = refinement.get(
                LookerRefinementResolver.EXTENDS,
                refinement.get(LookerRefinementResolver.EXTENDS_ALL, []),
            )
            extended_explores.update(
                dict.fromkeys(itertools.chain.from_iterable(extends_value))
            )

        if extended_explores:  # if it is not empty then add to the original view
            raw_explore[LookerRefinementResolver.EXTENDS] = list(extended_explores)

    def apply_explore_refinement(self, raw_view: dict) -> dict:
        """
        In explore refinement `extends` parameter is additive.
        Refer looker refinement document: https://cloud.google.com/looker/docs/lookml-refinements#additive

        Unlike view refinement, the explore dictionary passed in is updated in
        place and that same object is cached and returned.
        """
        assert raw_view.get(LookerRefinementResolver.NAME) is not None

        raw_view_name: str = raw_view[LookerRefinementResolver.NAME]

        if self.should_skip_processing(raw_view_name):
            return raw_view

        if raw_view_name in self.explore_refinement_cache:
            logger.debug(
                f"Returning applied refined explore {raw_view_name} from cache"
            )
            return self.explore_refinement_cache[raw_view_name]

        logger.debug(f"Processing refinement for explore {raw_view_name}")

        refinement_explore: List[dict] = self.get_refinements(
            self.looker_model.explores, raw_view_name
        )

        self.add_extended_explore(raw_view, refinement_explore)

        self.explore_refinement_cache[raw_view_name] = raw_view

        return self.explore_refinement_cache[raw_view_name]
VIEW_LANGUAGE_LOOKML: str = "lookml"
VIEW_LANGUAGE_SQL: str = "sql"
@ -821,6 +1077,7 @@ class LookerView:
connection: LookerConnectionDefinition,
looker_viewfile: LookerViewFile,
looker_viewfile_loader: LookerViewFileLoader,
looker_refinement_resolver: LookerRefinementResolver,
reporter: LookMLSourceReport,
max_file_snippet_length: int,
parse_table_names_from_sql: bool = False,
@ -839,6 +1096,7 @@ class LookerView:
connection=connection,
looker_viewfile=looker_viewfile,
looker_viewfile_loader=looker_viewfile_loader,
looker_refinement_resolver=looker_refinement_resolver,
field="sql_table_name",
reporter=reporter,
)
@ -855,6 +1113,7 @@ class LookerView:
connection=connection,
looker_viewfile=looker_viewfile,
looker_viewfile_loader=looker_viewfile_loader,
looker_refinement_resolver=looker_refinement_resolver,
field="derived_table",
reporter=reporter,
)
@ -1081,6 +1340,7 @@ class LookerView:
connection: LookerConnectionDefinition,
looker_viewfile: LookerViewFile,
looker_viewfile_loader: LookerViewFileLoader,
looker_refinement_resolver: LookerRefinementResolver,
target_view_name: str,
reporter: LookMLSourceReport,
) -> Optional[dict]:
@ -1088,7 +1348,7 @@ class LookerView:
for raw_view in looker_viewfile.views:
raw_view_name = raw_view["name"]
if raw_view_name == target_view_name:
return raw_view
return looker_refinement_resolver.apply_view_refinement(raw_view)
# Or, it could live in one of the imports.
view = _find_view_from_resolved_includes(
@ -1099,7 +1359,7 @@ class LookerView:
reporter,
)
if view:
return view[1]
return looker_refinement_resolver.apply_view_refinement(view[1])
else:
logger.warning(
f"failed to resolve view {target_view_name} included from {looker_viewfile.absolute_file_path}"
@ -1114,6 +1374,7 @@ class LookerView:
connection: LookerConnectionDefinition,
looker_viewfile: LookerViewFile,
looker_viewfile_loader: LookerViewFileLoader,
looker_refinement_resolver: LookerRefinementResolver,
field: str,
reporter: LookMLSourceReport,
) -> Optional[Any]:
@ -1131,7 +1392,12 @@ class LookerView:
for extend in reversed(extends):
assert extend != view_name, "a view cannot extend itself"
extend_view = LookerView.resolve_extends_view_name(
connection, looker_viewfile, looker_viewfile_loader, extend, reporter
connection,
looker_viewfile,
looker_viewfile_loader,
looker_refinement_resolver,
extend,
reporter,
)
if not extend_view:
raise NameError(
@ -1542,7 +1808,7 @@ class LookMLSource(StatefulIngestionSourceBase):
# we don't have a base_folder, so we need to clone the repo and process it locally
start_time = datetime.now()
git_clone = GitClone(tmp_dir)
# github info deploy key is always populated
# Github info deploy key is always populated
assert self.source_config.git_info.deploy_key
assert self.source_config.git_info.repo_ssh_locator
checkout_dir = git_clone.clone(
@ -1726,11 +1992,28 @@ class LookMLSource(StatefulIngestionSourceBase):
self.reporter.report_models_dropped(model_name)
continue
explore_reachable_views: Set[ProjectInclude] = set()
explore_reachable_views: Set[str] = set()
looker_refinement_resolver: LookerRefinementResolver = (
LookerRefinementResolver(
looker_model=model,
connection_definition=connectionDefinition,
looker_viewfile_loader=viewfile_loader,
source_config=self.source_config,
reporter=self.reporter,
)
)
if self.source_config.emit_reachable_views_only:
model_explores_map = {d["name"]: d for d in model.explores}
for explore_dict in model.explores:
try:
if LookerRefinementResolver.is_refinement(explore_dict["name"]):
continue
explore_dict = (
looker_refinement_resolver.apply_explore_refinement(
explore_dict
)
)
explore: LookerExplore = LookerExplore.from_dict(
model_name,
explore_dict,
@ -1741,7 +2024,7 @@ class LookMLSource(StatefulIngestionSourceBase):
)
if explore.upstream_views:
for view_name in explore.upstream_views:
explore_reachable_views.add(view_name)
explore_reachable_views.add(view_name.include)
except Exception as e:
self.reporter.report_warning(
f"{model}.explores",
@ -1769,23 +2052,28 @@ class LookMLSource(StatefulIngestionSourceBase):
connection=connectionDefinition,
reporter=self.reporter,
)
if looker_viewfile is not None:
for raw_view in looker_viewfile.views:
raw_view_name = raw_view["name"]
if LookerRefinementResolver.is_refinement(raw_view_name):
continue
if (
self.source_config.emit_reachable_views_only
and ProjectInclude(_BASE_PROJECT_NAME, raw_view["name"])
not in explore_reachable_views
and raw_view_name not in explore_reachable_views
):
logger.debug(
f"view {raw_view['name']} is not reachable from an explore, skipping.."
)
self.reporter.report_unreachable_view_dropped(
raw_view["name"]
f"view {raw_view_name} is not reachable from an explore, skipping.."
)
self.reporter.report_unreachable_view_dropped(raw_view_name)
continue
self.reporter.report_views_scanned()
try:
raw_view = looker_refinement_resolver.apply_view_refinement(
raw_view=raw_view,
)
maybe_looker_view = LookerView.from_looker_dict(
include.project
if include.project != _BASE_PROJECT_NAME
@ -1795,6 +2083,7 @@ class LookMLSource(StatefulIngestionSourceBase):
connectionDefinition,
looker_viewfile,
viewfile_loader,
looker_refinement_resolver,
self.reporter,
self.source_config.max_file_snippet_length,
self.source_config.parse_table_names_from_sql,
@ -1809,6 +2098,7 @@ class LookMLSource(StatefulIngestionSourceBase):
f"unable to load Looker view {raw_view}: {repr(e)}",
)
continue
if maybe_looker_view:
if self.source_config.view_pattern.allowed(
maybe_looker_view.id.view_name
@ -1821,6 +2111,9 @@ class LookMLSource(StatefulIngestionSourceBase):
maybe_looker_view.id.view_name
] = (model_name, model.connection)
# first time we are discovering this view
logger.debug(
f"Generating MCP for view {raw_view['name']}"
)
mce = self._build_dataset_mce(maybe_looker_view)
workunit = MetadataWorkUnit(
id=f"lookml-view-{maybe_looker_view.id}",

View File

@ -1648,5 +1648,147 @@
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
"urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)",
"aspects": [
{
"com.linkedin.pegasus2avro.common.BrowsePaths": {
"paths": [
"/prod/looker/lkml_samples/views"
]
}
},
{
"com.linkedin.pegasus2avro.common.Status": {
"removed": false
}
},
{
"com.linkedin.pegasus2avro.dataset.UpstreamLineage": {
"upstreams": [
{
"auditStamp": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,.flightstats.accidents,PROD)",
"type": "VIEW"
}
],
"fineGrainedLineages": [
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.flightstats.accidents,PROD),id)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD),id)"
],
"confidenceScore": 1.0
}
]
}
},
{
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
"schemaName": "flights",
"platform": "urn:li:dataPlatform:looker",
"version": 0,
"created": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"hash": "",
"platformSchema": {
"com.linkedin.pegasus2avro.schema.OtherSchema": {
"rawSchema": ""
}
},
"fields": [
{
"fieldPath": "id",
"nullable": false,
"description": "",
"label": "id",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.NumberType": {}
}
},
"nativeDataType": "number",
"recursive": false,
"globalTags": {
"tags": [
{
"tag": "urn:li:tag:Dimension"
}
]
},
"isPartOfKey": true
}
],
"primaryKeys": [
"id"
]
}
},
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"looker.file.path": "flights.view.lkml"
},
"name": "flights",
"tags": []
}
}
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
"json": {
"typeNames": [
"View"
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)",
"changeType": "UPSERT",
"aspectName": "viewProperties",
"aspect": {
"json": {
"materialized": false,
"viewLogic": "view: flights {\n sql_table_name: flightstats.accidents ;;\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n}\n\n# override type of id parameter\nview: +flights {\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n}",
"viewLanguage": "lookml"
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
}
]

View File

@ -0,0 +1,26 @@
connection: "db-connection"
# include all the views
include: "/views/book.view"
include: "/views/book_refinement_2.view"
include: "/views/extend_book.view"
include: "/views/order.view"
datagroup: model_default_datagroup {
# sql_trigger: SELECT MAX(id) FROM etl_log;;
max_cache_age: "1 hour"
}
persist_with: model_default_datagroup
explore: order {}
explore: book {}
explore: +book {
extends: [order]
}
explore: extend_book {}
explore: issue_history {}

View File

@ -0,0 +1,11 @@
connection: "db-connection"
# include all the views
include: "/views/book.view"
include: "/views/issue_history.view"
explore: book_with_additional_properties {
view_name: book
}
explore: issue_history {}

View File

@ -0,0 +1,20 @@
view: book {
sql_table_name: public.book ;;
dimension: name {
type: string
sql: ${TABLE}."name" ;;
}
measure: count {
type: count
drill_fields: [name]
}
}
view: +book {
dimension: date {
type: string
sql: ${TABLE}."date" ;;
}
}

View File

@ -0,0 +1,8 @@
include: "book.view"
view: +book {
dimension: issue_date {
type: string
sql: ${TABLE}."date" ;;
}
}

View File

@ -0,0 +1,9 @@
include: "book.view"
include: "/views/book_refinement_1.view"
view: +book {
dimension: issue_date {
type: number
sql: ${TABLE}."date" ;;
}
}

View File

@ -0,0 +1,13 @@
include: "book.view"
view: +book {
dimension: issue_date_3 {
type: number
sql: ${TABLE}."date" ;;
}
}
view: extend_book {
extends: [book]
}

View File

@ -0,0 +1,35 @@
include: "book.view"
view: issue_history {
sql_table_name: public.issue_history ;;
dimension: book_name {
type: string
sql: ${TABLE}."book_name" ;;
}
dimension: user_name {
type: string
sql: ${TABLE}."user_name" ;;
}
measure: count {
type: count
drill_fields: [book_name, user_name]
}
}
view: +book {
dimension: issue_date_2 {
type: string
sql: ${TABLE}."date" ;;
}
}
view: +book {
dimension: issue_date_4 {
type: string
sql: ${TABLE}."date" ;;
}
}

View File

@ -0,0 +1,14 @@
view: order {
sql_table_name: public.order ;;
dimension: order_id {
type: number
sql: ${TABLE}."order_id" ;;
}
dimension: book_id {
type: number
sql: ${TABLE}."book_id" ;;
}
}

View File

@ -7,6 +7,8 @@ include: "liquid.view.lkml"
include: "ability.view.lkml"
include: "dataset_owners.explore.lkml"
include: "native_derived_table.view.lkml"
include: "owners_refinement.view.lkml"
include: "flights.view.lkml"
explore: aliased_explore {
from: my_view

View File

@ -0,0 +1,20 @@
view: flights {
sql_table_name: flightstats.accidents ;;
dimension: id {
label: "id"
primary_key: yes
type: number
sql: ${TABLE}.id ;;
}
}
# override type of id parameter
view: +flights {
dimension: id {
label: "id"
primary_key: yes
type: string
sql: ${TABLE}.id ;;
}
}

View File

@ -0,0 +1,7 @@
# File was added to test cross-file refinement resolution
view: +owners {
dimension: has_owner_name {
type: yesno
sql: ${TABLE}.owner_name::string is not null;;
}
}

View File

@ -1649,6 +1649,148 @@
"runId": "lookml-test"
}
},
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
"urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)",
"aspects": [
{
"com.linkedin.pegasus2avro.common.BrowsePaths": {
"paths": [
"/prod/looker/lkml_samples/views"
]
}
},
{
"com.linkedin.pegasus2avro.common.Status": {
"removed": false
}
},
{
"com.linkedin.pegasus2avro.dataset.UpstreamLineage": {
"upstreams": [
{
"auditStamp": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-foo.flightstats.accidents,PROD)",
"type": "VIEW"
}
],
"fineGrainedLineages": [
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,project-foo.flightstats.accidents,PROD),id)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD),id)"
],
"confidenceScore": 1.0
}
]
}
},
{
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
"schemaName": "flights",
"platform": "urn:li:dataPlatform:looker",
"version": 0,
"created": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"hash": "",
"platformSchema": {
"com.linkedin.pegasus2avro.schema.OtherSchema": {
"rawSchema": ""
}
},
"fields": [
{
"fieldPath": "id",
"nullable": false,
"description": "",
"label": "id",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.NumberType": {}
}
},
"nativeDataType": "number",
"recursive": false,
"globalTags": {
"tags": [
{
"tag": "urn:li:tag:Dimension"
}
]
},
"isPartOfKey": true
}
],
"primaryKeys": [
"id"
]
}
},
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"looker.file.path": "flights.view.lkml"
},
"name": "flights",
"tags": []
}
}
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
"json": {
"typeNames": [
"View"
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)",
"changeType": "UPSERT",
"aspectName": "viewProperties",
"aspect": {
"json": {
"materialized": false,
"viewLogic": "view: flights {\n sql_table_name: flightstats.accidents ;;\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n}\n\n# override type of id parameter\nview: +flights {\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n}",
"viewLanguage": "lookml"
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {

View File

@ -1649,6 +1649,148 @@
"runId": "lookml-test"
}
},
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
"urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)",
"aspects": [
{
"com.linkedin.pegasus2avro.common.BrowsePaths": {
"paths": [
"/prod/looker/lkml_samples/views"
]
}
},
{
"com.linkedin.pegasus2avro.common.Status": {
"removed": false
}
},
{
"com.linkedin.pegasus2avro.dataset.UpstreamLineage": {
"upstreams": [
{
"auditStamp": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,flightstats.accidents,PROD)",
"type": "VIEW"
}
],
"fineGrainedLineages": [
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,flightstats.accidents,PROD),id)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD),id)"
],
"confidenceScore": 1.0
}
]
}
},
{
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
"schemaName": "flights",
"platform": "urn:li:dataPlatform:looker",
"version": 0,
"created": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"hash": "",
"platformSchema": {
"com.linkedin.pegasus2avro.schema.OtherSchema": {
"rawSchema": ""
}
},
"fields": [
{
"fieldPath": "id",
"nullable": false,
"description": "",
"label": "id",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.NumberType": {}
}
},
"nativeDataType": "number",
"recursive": false,
"globalTags": {
"tags": [
{
"tag": "urn:li:tag:Dimension"
}
]
},
"isPartOfKey": true
}
],
"primaryKeys": [
"id"
]
}
},
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"looker.file.path": "flights.view.lkml"
},
"name": "flights",
"tags": []
}
}
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
"json": {
"typeNames": [
"View"
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)",
"changeType": "UPSERT",
"aspectName": "viewProperties",
"aspect": {
"json": {
"materialized": false,
"viewLogic": "view: flights {\n sql_table_name: flightstats.accidents ;;\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n}\n\n# override type of id parameter\nview: +flights {\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n}",
"viewLanguage": "lookml"
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {

View File

@ -1532,6 +1532,148 @@
"runId": "lookml-test"
}
},
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
"urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)",
"aspects": [
{
"com.linkedin.pegasus2avro.common.BrowsePaths": {
"paths": [
"/prod/looker/lkml_samples/views"
]
}
},
{
"com.linkedin.pegasus2avro.common.Status": {
"removed": false
}
},
{
"com.linkedin.pegasus2avro.dataset.UpstreamLineage": {
"upstreams": [
{
"auditStamp": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.flightstats.accidents,PROD)",
"type": "VIEW"
}
],
"fineGrainedLineages": [
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.flightstats.accidents,PROD),id)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD),id)"
],
"confidenceScore": 1.0
}
]
}
},
{
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
"schemaName": "flights",
"platform": "urn:li:dataPlatform:looker",
"version": 0,
"created": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"hash": "",
"platformSchema": {
"com.linkedin.pegasus2avro.schema.OtherSchema": {
"rawSchema": ""
}
},
"fields": [
{
"fieldPath": "id",
"nullable": false,
"description": "",
"label": "id",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.NumberType": {}
}
},
"nativeDataType": "number",
"recursive": false,
"globalTags": {
"tags": [
{
"tag": "urn:li:tag:Dimension"
}
]
},
"isPartOfKey": true
}
],
"primaryKeys": [
"id"
]
}
},
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"looker.file.path": "flights.view.lkml"
},
"name": "flights",
"tags": []
}
}
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
"json": {
"typeNames": [
"View"
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)",
"changeType": "UPSERT",
"aspectName": "viewProperties",
"aspect": {
"json": {
"materialized": false,
"viewLogic": "view: flights {\n sql_table_name: flightstats.accidents ;;\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n}\n\n# override type of id parameter\nview: +flights {\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n}",
"viewLanguage": "lookml"
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {

View File

@ -387,6 +387,51 @@
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": true
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": true
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": true
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)",
@ -402,6 +447,81 @@
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": true
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": true
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": true
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": true
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": true
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)",
@ -431,110 +551,5 @@
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": true
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": true
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": true
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": true
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": true
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": true
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": true
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
}
]

View File

@ -1649,6 +1649,148 @@
"runId": "lookml-test"
}
},
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
"urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)",
"aspects": [
{
"com.linkedin.pegasus2avro.common.BrowsePaths": {
"paths": [
"/prod/looker/lkml_samples/views"
]
}
},
{
"com.linkedin.pegasus2avro.common.Status": {
"removed": false
}
},
{
"com.linkedin.pegasus2avro.dataset.UpstreamLineage": {
"upstreams": [
{
"auditStamp": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.flightstats.accidents,PROD)",
"type": "VIEW"
}
],
"fineGrainedLineages": [
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.flightstats.accidents,PROD),id)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD),id)"
],
"confidenceScore": 1.0
}
]
}
},
{
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
"schemaName": "flights",
"platform": "urn:li:dataPlatform:looker",
"version": 0,
"created": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"hash": "",
"platformSchema": {
"com.linkedin.pegasus2avro.schema.OtherSchema": {
"rawSchema": ""
}
},
"fields": [
{
"fieldPath": "id",
"nullable": false,
"description": "",
"label": "id",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.NumberType": {}
}
},
"nativeDataType": "number",
"recursive": false,
"globalTags": {
"tags": [
{
"tag": "urn:li:tag:Dimension"
}
]
},
"isPartOfKey": true
}
],
"primaryKeys": [
"id"
]
}
},
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"looker.file.path": "flights.view.lkml"
},
"name": "flights",
"tags": []
}
}
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
"json": {
"typeNames": [
"View"
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)",
"changeType": "UPSERT",
"aspectName": "viewProperties",
"aspect": {
"json": {
"materialized": false,
"viewLogic": "view: flights {\n sql_table_name: flightstats.accidents ;;\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n}\n\n# override type of id parameter\nview: +flights {\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n}",
"viewLanguage": "lookml"
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {

View File

@ -1649,6 +1649,148 @@
"runId": "lookml-test"
}
},
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
"urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)",
"aspects": [
{
"com.linkedin.pegasus2avro.common.BrowsePaths": {
"paths": [
"/prod/looker/lkml_samples/views"
]
}
},
{
"com.linkedin.pegasus2avro.common.Status": {
"removed": false
}
},
{
"com.linkedin.pegasus2avro.dataset.UpstreamLineage": {
"upstreams": [
{
"auditStamp": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,warehouse.default_db.flightstats.accidents,DEV)",
"type": "VIEW"
}
],
"fineGrainedLineages": [
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,warehouse.default_db.flightstats.accidents,DEV),id)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD),id)"
],
"confidenceScore": 1.0
}
]
}
},
{
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
"schemaName": "flights",
"platform": "urn:li:dataPlatform:looker",
"version": 0,
"created": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"hash": "",
"platformSchema": {
"com.linkedin.pegasus2avro.schema.OtherSchema": {
"rawSchema": ""
}
},
"fields": [
{
"fieldPath": "id",
"nullable": false,
"description": "",
"label": "id",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.NumberType": {}
}
},
"nativeDataType": "number",
"recursive": false,
"globalTags": {
"tags": [
{
"tag": "urn:li:tag:Dimension"
}
]
},
"isPartOfKey": true
}
],
"primaryKeys": [
"id"
]
}
},
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"looker.file.path": "flights.view.lkml"
},
"name": "flights",
"tags": []
}
}
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
"json": {
"typeNames": [
"View"
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)",
"changeType": "UPSERT",
"aspectName": "viewProperties",
"aspect": {
"json": {
"materialized": false,
"viewLogic": "view: flights {\n sql_table_name: flightstats.accidents ;;\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n}\n\n# override type of id parameter\nview: +flights {\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n}",
"viewLanguage": "lookml"
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {

View File

@ -1661,6 +1661,149 @@
"runId": "lookml-test"
}
},
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
"urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)",
"aspects": [
{
"com.linkedin.pegasus2avro.common.BrowsePaths": {
"paths": [
"/prod/looker/lkml_samples/views"
]
}
},
{
"com.linkedin.pegasus2avro.common.Status": {
"removed": false
}
},
{
"com.linkedin.pegasus2avro.dataset.UpstreamLineage": {
"upstreams": [
{
"auditStamp": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.flightstats.accidents,PROD)",
"type": "VIEW"
}
],
"fineGrainedLineages": [
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.flightstats.accidents,PROD),id)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD),id)"
],
"confidenceScore": 1.0
}
]
}
},
{
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
"schemaName": "flights",
"platform": "urn:li:dataPlatform:looker",
"version": 0,
"created": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"hash": "",
"platformSchema": {
"com.linkedin.pegasus2avro.schema.OtherSchema": {
"rawSchema": ""
}
},
"fields": [
{
"fieldPath": "id",
"nullable": false,
"description": "",
"label": "id",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.NumberType": {}
}
},
"nativeDataType": "number",
"recursive": false,
"globalTags": {
"tags": [
{
"tag": "urn:li:tag:Dimension"
}
]
},
"isPartOfKey": true
}
],
"primaryKeys": [
"id"
]
}
},
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"looker.file.path": "flights.view.lkml"
},
"externalUrl": "https://github.com/datahub/looker-demo/blob/master/flights.view.lkml",
"name": "flights",
"tags": []
}
}
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
"json": {
"typeNames": [
"View"
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)",
"changeType": "UPSERT",
"aspectName": "viewProperties",
"aspect": {
"json": {
"materialized": false,
"viewLogic": "view: flights {\n sql_table_name: flightstats.accidents ;;\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n}\n\n# override type of id parameter\nview: +flights {\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n}",
"viewLanguage": "lookml"
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {

View File

@ -0,0 +1,706 @@
[
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
"urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.book,PROD)",
"aspects": [
{
"com.linkedin.pegasus2avro.common.BrowsePaths": {
"paths": [
"/prod/looker/lkml_refinement_sample1/views"
]
}
},
{
"com.linkedin.pegasus2avro.common.Status": {
"removed": false
}
},
{
"com.linkedin.pegasus2avro.dataset.UpstreamLineage": {
"upstreams": [
{
"auditStamp": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,.public.book,PROD)",
"type": "VIEW"
}
],
"fineGrainedLineages": [
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.book,PROD),name)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.book,PROD),name)"
],
"confidenceScore": 1.0
},
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.book,PROD),date)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.book,PROD),date)"
],
"confidenceScore": 1.0
},
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.book,PROD),date)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.book,PROD),issue_date)"
],
"confidenceScore": 1.0
},
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.book,PROD),date)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.book,PROD),issue_date_3)"
],
"confidenceScore": 1.0
}
]
}
},
{
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
"schemaName": "book",
"platform": "urn:li:dataPlatform:looker",
"version": 0,
"created": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"hash": "",
"platformSchema": {
"com.linkedin.pegasus2avro.schema.OtherSchema": {
"rawSchema": ""
}
},
"fields": [
{
"fieldPath": "name",
"nullable": false,
"description": "",
"label": "",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"nativeDataType": "string",
"recursive": false,
"globalTags": {
"tags": [
{
"tag": "urn:li:tag:Dimension"
}
]
},
"isPartOfKey": false
},
{
"fieldPath": "date",
"nullable": false,
"description": "",
"label": "",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"nativeDataType": "string",
"recursive": false,
"globalTags": {
"tags": [
{
"tag": "urn:li:tag:Dimension"
}
]
},
"isPartOfKey": false
},
{
"fieldPath": "issue_date",
"nullable": false,
"description": "",
"label": "",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.NumberType": {}
}
},
"nativeDataType": "number",
"recursive": false,
"globalTags": {
"tags": [
{
"tag": "urn:li:tag:Dimension"
}
]
},
"isPartOfKey": false
},
{
"fieldPath": "issue_date_3",
"nullable": false,
"description": "",
"label": "",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.NumberType": {}
}
},
"nativeDataType": "number",
"recursive": false,
"globalTags": {
"tags": [
{
"tag": "urn:li:tag:Dimension"
}
]
},
"isPartOfKey": false
},
{
"fieldPath": "count",
"nullable": false,
"description": "",
"label": "",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.NumberType": {}
}
},
"nativeDataType": "count",
"recursive": false,
"globalTags": {
"tags": [
{
"tag": "urn:li:tag:Measure"
}
]
},
"isPartOfKey": false
}
],
"primaryKeys": []
}
},
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"looker.file.path": "views/book.view.lkml"
},
"name": "book",
"tags": []
}
}
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.book,PROD)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
"json": {
"typeNames": [
"View"
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.book,PROD)",
"changeType": "UPSERT",
"aspectName": "viewProperties",
"aspect": {
"json": {
"materialized": false,
"viewLogic": "view: book {\n sql_table_name: public.book ;;\n\n dimension: name {\n type: string\n sql: ${TABLE}.\"name\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: [name]\n }\n}\n\nview: +book {\n dimension: date {\n type: string\n sql: ${TABLE}.\"date\" ;;\n }\n}\n",
"viewLanguage": "lookml"
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
"urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.extend_book,PROD)",
"aspects": [
{
"com.linkedin.pegasus2avro.common.BrowsePaths": {
"paths": [
"/prod/looker/lkml_refinement_sample1/views"
]
}
},
{
"com.linkedin.pegasus2avro.common.Status": {
"removed": false
}
},
{
"com.linkedin.pegasus2avro.dataset.UpstreamLineage": {
"upstreams": [
{
"auditStamp": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,.public.book,PROD)",
"type": "VIEW"
}
]
}
},
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"looker.file.path": "views/extend_book.view.lkml"
},
"name": "extend_book",
"tags": []
}
}
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.extend_book,PROD)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
"json": {
"typeNames": [
"View"
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.extend_book,PROD)",
"changeType": "UPSERT",
"aspectName": "viewProperties",
"aspect": {
"json": {
"materialized": false,
"viewLogic": "include: \"book.view\"\n\nview: +book {\n dimension: issue_date_3 {\n type: number\n sql: ${TABLE}.\"date\" ;;\n }\n}\n\n\nview: extend_book {\n extends: [book]\n}\n",
"viewLanguage": "lookml"
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
"urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.order,PROD)",
"aspects": [
{
"com.linkedin.pegasus2avro.common.BrowsePaths": {
"paths": [
"/prod/looker/lkml_refinement_sample1/views"
]
}
},
{
"com.linkedin.pegasus2avro.common.Status": {
"removed": false
}
},
{
"com.linkedin.pegasus2avro.dataset.UpstreamLineage": {
"upstreams": [
{
"auditStamp": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,.public.order,PROD)",
"type": "VIEW"
}
],
"fineGrainedLineages": [
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.order,PROD),order_id)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.order,PROD),order_id)"
],
"confidenceScore": 1.0
},
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.order,PROD),book_id)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.order,PROD),book_id)"
],
"confidenceScore": 1.0
}
]
}
},
{
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
"schemaName": "order",
"platform": "urn:li:dataPlatform:looker",
"version": 0,
"created": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"hash": "",
"platformSchema": {
"com.linkedin.pegasus2avro.schema.OtherSchema": {
"rawSchema": ""
}
},
"fields": [
{
"fieldPath": "order_id",
"nullable": false,
"description": "",
"label": "",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.NumberType": {}
}
},
"nativeDataType": "number",
"recursive": false,
"globalTags": {
"tags": [
{
"tag": "urn:li:tag:Dimension"
}
]
},
"isPartOfKey": false
},
{
"fieldPath": "book_id",
"nullable": false,
"description": "",
"label": "",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.NumberType": {}
}
},
"nativeDataType": "number",
"recursive": false,
"globalTags": {
"tags": [
{
"tag": "urn:li:tag:Dimension"
}
]
},
"isPartOfKey": false
}
],
"primaryKeys": []
}
},
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"looker.file.path": "views/order.view.lkml"
},
"name": "order",
"tags": []
}
}
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.order,PROD)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
"json": {
"typeNames": [
"View"
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.order,PROD)",
"changeType": "UPSERT",
"aspectName": "viewProperties",
"aspect": {
"json": {
"materialized": false,
"viewLogic": "view: order {\n sql_table_name: public.order ;;\n\n dimension: order_id {\n type: number\n sql: ${TABLE}.\"order_id\" ;;\n }\n\n dimension: book_id {\n type: number\n sql: ${TABLE}.\"book_id\" ;;\n }\n\n}",
"viewLanguage": "lookml"
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
"urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_2.view.issue_history,PROD)",
"aspects": [
{
"com.linkedin.pegasus2avro.common.BrowsePaths": {
"paths": [
"/prod/looker/lkml_refinement_sample1/views"
]
}
},
{
"com.linkedin.pegasus2avro.common.Status": {
"removed": false
}
},
{
"com.linkedin.pegasus2avro.dataset.UpstreamLineage": {
"upstreams": [
{
"auditStamp": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,.public.issue_history,PROD)",
"type": "VIEW"
}
],
"fineGrainedLineages": [
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.issue_history,PROD),book_name)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_2.view.issue_history,PROD),book_name)"
],
"confidenceScore": 1.0
},
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.issue_history,PROD),user_name)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_2.view.issue_history,PROD),user_name)"
],
"confidenceScore": 1.0
}
]
}
},
{
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
"schemaName": "issue_history",
"platform": "urn:li:dataPlatform:looker",
"version": 0,
"created": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"hash": "",
"platformSchema": {
"com.linkedin.pegasus2avro.schema.OtherSchema": {
"rawSchema": ""
}
},
"fields": [
{
"fieldPath": "book_name",
"nullable": false,
"description": "",
"label": "",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"nativeDataType": "string",
"recursive": false,
"globalTags": {
"tags": [
{
"tag": "urn:li:tag:Dimension"
}
]
},
"isPartOfKey": false
},
{
"fieldPath": "user_name",
"nullable": false,
"description": "",
"label": "",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"nativeDataType": "string",
"recursive": false,
"globalTags": {
"tags": [
{
"tag": "urn:li:tag:Dimension"
}
]
},
"isPartOfKey": false
},
{
"fieldPath": "count",
"nullable": false,
"description": "",
"label": "",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.NumberType": {}
}
},
"nativeDataType": "count",
"recursive": false,
"globalTags": {
"tags": [
{
"tag": "urn:li:tag:Measure"
}
]
},
"isPartOfKey": false
}
],
"primaryKeys": []
}
},
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"looker.file.path": "views/issue_history.view.lkml"
},
"name": "issue_history",
"tags": []
}
}
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_2.view.issue_history,PROD)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
"json": {
"typeNames": [
"View"
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_2.view.issue_history,PROD)",
"changeType": "UPSERT",
"aspectName": "viewProperties",
"aspect": {
"json": {
"materialized": false,
"viewLogic": "include: \"book.view\"\n\nview: issue_history {\n sql_table_name: public.issue_history ;;\n\n dimension: book_name {\n type: string\n sql: ${TABLE}.\"book_name\" ;;\n }\n\n dimension: user_name {\n type: string\n sql: ${TABLE}.\"user_name\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: [book_name, user_name]\n }\n}\n\n\nview: +book {\n dimension: issue_date_2 {\n type: string\n sql: ${TABLE}.\"date\" ;;\n }\n}\n\nview: +book {\n dimension: issue_date_4 {\n type: string\n sql: ${TABLE}.\"date\" ;;\n }\n}\n",
"viewLanguage": "lookml"
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test"
}
}
]

File diff suppressed because it is too large Load Diff

View File

@ -5,6 +5,7 @@ from unittest import mock
import pydantic
import pytest
from deepdiff import DeepDiff
from freezegun import freeze_time
from looker_sdk.sdk.api40.models import DBConnection
@ -12,6 +13,8 @@ from datahub.configuration.common import PipelineExecutionError
from datahub.ingestion.run.pipeline import Pipeline
from datahub.ingestion.source.file import read_metadata_file
from datahub.ingestion.source.looker.lookml_source import (
LookerModel,
LookerRefinementResolver,
LookMLSource,
LookMLSourceConfig,
)
@ -34,6 +37,30 @@ GMS_PORT = 8080
GMS_SERVER = f"http://localhost:{GMS_PORT}"
def get_default_recipe(output_file_path, base_folder_path):
    """Build the baseline LookML ingestion recipe shared by the tests.

    Args:
        output_file_path: Location the file sink writes its output to.
        base_folder_path: Folder containing the LookML project to ingest.

    Returns:
        A new recipe dict with a ``lookml`` source and a ``file`` sink.
        Callers are free to mutate it (e.g. to flip config flags).
    """
    lookml_config = {
        "base_folder": base_folder_path,
        "connection_to_platform_map": {"my_connection": "conn"},
        "parse_table_names_from_sql": True,
        "tag_measures_and_dimensions": False,
        "project_name": "lkml_samples",
        "model_pattern": {"deny": ["data2"]},
        "emit_reachable_views_only": False,
    }
    # The f-string stringifies whatever path-like value was handed in.
    file_sink_config = {"filename": f"{output_file_path}"}

    return {
        "run_id": "lookml-test",
        "source": {"type": "lookml", "config": lookml_config},
        "sink": {"type": "file", "config": file_sink_config},
    }
@freeze_time(FROZEN_TIME)
def test_lookml_ingest(pytestconfig, tmp_path, mock_time):
"""Test backwards compatibility with previous form of config with new flags turned off"""
@ -43,28 +70,11 @@ def test_lookml_ingest(pytestconfig, tmp_path, mock_time):
# Note this config below is known to create "bad" lineage since the config author has not provided enough information
# to resolve relative table names (which are not fully qualified)
# We keep this check just to validate that ingestion doesn't croak on this config
pipeline = Pipeline.create(
{
"run_id": "lookml-test",
"source": {
"type": "lookml",
"config": {
"base_folder": str(test_resources_dir / "lkml_samples"),
"connection_to_platform_map": {"my_connection": "conn"},
"parse_table_names_from_sql": True,
"tag_measures_and_dimensions": False,
"project_name": "lkml_samples",
"model_pattern": {"deny": ["data2"]},
"emit_reachable_views_only": False,
},
},
"sink": {
"type": "file",
"config": {
"filename": f"{tmp_path}/{mce_out_file}",
},
},
}
get_default_recipe(
f"{tmp_path}/{mce_out_file}", f"{test_resources_dir}/lkml_samples"
)
)
pipeline.run()
pipeline.pretty_print_summary()
@ -77,6 +87,206 @@ def test_lookml_ingest(pytestconfig, tmp_path, mock_time):
)
@freeze_time(FROZEN_TIME)
def test_lookml_refinement_ingest(pytestconfig, tmp_path, mock_time):
    """Ingest lkml_samples with ``process_refinements`` enabled and compare with the golden file."""
    test_resources_dir = pytestconfig.rootpath / "tests/integration/lookml"
    mce_out_file = "refinement_mces_output.json"

    # Start from the shared default recipe. That recipe uses relative (not
    # fully qualified) table names, so the lineage it yields is known to be
    # imprecise; this test only checks the emitted metadata against the golden
    # file with refinement processing switched on.
    new_recipe = get_default_recipe(
        f"{tmp_path}/{mce_out_file}", f"{test_resources_dir}/lkml_samples"
    )
    new_recipe["source"]["config"]["process_refinements"] = True

    pipeline = Pipeline.create(new_recipe)
    pipeline.run()
    pipeline.pretty_print_summary()
    # Warnings are escalated to failures as well.
    pipeline.raise_from_status(raise_warnings=True)

    golden_path = test_resources_dir / "refinements_ingestion_golden.json"
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=tmp_path / mce_out_file,
        golden_path=golden_path,
    )
@freeze_time(FROZEN_TIME)
def test_lookml_refinement_include_order(pytestconfig, tmp_path, mock_time):
    """Ingest the ``lkml_refinement_samples/sample1`` project and compare with its golden file."""
    resources = pytestconfig.rootpath / "tests/integration/lookml"
    output_name = "refinement_include_order_mces_output.json"

    recipe = get_default_recipe(
        f"{tmp_path}/{output_name}",
        f"{resources}/lkml_refinement_samples/sample1",
    )
    # Overrides layered on top of the default recipe for this sample project.
    recipe["source"]["config"].update(
        {
            "process_refinements": True,
            "project_name": "lkml_refinement_sample1",
            "view_naming_pattern": {"pattern": "{project}.{model}.view.{name}"},
            "connection_to_platform_map": {"db-connection": "conn"},
        }
    )

    pipeline = Pipeline.create(recipe)
    pipeline.run()
    pipeline.pretty_print_summary()
    pipeline.raise_from_status(raise_warnings=True)

    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=tmp_path / output_name,
        golden_path=resources / "refinement_include_order_golden.json",
    )
@freeze_time(FROZEN_TIME)
def test_lookml_explore_refinement(pytestconfig, tmp_path, mock_time):
    """Check that the ``extends`` of explore refinements are merged into the base explore.

    The model holds the explore ``book`` plus two ``+book`` refinements, each
    contributing one ``extends`` entry; the refined explore must list both.
    """
    looker_model = LookerModel(
        explores=[
            {
                "name": "book",
            },
            {"name": "+book", "extends__all": [["order"]]},
            {"name": "+book", "extends__all": [["transaction"]]},
        ],
        connection="",
        resolved_includes=[],
        includes=[],
    )

    # Collaborators not needed by this test are passed as None.
    refinement_resolver = LookerRefinementResolver(
        looker_model=looker_model,
        looker_viewfile_loader=None,  # type: ignore
        reporter=None,  # type: ignore
        source_config=LookMLSourceConfig.parse_obj(
            {
                "process_refinements": True,
                "base_folder": ".",
                "api": {
                    "base_url": "fake",
                    "client_id": "fake_client_id",
                    "client_secret": "fake_client_secret",
                },
            }
        ),
        connection_definition=None,  # type: ignore
    )

    new_explore: dict = refinement_resolver.apply_explore_refinement(
        looker_model.explores[0]
    )

    assert new_explore.get("extends") is not None
    # list.sort() sorts in place and returns None, so comparing two .sort()
    # results is always `None == None`. Compare sorted copies instead so the
    # assertion actually inspects the merged extends.
    assert sorted(new_explore["extends"]) == ["order", "transaction"]
@freeze_time(FROZEN_TIME)
def test_lookml_view_merge(pytestconfig, tmp_path, mock_time):
    """Merge four ``+flights`` refinements into ``flights`` and compare with the expected view."""
    base_view: dict = {
        "sql_table_name": "flightstats.accidents",
        "dimensions": [
            {
                "type": "number",
                "primary_key": "yes",
                "sql": '${TABLE}."id"',
                "name": "id",
            }
        ],
        "name": "flights",
    }

    # Refinement 1: a dimension that does not exist on the base view.
    air_carrier_refinement: dict = {
        "dimensions": [
            {
                "type": "string",
                "sql": '${TABLE}."air_carrier"',
                "name": "air_carrier",
            }
        ],
        "name": "+flights",
    }
    # Refinement 2: measures plus one more dimension.
    distance_refinement: dict = {
        "measures": [
            {"type": "average", "sql": "${distance}", "name": "distance_avg"},
            {
                "type": "number",
                "sql": "STDDEV(${distance})",
                "name": "distance_stddev",
            },
        ],
        "dimensions": [
            {
                "type": "tier",
                "sql": "${distance}",
                "tiers": [500, 1300],
                "name": "distance_tiered2",
            },
        ],
        "name": "+flights",
    }
    # Refinement 3: a dimension group.
    enrolled_refinement: dict = {
        "dimension_groups": [
            {
                "type": "duration",
                "intervals": ["week", "year"],
                "sql_start": '${TABLE}."enrollment_date"',
                "sql_end": '${TABLE}."graduation_date"',
                "name": "enrolled",
            },
        ],
        "name": "+flights",
    }
    # Refinement 4: redefines the existing "id" dimension with a new type.
    id_refinement: dict = {
        "dimensions": [{"type": "string", "sql": '${TABLE}."id"', "name": "id"}],
        "name": "+flights",
    }

    refinement_views: List[dict] = [
        air_carrier_refinement,
        distance_refinement,
        enrolled_refinement,
        id_refinement,
    ]

    merged_view: dict = LookerRefinementResolver.merge_refinements(
        raw_view=base_view, refinement_views=refinement_views
    )

    # "id" keeps primary_key from the base view but takes the refined type;
    # every other field is expected to be appended under its own section.
    expected_view: dict = {
        "sql_table_name": "flightstats.accidents",
        "dimensions": [
            {
                "type": "string",
                "primary_key": "yes",
                "sql": '${TABLE}."id"',
                "name": "id",
            },
            {"type": "string", "sql": '${TABLE}."air_carrier"', "name": "air_carrier"},
            {
                "type": "tier",
                "sql": "${distance}",
                "tiers": [500, 1300],
                "name": "distance_tiered2",
            },
        ],
        "name": "flights",
        "measures": [
            {"type": "average", "sql": "${distance}", "name": "distance_avg"},
            {"type": "number", "sql": "STDDEV(${distance})", "name": "distance_stddev"},
        ],
        "dimension_groups": [
            {
                "type": "duration",
                "intervals": ["week", "year"],
                "sql_start": '${TABLE}."enrollment_date"',
                "sql_end": '${TABLE}."graduation_date"',
                "name": "enrolled",
            }
        ],
    }

    difference = DeepDiff(expected_view, merged_view)
    assert difference == {}
@freeze_time(FROZEN_TIME)
def test_lookml_ingest_offline(pytestconfig, tmp_path, mock_time):
"""New form of config with offline specification of connection defaults"""
@ -100,6 +310,7 @@ def test_lookml_ingest_offline(pytestconfig, tmp_path, mock_time):
"project_name": "lkml_samples",
"model_pattern": {"deny": ["data2"]},
"emit_reachable_views_only": False,
"process_refinements": False,
},
},
"sink": {
@ -144,6 +355,7 @@ def test_lookml_ingest_offline_with_model_deny(pytestconfig, tmp_path, mock_time
"project_name": "lkml_samples",
"model_pattern": {"deny": ["data"]},
"emit_reachable_views_only": False,
"process_refinements": False,
},
},
"sink": {
@ -190,6 +402,7 @@ def test_lookml_ingest_offline_platform_instance(pytestconfig, tmp_path, mock_ti
"project_name": "lkml_samples",
"model_pattern": {"deny": ["data2"]},
"emit_reachable_views_only": False,
"process_refinements": False,
},
},
"sink": {
@ -269,6 +482,7 @@ def ingestion_test(
"parse_table_names_from_sql": True,
"model_pattern": {"deny": ["data2"]},
"emit_reachable_views_only": False,
"process_refinements": False,
},
},
"sink": {
@ -313,6 +527,7 @@ def test_lookml_bad_sql_parser(pytestconfig, tmp_path, mock_time):
"project_name": "lkml_samples",
"sql_parser": "bad.sql.Parser",
"emit_reachable_views_only": False,
"process_refinements": False,
},
},
"sink": {
@ -360,6 +575,7 @@ def test_lookml_git_info(pytestconfig, tmp_path, mock_time):
"model_pattern": {"deny": ["data2"]},
"github_info": {"repo": "datahub/looker-demo", "branch": "master"},
"emit_reachable_views_only": False,
"process_refinements": False,
},
},
"sink": {
@ -412,6 +628,7 @@ def test_reachable_views(pytestconfig, tmp_path, mock_time):
"parse_table_names_from_sql": True,
"project_name": "lkml_samples",
"emit_reachable_views_only": True,
"process_refinements": False,
},
},
"sink": {
@ -473,6 +690,7 @@ def test_hive_platform_drops_ids(pytestconfig, tmp_path, mock_time):
"model_pattern": {"deny": ["data2"]},
"github_info": {"repo": "datahub/looker-demo", "branch": "master"},
"emit_reachable_views_only": False,
"process_refinements": False,
},
},
"sink": {
@ -612,7 +830,7 @@ def test_lookml_ingest_stateful(pytestconfig, tmp_path, mock_time, mock_datahub_
state1.get_urns_not_in(type="dataset", other_checkpoint_state=state2)
)
# the difference in dataset urns are all the views that are not reachable from the model file
assert len(difference_dataset_urns) == 10
assert len(difference_dataset_urns) == 11
deleted_dataset_urns: List[str] = [
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)",
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)",
@ -624,6 +842,7 @@ def test_lookml_ingest_stateful(pytestconfig, tmp_path, mock_time, mock_datahub_
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)",
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)",
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)",
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)",
]
assert sorted(deleted_dataset_urns) == sorted(difference_dataset_urns)