mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-25 17:08:29 +00:00
feat(ingest/lookml): support complex lookml manifests (#9688)
This commit is contained in:
parent
943bb57cbc
commit
b94d463fe0
@ -149,7 +149,7 @@ looker_common = {
|
||||
# This version of lkml contains a fix for parsing lists in
|
||||
# LookML files with spaces between an item and the following comma.
|
||||
# See https://github.com/joshtemple/lkml/issues/73.
|
||||
"lkml>=1.3.0b5",
|
||||
"lkml>=1.3.4",
|
||||
"sql-metadata==2.2.2",
|
||||
*sqllineage_lib,
|
||||
"GitPython>2",
|
||||
|
||||
@ -0,0 +1,28 @@
|
||||
import pathlib
|
||||
from typing import Union
|
||||
|
||||
import lkml
|
||||
import lkml.simple
|
||||
import lkml.tree
|
||||
|
||||
# Patch lkml to support manifest.lkml files by registering the extra
# keys below in its PLURAL_KEYS setting.
# lkml uses an immutable tuple instead of a list for this setting, so it
# cannot be extended in place: an extended tuple is built once and then
# assigned to both lkml.simple and lkml.tree.
lkml.tree.PLURAL_KEYS = lkml.simple.PLURAL_KEYS = (
    *lkml.simple.PLURAL_KEYS,
    "local_dependency",
    "remote_dependency",
    "constant",
    "override_constant",
)
|
||||
|
||||
|
||||
def load_lkml(path: Union[str, pathlib.Path]) -> dict:
    """Load a LookML file from disk and return it as a dictionary.

    Using this function instead of calling ``lkml.load`` directly ensures
    that this module has been imported, and therefore that our patches to
    lkml are applied before parsing.

    Args:
        path: Location of the LookML file to parse.

    Returns:
        The dictionary produced by ``lkml.load`` for the file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """

    # Decode explicitly as UTF-8 rather than relying on the platform's
    # locale encoding, so parsing does not depend on the host's settings.
    with open(path, "r", encoding="utf-8") as file:
        return lkml.load(file)
|
||||
@ -49,6 +49,7 @@ from datahub.ingestion.api.source import MetadataWorkUnitProcessor, SourceCapabi
|
||||
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
||||
from datahub.ingestion.source.common.subtypes import DatasetSubTypes
|
||||
from datahub.ingestion.source.git.git_import import GitClone
|
||||
from datahub.ingestion.source.looker.lkml_patched import load_lkml
|
||||
from datahub.ingestion.source.looker.looker_common import (
|
||||
CORPUSER_DATAHUB,
|
||||
LookerCommonConfig,
|
||||
@ -98,13 +99,6 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
_BASE_PROJECT_NAME = "__BASE"
|
||||
|
||||
# Patch lkml to support the local_dependency and remote_dependency keywords.
|
||||
lkml.simple.PLURAL_KEYS = (
|
||||
*lkml.simple.PLURAL_KEYS,
|
||||
"local_dependency",
|
||||
"remote_dependency",
|
||||
)
|
||||
|
||||
_EXPLORE_FILE_EXTENSION = ".explore.lkml"
|
||||
_VIEW_FILE_EXTENSION = ".view.lkml"
|
||||
_MODEL_FILE_EXTENSION = ".model.lkml"
|
||||
@ -384,10 +378,9 @@ class LookerModel:
|
||||
]
|
||||
for included_file in explore_files:
|
||||
try:
|
||||
with open(included_file, "r") as file:
|
||||
parsed = lkml.load(file)
|
||||
included_explores = parsed.get("explores", [])
|
||||
explores.extend(included_explores)
|
||||
parsed = load_lkml(included_file)
|
||||
included_explores = parsed.get("explores", [])
|
||||
explores.extend(included_explores)
|
||||
except Exception as e:
|
||||
reporter.report_warning(
|
||||
path, f"Failed to load {included_file} due to {e}"
|
||||
@ -514,24 +507,23 @@ class LookerModel:
|
||||
f"Will be loading {included_file}, traversed here via {traversal_path}"
|
||||
)
|
||||
try:
|
||||
with open(included_file, "r") as file:
|
||||
parsed = lkml.load(file)
|
||||
seen_so_far.add(included_file)
|
||||
if "includes" in parsed: # we have more includes to resolve!
|
||||
resolved.extend(
|
||||
LookerModel.resolve_includes(
|
||||
parsed["includes"],
|
||||
resolved_project_name,
|
||||
root_project_name,
|
||||
base_projects_folder,
|
||||
included_file,
|
||||
reporter,
|
||||
seen_so_far,
|
||||
traversal_path=traversal_path
|
||||
+ "."
|
||||
+ pathlib.Path(included_file).stem,
|
||||
)
|
||||
parsed = load_lkml(included_file)
|
||||
seen_so_far.add(included_file)
|
||||
if "includes" in parsed: # we have more includes to resolve!
|
||||
resolved.extend(
|
||||
LookerModel.resolve_includes(
|
||||
parsed["includes"],
|
||||
resolved_project_name,
|
||||
root_project_name,
|
||||
base_projects_folder,
|
||||
included_file,
|
||||
reporter,
|
||||
seen_so_far,
|
||||
traversal_path=traversal_path
|
||||
+ "."
|
||||
+ pathlib.Path(included_file).stem,
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
reporter.report_warning(
|
||||
path, f"Failed to load {included_file} due to {e}"
|
||||
@ -648,21 +640,20 @@ class LookerViewFileLoader:
|
||||
self.reporter.report_failure(path, f"failed to load view file: {e}")
|
||||
return None
|
||||
try:
|
||||
with open(path, "r") as file:
|
||||
logger.debug(f"Loading viewfile {path}")
|
||||
parsed = lkml.load(file)
|
||||
looker_viewfile = LookerViewFile.from_looker_dict(
|
||||
absolute_file_path=path,
|
||||
looker_view_file_dict=parsed,
|
||||
project_name=project_name,
|
||||
root_project_name=self._root_project_name,
|
||||
base_projects_folder=self._base_projects_folder,
|
||||
raw_file_content=raw_file_content,
|
||||
reporter=reporter,
|
||||
)
|
||||
logger.debug(f"adding viewfile for path {path} to the cache")
|
||||
self.viewfile_cache[path] = looker_viewfile
|
||||
return looker_viewfile
|
||||
logger.debug(f"Loading viewfile {path}")
|
||||
parsed = load_lkml(path)
|
||||
looker_viewfile = LookerViewFile.from_looker_dict(
|
||||
absolute_file_path=path,
|
||||
looker_view_file_dict=parsed,
|
||||
project_name=project_name,
|
||||
root_project_name=self._root_project_name,
|
||||
base_projects_folder=self._base_projects_folder,
|
||||
raw_file_content=raw_file_content,
|
||||
reporter=reporter,
|
||||
)
|
||||
logger.debug(f"adding viewfile for path {path} to the cache")
|
||||
self.viewfile_cache[path] = looker_viewfile
|
||||
return looker_viewfile
|
||||
except Exception as e:
|
||||
self.reporter.report_failure(path, f"failed to load view file: {e}")
|
||||
return None
|
||||
@ -1498,17 +1489,16 @@ class LookMLSource(StatefulIngestionSourceBase):
|
||||
)
|
||||
|
||||
def _load_model(self, path: str) -> LookerModel:
|
||||
with open(path, "r") as file:
|
||||
logger.debug(f"Loading model from file {path}")
|
||||
parsed = lkml.load(file)
|
||||
looker_model = LookerModel.from_looker_dict(
|
||||
parsed,
|
||||
_BASE_PROJECT_NAME,
|
||||
self.source_config.project_name,
|
||||
self.base_projects_folder,
|
||||
path,
|
||||
self.reporter,
|
||||
)
|
||||
logger.debug(f"Loading model from file {path}")
|
||||
parsed = load_lkml(path)
|
||||
looker_model = LookerModel.from_looker_dict(
|
||||
parsed,
|
||||
_BASE_PROJECT_NAME,
|
||||
self.source_config.project_name,
|
||||
self.base_projects_folder,
|
||||
path,
|
||||
self.reporter,
|
||||
)
|
||||
return looker_model
|
||||
|
||||
def _platform_names_have_2_parts(self, platform: str) -> bool:
|
||||
@ -1797,8 +1787,7 @@ class LookMLSource(StatefulIngestionSourceBase):
|
||||
def get_manifest_if_present(self, folder: pathlib.Path) -> Optional[LookerManifest]:
|
||||
manifest_file = folder / "manifest.lkml"
|
||||
if manifest_file.exists():
|
||||
with manifest_file.open() as fp:
|
||||
manifest_dict = lkml.load(fp)
|
||||
manifest_dict = load_lkml(manifest_file)
|
||||
|
||||
manifest = LookerManifest(
|
||||
project_name=manifest_dict.get("project_name"),
|
||||
|
||||
@ -0,0 +1,23 @@
|
||||
project_name: "complex-manifest-project"
|
||||
|
||||
constant: CONNECTION_NAME {
|
||||
value: "choose-connection"
|
||||
export: override_required
|
||||
}
|
||||
|
||||
constant: other_variable {
|
||||
value: "other-variable"
|
||||
export: override_required
|
||||
}
|
||||
|
||||
local_dependency: {
|
||||
project: "looker-hub"
|
||||
}
|
||||
|
||||
remote_dependency: remote-proj-1 {
|
||||
override_constant: schema_name {value: "mycorp_prod" }
|
||||
override_constant: choose-connection {value: "snowflake-conn-main"}
|
||||
}
|
||||
|
||||
remote_dependency: remote-proj-2 {
|
||||
}
|
||||
@ -16,6 +16,7 @@ from datahub.ingestion.source.looker.lookml_source import (
|
||||
LookerModel,
|
||||
LookerRefinementResolver,
|
||||
LookMLSourceConfig,
|
||||
load_lkml,
|
||||
)
|
||||
from datahub.metadata.schema_classes import (
|
||||
DatasetSnapshotClass,
|
||||
@ -852,3 +853,14 @@ def test_same_name_views_different_file_path(pytestconfig, tmp_path, mock_time):
|
||||
output_path=tmp_path / mce_out,
|
||||
golden_path=test_resources_dir / mce_out,
|
||||
)
|
||||
|
||||
|
||||
def test_manifest_parser(pytestconfig: pytest.Config) -> None:
    """Check that the sample complex manifest parses to a non-empty result.

    This mainly tests that we're permissive enough that we don't crash when
    parsing the manifest file. The test is needed because we monkeypatch
    the lkml library.
    """
    manifest_path = (
        pytestconfig.rootpath
        / "tests/integration/lookml"
        / "lkml_manifest_samples/complex-manifest.lkml"
    )

    parsed_manifest = load_lkml(manifest_path)
    assert parsed_manifest
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user