mirror of
https://github.com/datahub-project/datahub.git
synced 2025-09-25 17:15:09 +00:00
feat(ingest/lookml): correctly handle include directives from imported projects (#7798)
This commit is contained in:
parent
1831b89a86
commit
4f59169566
@ -286,7 +286,7 @@ class LookMLSourceConfig(
|
|||||||
cls, v: Optional[pydantic.DirectoryPath], values: Dict[str, Any]
|
cls, v: Optional[pydantic.DirectoryPath], values: Dict[str, Any]
|
||||||
) -> Optional[pydantic.DirectoryPath]:
|
) -> Optional[pydantic.DirectoryPath]:
|
||||||
if v is None:
|
if v is None:
|
||||||
git_info: Optional[GitInfo] = values.get("git_info", None)
|
git_info: Optional[GitInfo] = values.get("git_info")
|
||||||
if git_info and git_info.deploy_key:
|
if git_info and git_info.deploy_key:
|
||||||
# We have git_info populated correctly, base folder is not needed
|
# We have git_info populated correctly, base folder is not needed
|
||||||
pass
|
pass
|
||||||
@ -342,7 +342,7 @@ class LookerModel:
|
|||||||
def from_looker_dict(
|
def from_looker_dict(
|
||||||
looker_model_dict: dict,
|
looker_model_dict: dict,
|
||||||
base_project_name: str,
|
base_project_name: str,
|
||||||
base_folder: str,
|
root_project_name: Optional[str],
|
||||||
base_projects_folders: Dict[str, pathlib.Path],
|
base_projects_folders: Dict[str, pathlib.Path],
|
||||||
path: str,
|
path: str,
|
||||||
reporter: LookMLSourceReport,
|
reporter: LookMLSourceReport,
|
||||||
@ -353,7 +353,7 @@ class LookerModel:
|
|||||||
resolved_includes = LookerModel.resolve_includes(
|
resolved_includes = LookerModel.resolve_includes(
|
||||||
includes,
|
includes,
|
||||||
base_project_name,
|
base_project_name,
|
||||||
base_folder,
|
root_project_name,
|
||||||
base_projects_folders,
|
base_projects_folders,
|
||||||
path,
|
path,
|
||||||
reporter,
|
reporter,
|
||||||
@ -391,7 +391,7 @@ class LookerModel:
|
|||||||
def resolve_includes(
|
def resolve_includes(
|
||||||
includes: List[str],
|
includes: List[str],
|
||||||
project_name: str,
|
project_name: str,
|
||||||
base_folder: str,
|
root_project_name: Optional[str],
|
||||||
base_projects_folder: Dict[str, pathlib.Path],
|
base_projects_folder: Dict[str, pathlib.Path],
|
||||||
path: str,
|
path: str,
|
||||||
reporter: LookMLSourceReport,
|
reporter: LookMLSourceReport,
|
||||||
@ -403,9 +403,9 @@ class LookerModel:
|
|||||||
For rules on how LookML ``include`` statements are written, see
|
For rules on how LookML ``include`` statements are written, see
|
||||||
https://docs.looker.com/data-modeling/getting-started/ide-folders#wildcard_examples
|
https://docs.looker.com/data-modeling/getting-started/ide-folders#wildcard_examples
|
||||||
"""
|
"""
|
||||||
|
|
||||||
resolved = []
|
resolved = []
|
||||||
for inc in includes:
|
for inc in includes:
|
||||||
resolved_project_name = project_name
|
|
||||||
# Filter out dashboards - we get those through the looker source.
|
# Filter out dashboards - we get those through the looker source.
|
||||||
if (
|
if (
|
||||||
inc.endswith(".dashboard")
|
inc.endswith(".dashboard")
|
||||||
@ -415,14 +415,16 @@ class LookerModel:
|
|||||||
logger.debug(f"include '{inc}' is a dashboard, skipping it")
|
logger.debug(f"include '{inc}' is a dashboard, skipping it")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
resolved_project_name = project_name
|
||||||
|
resolved_project_folder = str(base_projects_folder[project_name])
|
||||||
|
|
||||||
# Massage the looker include into a valid glob wildcard expression
|
# Massage the looker include into a valid glob wildcard expression
|
||||||
if inc.startswith("//"):
|
if inc.startswith("//"):
|
||||||
# remote include, let's see if we have the project checked out locally
|
# remote include, let's see if we have the project checked out locally
|
||||||
(remote_project, project_local_path) = inc[2:].split("/", maxsplit=1)
|
(remote_project, project_local_path) = inc[2:].split("/", maxsplit=1)
|
||||||
if remote_project in base_projects_folder:
|
if remote_project in base_projects_folder:
|
||||||
glob_expr = (
|
resolved_project_folder = str(base_projects_folder[remote_project])
|
||||||
f"{base_projects_folder[remote_project]}/{project_local_path}"
|
glob_expr = f"{resolved_project_folder}/{project_local_path}"
|
||||||
)
|
|
||||||
resolved_project_name = remote_project
|
resolved_project_name = remote_project
|
||||||
else:
|
else:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
@ -430,7 +432,29 @@ class LookerModel:
|
|||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
elif inc.startswith("/"):
|
elif inc.startswith("/"):
|
||||||
glob_expr = f"{base_folder}{inc}"
|
glob_expr = f"{resolved_project_folder}{inc}"
|
||||||
|
|
||||||
|
# The include path is sometimes '/{project_name}/{path_within_project}'
|
||||||
|
# instead of '//{project_name}/{path_within_project}' or '/{path_within_project}'.
|
||||||
|
#
|
||||||
|
# TODO: I can't seem to find any documentation on this pattern, but we definitely
|
||||||
|
# have seen it in the wild. Example from Mozilla's public looker-hub repo:
|
||||||
|
# https://github.com/mozilla/looker-hub/blob/f491ca51ce1add87c338e6723fd49bc6ae4015ca/fenix/explores/activation.explore.lkml#L7
|
||||||
|
# As such, we try to handle it but are as defensive as possible.
|
||||||
|
|
||||||
|
non_base_project_name = project_name
|
||||||
|
if project_name == _BASE_PROJECT_NAME and root_project_name is not None:
|
||||||
|
non_base_project_name = root_project_name
|
||||||
|
if non_base_project_name != _BASE_PROJECT_NAME and inc.startswith(
|
||||||
|
f"/{non_base_project_name}/"
|
||||||
|
):
|
||||||
|
# This might be a local include. Let's make sure that '/{project_name}' doesn't
|
||||||
|
# exist as a normal include in the project.
|
||||||
|
if not pathlib.Path(
|
||||||
|
f"{resolved_project_folder}/{non_base_project_name}"
|
||||||
|
).exists():
|
||||||
|
path_within_project = pathlib.Path(*pathlib.Path(inc).parts[2:])
|
||||||
|
glob_expr = f"{resolved_project_folder}/{path_within_project}"
|
||||||
else:
|
else:
|
||||||
# Need to handle a relative path.
|
# Need to handle a relative path.
|
||||||
glob_expr = str(pathlib.Path(path).parent / inc)
|
glob_expr = str(pathlib.Path(path).parent / inc)
|
||||||
@ -484,7 +508,7 @@ class LookerModel:
|
|||||||
LookerModel.resolve_includes(
|
LookerModel.resolve_includes(
|
||||||
parsed["includes"],
|
parsed["includes"],
|
||||||
resolved_project_name,
|
resolved_project_name,
|
||||||
base_folder,
|
root_project_name,
|
||||||
base_projects_folder,
|
base_projects_folder,
|
||||||
included_file,
|
included_file,
|
||||||
reporter,
|
reporter,
|
||||||
@ -524,7 +548,7 @@ class LookerViewFile:
|
|||||||
absolute_file_path: str,
|
absolute_file_path: str,
|
||||||
looker_view_file_dict: dict,
|
looker_view_file_dict: dict,
|
||||||
project_name: str,
|
project_name: str,
|
||||||
base_folder: str,
|
root_project_name: Optional[str],
|
||||||
base_projects_folder: Dict[str, pathlib.Path],
|
base_projects_folder: Dict[str, pathlib.Path],
|
||||||
raw_file_content: str,
|
raw_file_content: str,
|
||||||
reporter: LookMLSourceReport,
|
reporter: LookMLSourceReport,
|
||||||
@ -537,7 +561,7 @@ class LookerViewFile:
|
|||||||
resolved_includes = LookerModel.resolve_includes(
|
resolved_includes = LookerModel.resolve_includes(
|
||||||
includes,
|
includes,
|
||||||
project_name,
|
project_name,
|
||||||
base_folder,
|
root_project_name,
|
||||||
base_projects_folder,
|
base_projects_folder,
|
||||||
absolute_file_path,
|
absolute_file_path,
|
||||||
reporter,
|
reporter,
|
||||||
@ -572,12 +596,12 @@ class LookerViewFileLoader:
|
|||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
base_folder: str,
|
root_project_name: Optional[str],
|
||||||
base_projects_folder: Dict[str, pathlib.Path],
|
base_projects_folder: Dict[str, pathlib.Path],
|
||||||
reporter: LookMLSourceReport,
|
reporter: LookMLSourceReport,
|
||||||
) -> None:
|
) -> None:
|
||||||
self.viewfile_cache: Dict[str, LookerViewFile] = {}
|
self.viewfile_cache: Dict[str, LookerViewFile] = {}
|
||||||
self._base_folder = base_folder
|
self._root_project_name = root_project_name
|
||||||
self._base_projects_folder = base_projects_folder
|
self._base_projects_folder = base_projects_folder
|
||||||
self.reporter = reporter
|
self.reporter = reporter
|
||||||
|
|
||||||
@ -617,7 +641,7 @@ class LookerViewFileLoader:
|
|||||||
absolute_file_path=path,
|
absolute_file_path=path,
|
||||||
looker_view_file_dict=parsed,
|
looker_view_file_dict=parsed,
|
||||||
project_name=project_name,
|
project_name=project_name,
|
||||||
base_folder=self._base_folder,
|
root_project_name=self._root_project_name,
|
||||||
base_projects_folder=self._base_projects_folder,
|
base_projects_folder=self._base_projects_folder,
|
||||||
raw_file_content=raw_file_content,
|
raw_file_content=raw_file_content,
|
||||||
reporter=reporter,
|
reporter=reporter,
|
||||||
@ -1198,7 +1222,7 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|||||||
looker_model = LookerModel.from_looker_dict(
|
looker_model = LookerModel.from_looker_dict(
|
||||||
parsed,
|
parsed,
|
||||||
_BASE_PROJECT_NAME,
|
_BASE_PROJECT_NAME,
|
||||||
str(self.source_config.base_folder),
|
self.source_config.project_name,
|
||||||
self.base_projects_folder,
|
self.base_projects_folder,
|
||||||
path,
|
path,
|
||||||
self.reporter,
|
self.reporter,
|
||||||
@ -1380,12 +1404,9 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|||||||
|
|
||||||
def _get_custom_properties(self, looker_view: LookerView) -> DatasetPropertiesClass:
|
def _get_custom_properties(self, looker_view: LookerView) -> DatasetPropertiesClass:
|
||||||
assert self.source_config.base_folder # this is always filled out
|
assert self.source_config.base_folder # this is always filled out
|
||||||
if looker_view.id.project_name == _BASE_PROJECT_NAME:
|
base_folder = self.base_projects_folder.get(
|
||||||
base_folder = self.source_config.base_folder
|
looker_view.id.project_name, self.source_config.base_folder
|
||||||
else:
|
)
|
||||||
base_folder = self.base_projects_folder.get(
|
|
||||||
looker_view.id.project_name, self.source_config.base_folder
|
|
||||||
)
|
|
||||||
try:
|
try:
|
||||||
file_path = str(
|
file_path = str(
|
||||||
pathlib.Path(looker_view.absolute_file_path).relative_to(
|
pathlib.Path(looker_view.absolute_file_path).relative_to(
|
||||||
@ -1535,6 +1556,7 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|||||||
self.base_projects_folder[
|
self.base_projects_folder[
|
||||||
_BASE_PROJECT_NAME
|
_BASE_PROJECT_NAME
|
||||||
] = self.source_config.base_folder
|
] = self.source_config.base_folder
|
||||||
|
|
||||||
visited_projects: Set[str] = set()
|
visited_projects: Set[str] = set()
|
||||||
|
|
||||||
# We clone everything that we're pointed at.
|
# We clone everything that we're pointed at.
|
||||||
@ -1592,6 +1614,20 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|||||||
|
|
||||||
manifest = self.get_manifest_if_present(project_path)
|
manifest = self.get_manifest_if_present(project_path)
|
||||||
if manifest:
|
if manifest:
|
||||||
|
# Special case handling if the root project has a name in the manifest file.
|
||||||
|
if project_name == _BASE_PROJECT_NAME and manifest.project_name:
|
||||||
|
if (
|
||||||
|
self.source_config.project_name is not None
|
||||||
|
and manifest.project_name != self.source_config.project_name
|
||||||
|
):
|
||||||
|
logger.warning(
|
||||||
|
f"The project name in the manifest file '{manifest.project_name}' "
|
||||||
|
f"does not match the configured project name '{self.source_config.project_name}'. "
|
||||||
|
"This can lead to failures in LookML include resolution and lineage generation."
|
||||||
|
)
|
||||||
|
elif self.source_config.project_name is None:
|
||||||
|
self.source_config.project_name = manifest.project_name
|
||||||
|
|
||||||
# Clone the remote project dependencies.
|
# Clone the remote project dependencies.
|
||||||
for remote_project in manifest.remote_dependencies:
|
for remote_project in manifest.remote_dependencies:
|
||||||
if remote_project.name in project_visited:
|
if remote_project.name in project_visited:
|
||||||
@ -1644,7 +1680,7 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|||||||
assert self.source_config.base_folder
|
assert self.source_config.base_folder
|
||||||
|
|
||||||
viewfile_loader = LookerViewFileLoader(
|
viewfile_loader = LookerViewFileLoader(
|
||||||
str(self.source_config.base_folder),
|
self.source_config.project_name,
|
||||||
self.base_projects_folder,
|
self.base_projects_folder,
|
||||||
self.reporter,
|
self.reporter,
|
||||||
)
|
)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user