mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-11-03 03:59:12 +00:00
Looker - Fix file extension and blob import (#12232)
* Fix file extension and blob import * Fix file extension and blob import
This commit is contained in:
parent
4aab0fd6fb
commit
1ecf5607c7
@ -11,7 +11,9 @@
|
||||
"""
|
||||
.lkml files parser
|
||||
"""
|
||||
import fnmatch
|
||||
import traceback
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
import lkml
|
||||
@ -28,6 +30,8 @@ from metadata.utils.logger import ingestion_logger
|
||||
|
||||
logger = ingestion_logger()
|
||||
|
||||
EXTENSIONS = (".lkml", ".lookml")
|
||||
|
||||
|
||||
class LkmlParser:
|
||||
"""
|
||||
@ -61,6 +65,18 @@ class LkmlParser:
|
||||
|
||||
self.reader = reader
|
||||
|
||||
self._file_tree: Optional[List[Includes]] = None
|
||||
|
||||
@property
|
||||
def file_tree(self) -> List[Includes]:
|
||||
"""
|
||||
Parse the file tree of the repo
|
||||
"""
|
||||
if not self._file_tree:
|
||||
self._file_tree = self.reader.get_tree()
|
||||
|
||||
return self._file_tree or []
|
||||
|
||||
def parse_file(self, path: Includes) -> Optional[List[Includes]]:
|
||||
"""
|
||||
Internal parser. Parse the file and cache the views
|
||||
@ -79,17 +95,7 @@ class LkmlParser:
|
||||
return []
|
||||
|
||||
try:
|
||||
file = self.reader.read(path)
|
||||
lkml_file = LkmlFile.parse_obj(lkml.load(file))
|
||||
self.parsed_files[path] = file
|
||||
|
||||
# Cache everything
|
||||
self._visited_files[path] = lkml_file.includes
|
||||
for view in lkml_file.views:
|
||||
view.source_file = path
|
||||
self._views_cache[view.name] = view
|
||||
|
||||
return lkml_file.includes
|
||||
return self._process_file(path)
|
||||
|
||||
except ReadException as err:
|
||||
logger.debug(traceback.format_exc())
|
||||
@ -104,6 +110,71 @@ class LkmlParser:
|
||||
|
||||
return None
|
||||
|
||||
def _process_file(self, path: Includes) -> Optional[List[Includes]]:
|
||||
"""
|
||||
Processing of a single path
|
||||
"""
|
||||
file = self._read_file(path)
|
||||
lkml_file = LkmlFile.parse_obj(lkml.load(file))
|
||||
self.parsed_files[path] = file
|
||||
|
||||
# Cache everything
|
||||
expanded_includes = self._expand_includes(lkml_file.includes)
|
||||
self._visited_files[path] = expanded_includes
|
||||
for view in lkml_file.views:
|
||||
view.source_file = path
|
||||
self._views_cache[view.name] = view
|
||||
|
||||
return expanded_includes
|
||||
|
||||
def _expand_includes(
|
||||
self, includes: Optional[List[Includes]]
|
||||
) -> Optional[List[Includes]]:
|
||||
"""
|
||||
If we have * in includes, expand them based on the file tree
|
||||
"""
|
||||
if not includes:
|
||||
return includes
|
||||
|
||||
return [expanded for path in includes for expanded in self._expand(path)]
|
||||
|
||||
def _expand(self, path: Includes) -> List[Includes]:
|
||||
"""
|
||||
Match files in tree if there's any * in the include
|
||||
"""
|
||||
suffixes = Path(path).suffixes
|
||||
if "*" in path:
|
||||
if set(suffixes).intersection(set(EXTENSIONS)):
|
||||
return fnmatch.filter(self.file_tree, path)
|
||||
for suffix in EXTENSIONS:
|
||||
res = fnmatch.filter(self.file_tree, Includes(str(path) + suffix))
|
||||
if res:
|
||||
return res
|
||||
# Nothing matched, we cannot find the file
|
||||
logger.warning(f"We could not match any file from the include {path}")
|
||||
return []
|
||||
|
||||
return [path]
|
||||
|
||||
def _read_file(self, path: Includes) -> str:
|
||||
"""
|
||||
Read the LookML file
|
||||
"""
|
||||
suffixes = Path(path).suffixes
|
||||
|
||||
# Check if any suffix is in our extension list
|
||||
if not set(suffixes).intersection(set(EXTENSIONS)):
|
||||
for suffix in EXTENSIONS:
|
||||
try:
|
||||
return self.reader.read(path + suffix)
|
||||
except ReadException as err:
|
||||
logger.debug(f"Error trying to read the file [{path}]: {err}")
|
||||
|
||||
else:
|
||||
return self.reader.read(path)
|
||||
|
||||
raise ReadException(f"Error trying to read the file [{path}]")
|
||||
|
||||
def get_view_from_cache(self, view_name: ViewName) -> Optional[LookMlView]:
|
||||
"""
|
||||
Check if view is cached, and return it.
|
||||
|
||||
@ -11,7 +11,13 @@
|
||||
"""
|
||||
Base local reader
|
||||
"""
|
||||
import traceback
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List, Optional
|
||||
|
||||
from metadata.utils.logger import ingestion_logger
|
||||
|
||||
logger = ingestion_logger()
|
||||
|
||||
|
||||
class ReadException(Exception):
|
||||
@ -21,9 +27,31 @@ class ReadException(Exception):
|
||||
|
||||
|
||||
class Reader(ABC):
|
||||
"""
|
||||
Abstract class for all readers
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def read(self, path: str) -> str:
|
||||
"""
|
||||
Given a string, return a string
|
||||
"""
|
||||
raise NotImplementedError("Missing read implementation")
|
||||
|
||||
@abstractmethod
|
||||
def _get_tree(self) -> List[str]:
|
||||
"""
|
||||
Return the filenames of the root
|
||||
"""
|
||||
raise NotImplementedError("Missing get_tree implementation")
|
||||
|
||||
def get_tree(self) -> Optional[List[str]]:
|
||||
"""
|
||||
If something happens, return None
|
||||
"""
|
||||
try:
|
||||
return self._get_tree()
|
||||
except Exception as err:
|
||||
logger.debug(traceback.format_exc())
|
||||
logger.error(f"Error getting file tree [{err}]")
|
||||
return None
|
||||
|
||||
@ -13,6 +13,7 @@ GitHub client to read files with token auth
|
||||
"""
|
||||
import traceback
|
||||
from enum import Enum
|
||||
from typing import List
|
||||
|
||||
import requests
|
||||
|
||||
@ -27,6 +28,7 @@ logger = ingestion_logger()
|
||||
|
||||
|
||||
HOST = "https://api.bitbucket.org/2.0"
|
||||
PAGE_LENGTH = 100
|
||||
|
||||
|
||||
class UrlParts(Enum):
|
||||
@ -74,3 +76,48 @@ class BitBucketReader(ApiReader):
|
||||
raise ReadException(f"Error fetching file [{path}] from repo: {err}")
|
||||
|
||||
raise ReadException(f"Could not fetch file [{path}] from repo")
|
||||
|
||||
def _get_files_from_dir(self, url: str) -> List[str]:
|
||||
"""
|
||||
Run the request and return the page results
|
||||
"""
|
||||
res = requests.get(
|
||||
url=url + "/?fields=values.path",
|
||||
headers=self.auth_headers,
|
||||
timeout=30,
|
||||
)
|
||||
|
||||
if res.status_code == 200:
|
||||
files = []
|
||||
json_res = res.json()
|
||||
for file in json_res.get("values") or []:
|
||||
path = file.get("path")
|
||||
new_url = url + "/" + path.split("/")[-1]
|
||||
|
||||
# If we have a file, append. Otherwise, call again
|
||||
if "." in path:
|
||||
files.append(path)
|
||||
else:
|
||||
files.extend(self._get_files_from_dir(new_url))
|
||||
|
||||
return files
|
||||
|
||||
# If we don't get a 200, raise
|
||||
res.raise_for_status()
|
||||
raise RuntimeError("Could not fetch the tree")
|
||||
|
||||
def _get_tree(self) -> List[str]:
|
||||
"""
|
||||
Paginate over the results
|
||||
"""
|
||||
|
||||
url = self._build_url(
|
||||
HOST,
|
||||
UrlParts.REPOS.value,
|
||||
self.credentials.repositoryOwner.__root__,
|
||||
self.credentials.repositoryName.__root__,
|
||||
UrlParts.SRC.value,
|
||||
self.credentials.branch,
|
||||
)
|
||||
|
||||
return self._get_files_from_dir(url)
|
||||
|
||||
@ -14,7 +14,7 @@ GitHub client to read files with token auth
|
||||
import base64
|
||||
import traceback
|
||||
from enum import Enum
|
||||
from typing import Any, Dict
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import requests
|
||||
|
||||
@ -85,3 +85,53 @@ class GitHubReader(ApiReader):
|
||||
raise ReadException(f"Error fetching file [{path}] from repo: {err}")
|
||||
|
||||
raise ReadException(f"Could not fetch file [{path}] from repo")
|
||||
|
||||
def _get_default_branch(self) -> str:
|
||||
"""
|
||||
Get repo default branch
|
||||
"""
|
||||
res = requests.get(
|
||||
self._build_url(
|
||||
HOST,
|
||||
UrlParts.REPOS.value,
|
||||
self.credentials.repositoryOwner.__root__,
|
||||
self.credentials.repositoryName.__root__,
|
||||
),
|
||||
headers=self.auth_headers,
|
||||
timeout=30,
|
||||
)
|
||||
if res.status_code == 200:
|
||||
return res.json().get("default_branch")
|
||||
|
||||
# If we don't get a 200, raise
|
||||
res.raise_for_status()
|
||||
raise RuntimeError("Could not fetch the default branch")
|
||||
|
||||
def _get_tree(self) -> Optional[List[str]]:
|
||||
"""
|
||||
Use the GitHub Tree API
|
||||
"""
|
||||
# First, get the default branch
|
||||
branch = self._get_default_branch()
|
||||
if branch:
|
||||
res = requests.get(
|
||||
self._build_url(
|
||||
HOST,
|
||||
UrlParts.REPOS.value,
|
||||
self.credentials.repositoryOwner.__root__,
|
||||
self.credentials.repositoryName.__root__,
|
||||
"git",
|
||||
"trees",
|
||||
f"{branch}?recursive=1",
|
||||
),
|
||||
headers=self.auth_headers,
|
||||
timeout=30,
|
||||
)
|
||||
if res.status_code == 200:
|
||||
return [elem.get("path") for elem in res.json().get("tree")]
|
||||
|
||||
# If we don't get a 200, raise
|
||||
res.raise_for_status()
|
||||
|
||||
# If we don't find a branch, return None
|
||||
return None
|
||||
|
||||
@ -13,6 +13,7 @@ Local Reader
|
||||
"""
|
||||
import traceback
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
|
||||
from metadata.readers.base import Reader, ReadException
|
||||
from metadata.utils.constants import UTF_8
|
||||
@ -40,3 +41,12 @@ class LocalReader(Reader):
|
||||
except Exception as err:
|
||||
logger.debug(traceback.format_exc())
|
||||
raise ReadException(f"Error reading file [{path}] locally: {err}")
|
||||
|
||||
def _get_tree(self) -> Optional[List[str]]:
|
||||
"""
|
||||
Return the tree with the files relative to the base path
|
||||
"""
|
||||
return [
|
||||
str(path).replace(str(self.base_path) + "/", "")
|
||||
for path in Path(self.base_path).rglob("*")
|
||||
]
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
include: "views/cats.view.lkml"
|
||||
include: "*/cats.view"
|
||||
include: "views/dogs.view.lkml"
|
||||
|
||||
|
||||
|
||||
0
ingestion/tests/unit/topology/dashboard/__init__.py
Normal file
0
ingestion/tests/unit/topology/dashboard/__init__.py
Normal file
@ -154,3 +154,15 @@ class TestLkmlParser(TestCase):
|
||||
self.assertEqual(
|
||||
get_path_from_link(link_no_files), "hello/explores/my_explore.explore.lkml"
|
||||
)
|
||||
|
||||
def test_expand(self):
|
||||
"""
|
||||
We can expand a single Path. We are looking for "*/cats.view", which will
|
||||
match a file in the resources directory "cats.view.lkml"
|
||||
"""
|
||||
path = Includes("*/cats.view")
|
||||
|
||||
reader = LocalReader(BASE_PATH)
|
||||
parser = LkmlParser(reader)
|
||||
|
||||
self.assertIn("cats.view.lkml", parser._expand(path)[0])
|
||||
|
||||
@ -0,0 +1,60 @@
|
||||
# Copyright 2021 Collate
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
Test GitHub Reader
|
||||
"""
|
||||
from unittest import TestCase
|
||||
|
||||
from metadata.generated.schema.security.credentials.bitbucketCredentials import (
|
||||
BitBucketCredentials,
|
||||
)
|
||||
from metadata.ingestion.source.dashboard.looker.models import Includes, ViewName
|
||||
from metadata.ingestion.source.dashboard.looker.parser import LkmlParser
|
||||
from metadata.readers.bitbucket import BitBucketReader
|
||||
|
||||
|
||||
class TestLookMLBitBucketReader(TestCase):
|
||||
"""
|
||||
Validate the github reader against the OM repo
|
||||
"""
|
||||
|
||||
creds = BitBucketCredentials(
|
||||
repositoryName="api",
|
||||
repositoryOwner="pmbrull-trial-api",
|
||||
branch="main",
|
||||
)
|
||||
|
||||
reader = BitBucketReader(creds)
|
||||
parser = LkmlParser(reader)
|
||||
|
||||
def test_lookml_read_and_parse(self):
|
||||
"""
|
||||
We can parse the explore file.
|
||||
|
||||
We'll expand and find views from https://bitbucket.org/pmbrull-trial-api/api/src/main
|
||||
"""
|
||||
|
||||
explore_file = "cats.explore.lkml"
|
||||
self.parser.parse_file(Includes(explore_file))
|
||||
|
||||
contents = self.parser.parsed_files.get(Includes(explore_file))
|
||||
|
||||
# Check file contents
|
||||
self.assertIn("explore: cats", contents)
|
||||
|
||||
view = self.parser.find_view(
|
||||
view_name=ViewName("cats"), path=Includes(explore_file)
|
||||
)
|
||||
|
||||
# We can get views that are resolved even if the include does not contain `.lkml`
|
||||
self.assertIsNotNone(view)
|
||||
self.assertEqual(view.name, "cats")
|
||||
@ -0,0 +1,59 @@
|
||||
# Copyright 2021 Collate
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
Test GitHub Reader
|
||||
"""
|
||||
from unittest import TestCase
|
||||
|
||||
from metadata.generated.schema.security.credentials.githubCredentials import (
|
||||
GitHubCredentials,
|
||||
)
|
||||
from metadata.ingestion.source.dashboard.looker.models import Includes, ViewName
|
||||
from metadata.ingestion.source.dashboard.looker.parser import LkmlParser
|
||||
from metadata.readers.github import GitHubReader
|
||||
|
||||
|
||||
class TestLookMLGitHubReader(TestCase):
|
||||
"""
|
||||
Validate the github reader against the OM repo
|
||||
"""
|
||||
|
||||
creds = GitHubCredentials(
|
||||
repositoryName="lookml-sample",
|
||||
repositoryOwner="open-metadata",
|
||||
)
|
||||
|
||||
reader = GitHubReader(creds)
|
||||
parser = LkmlParser(reader)
|
||||
|
||||
def test_lookml_read_and_parse(self):
|
||||
"""
|
||||
We can parse the explore file.
|
||||
|
||||
We'll expand and find views from https://github.com/open-metadata/lookml-sample/blob/main/cats.explore.lkml
|
||||
"""
|
||||
|
||||
explore_file = "cats.explore.lkml"
|
||||
self.parser.parse_file(Includes(explore_file))
|
||||
|
||||
contents = self.parser.parsed_files.get(Includes(explore_file))
|
||||
|
||||
# Check file contents
|
||||
self.assertIn("explore: cats", contents)
|
||||
|
||||
view = self.parser.find_view(
|
||||
view_name=ViewName("cats"), path=Includes(explore_file)
|
||||
)
|
||||
|
||||
# We can get views that are resolved even if the include does not contain `.lkml`
|
||||
self.assertIsNotNone(view)
|
||||
self.assertEqual(view.name, "cats")
|
||||
Loading…
x
Reference in New Issue
Block a user