mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-11-10 15:59:57 +00:00
Looker - Fix file extension and blob import (#12232)
* Fix file extension and blob import * Fix file extension and blob import
This commit is contained in:
parent
4aab0fd6fb
commit
1ecf5607c7
@ -11,7 +11,9 @@
|
|||||||
"""
|
"""
|
||||||
.lkml files parser
|
.lkml files parser
|
||||||
"""
|
"""
|
||||||
|
import fnmatch
|
||||||
import traceback
|
import traceback
|
||||||
|
from pathlib import Path
|
||||||
from typing import Dict, List, Optional
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
import lkml
|
import lkml
|
||||||
@ -28,6 +30,8 @@ from metadata.utils.logger import ingestion_logger
|
|||||||
|
|
||||||
logger = ingestion_logger()
|
logger = ingestion_logger()
|
||||||
|
|
||||||
|
EXTENSIONS = (".lkml", ".lookml")
|
||||||
|
|
||||||
|
|
||||||
class LkmlParser:
|
class LkmlParser:
|
||||||
"""
|
"""
|
||||||
@ -61,6 +65,18 @@ class LkmlParser:
|
|||||||
|
|
||||||
self.reader = reader
|
self.reader = reader
|
||||||
|
|
||||||
|
self._file_tree: Optional[List[Includes]] = None
|
||||||
|
|
||||||
|
@property
def file_tree(self) -> List[Includes]:
    """
    File tree of the repo, fetched lazily from the reader.

    The reader is asked for the tree the first time it is needed and the
    outcome is cached in `self._file_tree`. A failed lookup (the reader
    returning `None`) or an empty tree is cached as an empty list, so the
    listing is not requested again for every wildcard include.

    Returns:
        The cached list of paths, possibly empty.
    """
    # `None` means "not fetched yet"; an empty list is a valid cached result
    if self._file_tree is None:
        self._file_tree = self.reader.get_tree() or []

    return self._file_tree
|
||||||
|
|
||||||
def parse_file(self, path: Includes) -> Optional[List[Includes]]:
|
def parse_file(self, path: Includes) -> Optional[List[Includes]]:
|
||||||
"""
|
"""
|
||||||
Internal parser. Parse the file and cache the views
|
Internal parser. Parse the file and cache the views
|
||||||
@ -79,17 +95,7 @@ class LkmlParser:
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
file = self.reader.read(path)
|
return self._process_file(path)
|
||||||
lkml_file = LkmlFile.parse_obj(lkml.load(file))
|
|
||||||
self.parsed_files[path] = file
|
|
||||||
|
|
||||||
# Cache everything
|
|
||||||
self._visited_files[path] = lkml_file.includes
|
|
||||||
for view in lkml_file.views:
|
|
||||||
view.source_file = path
|
|
||||||
self._views_cache[view.name] = view
|
|
||||||
|
|
||||||
return lkml_file.includes
|
|
||||||
|
|
||||||
except ReadException as err:
|
except ReadException as err:
|
||||||
logger.debug(traceback.format_exc())
|
logger.debug(traceback.format_exc())
|
||||||
@ -104,6 +110,71 @@ class LkmlParser:
|
|||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def _process_file(self, path: Includes) -> Optional[List[Includes]]:
    """
    Read and parse a single LookML file, caching what we find in it.

    The raw text is stored in `parsed_files`, the expanded includes in
    `_visited_files`, and every view in `_views_cache` tagged with the
    file it was declared in.

    Returns the expanded includes of the file.
    """
    raw_content = self._read_file(path)
    parsed = LkmlFile.parse_obj(lkml.load(raw_content))
    # Only keep the raw text once we know it parses
    self.parsed_files[path] = raw_content

    # Cache everything
    includes = self._expand_includes(parsed.includes)
    self._visited_files[path] = includes
    for lkml_view in parsed.views:
        lkml_view.source_file = path
        self._views_cache[lkml_view.name] = lkml_view

    return includes
|
||||||
|
|
||||||
|
def _expand_includes(
    self, includes: Optional[List[Includes]]
) -> Optional[List[Includes]]:
    """
    Resolve every include through `_expand` and flatten the results.

    Empty or missing includes are handed back untouched.
    """
    if not includes:
        return includes

    expanded_paths = []
    for include in includes:
        expanded_paths.extend(self._expand(include))

    return expanded_paths
|
||||||
|
|
||||||
|
def _expand(self, path: Includes) -> List[Includes]:
    """
    Match files in the tree if there is any * in the include.

    Includes without a wildcard are returned as a single-element list.
    For wildcard includes:
      - if the pattern already carries one of EXTENSIONS, it is matched
        against the file tree as is;
      - otherwise each extension is appended in turn and the first one
        yielding matches wins.

    Args:
        path: include as written in the LookML file

    Returns:
        The matching paths from the file tree, or an empty list (with a
        warning) when nothing matches.
    """
    # Nothing to expand
    if "*" not in path:
        return [path]

    if set(Path(path).suffixes).intersection(EXTENSIONS):
        # The pattern already names a LookML extension
        candidates = [path]
    else:
        candidates = [Includes(str(path) + suffix) for suffix in EXTENSIONS]

    for candidate in candidates:
        matches = fnmatch.filter(self.file_tree, candidate)
        if matches:
            return matches

    # Nothing matched, we cannot find the file. Warn whichever branch we
    # came from so that unresolved includes never go unnoticed.
    logger.warning(f"We could not match any file from the include {path}")
    return []
|
||||||
|
|
||||||
|
def _read_file(self, path: Includes) -> str:
    """
    Read the LookML file.

    If the path already carries one of EXTENSIONS it is read directly and
    any reader error propagates. Otherwise each extension is appended in
    turn and the first successful read is returned.

    Args:
        path: include path, with or without file extension

    Returns:
        The contents of the file.

    Raises:
        ReadException: if the file cannot be read with any extension.
    """
    # Check if any suffix is in our extension list
    if set(Path(path).suffixes).intersection(EXTENSIONS):
        return self.reader.read(path)

    for suffix in EXTENSIONS:
        candidate = path + suffix
        try:
            return self.reader.read(candidate)
        except ReadException as err:
            # Report the path we actually tried, not the bare include
            logger.debug(f"Error trying to read the file [{candidate}]: {err}")

    raise ReadException(f"Error trying to read the file [{path}]")
|
||||||
|
|
||||||
def get_view_from_cache(self, view_name: ViewName) -> Optional[LookMlView]:
|
def get_view_from_cache(self, view_name: ViewName) -> Optional[LookMlView]:
|
||||||
"""
|
"""
|
||||||
Check if view is cached, and return it.
|
Check if view is cached, and return it.
|
||||||
|
|||||||
@ -11,7 +11,13 @@
|
|||||||
"""
|
"""
|
||||||
Base local reader
|
Base local reader
|
||||||
"""
|
"""
|
||||||
|
import traceback
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from metadata.utils.logger import ingestion_logger
|
||||||
|
|
||||||
|
logger = ingestion_logger()
|
||||||
|
|
||||||
|
|
||||||
class ReadException(Exception):
|
class ReadException(Exception):
|
||||||
@ -21,9 +27,31 @@ class ReadException(Exception):
|
|||||||
|
|
||||||
|
|
||||||
class Reader(ABC):
|
class Reader(ABC):
|
||||||
|
"""
|
||||||
|
Abstract class for all readers
|
||||||
|
"""
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def read(self, path: str) -> str:
|
def read(self, path: str) -> str:
|
||||||
"""
|
"""
|
||||||
Given a string, return a string
|
Given a string, return a string
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError("Missing read implementation")
|
raise NotImplementedError("Missing read implementation")
|
||||||
|
|
||||||
|
@abstractmethod
def _get_tree(self) -> List[str]:
    """
    Return the filenames of the root.

    Hook to be implemented by each reader; callers should go through
    `get_tree`, which wraps this method and handles errors.
    """
    # Name the hook that is actually missing, not its public wrapper
    raise NotImplementedError("Missing _get_tree implementation")
|
||||||
|
|
||||||
|
def get_tree(self) -> Optional[List[str]]:
    """
    Safe wrapper around `_get_tree`.

    Any exception raised while listing the files is logged and swallowed,
    in which case None is returned instead of the tree.
    """
    tree = None
    try:
        tree = self._get_tree()
    except Exception as exc:
        logger.debug(traceback.format_exc())
        logger.error(f"Error getting file tree [{exc}]")

    return tree
|
||||||
|
|||||||
@ -13,6 +13,7 @@ GitHub client to read files with token auth
|
|||||||
"""
|
"""
|
||||||
import traceback
|
import traceback
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
from typing import List
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
@ -27,6 +28,7 @@ logger = ingestion_logger()
|
|||||||
|
|
||||||
|
|
||||||
HOST = "https://api.bitbucket.org/2.0"
|
HOST = "https://api.bitbucket.org/2.0"
|
||||||
|
PAGE_LENGTH = 100
|
||||||
|
|
||||||
|
|
||||||
class UrlParts(Enum):
|
class UrlParts(Enum):
|
||||||
@ -74,3 +76,48 @@ class BitBucketReader(ApiReader):
|
|||||||
raise ReadException(f"Error fetching file [{path}] from repo: {err}")
|
raise ReadException(f"Error fetching file [{path}] from repo: {err}")
|
||||||
|
|
||||||
raise ReadException(f"Could not fetch file [{path}] from repo")
|
raise ReadException(f"Could not fetch file [{path}] from repo")
|
||||||
|
|
||||||
|
def _get_files_from_dir(self, url: str) -> List[str]:
    """
    List every file under the given source URL, recursing into directories.

    The listing is requested PAGE_LENGTH entries at a time and the `next`
    link of each page is followed until it is exhausted, so directories
    with many entries are not truncated.

    Args:
        url: source URL of the directory to list, without trailing slash

    Returns:
        The paths of all the files found under the directory.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        RuntimeError: if the API answers with a non-200, non-error status.
    """
    files: List[str] = []

    # Ask for the entry type to tell files from directories, and keep the
    # `next` link in the partial response so we can paginate.
    # NOTE(review): assumes the `next` link carries over the `fields` and
    # `pagelen` query parameters - confirm against the API.
    page_url = f"{url}/?fields=values.path,values.type,next&pagelen={PAGE_LENGTH}"

    while page_url:
        res = requests.get(
            url=page_url,
            headers=self.auth_headers,
            timeout=30,
        )

        if res.status_code != 200:
            # If we don't get a 200, raise
            res.raise_for_status()
            raise RuntimeError("Could not fetch the tree")

        json_res = res.json()
        for entry in json_res.get("values") or []:
            path = entry.get("path")
            if not path:
                continue

            entry_type = entry.get("type")
            if entry_type:
                is_directory = entry_type == "commit_directory"
            else:
                # Without type information, fall back to the extension heuristic
                is_directory = "." not in path

            # If we have a file, append. Otherwise, call again
            if is_directory:
                new_url = url + "/" + path.split("/")[-1]
                files.extend(self._get_files_from_dir(new_url))
            else:
                files.append(path)

        page_url = json_res.get("next")

    return files
|
||||||
|
|
||||||
|
def _get_tree(self) -> List[str]:
    """
    Build the source URL from the credentials (owner, repository name and
    branch) and list the files found under it.
    """
    creds = self.credentials
    src_url = self._build_url(
        HOST,
        UrlParts.REPOS.value,
        creds.repositoryOwner.__root__,
        creds.repositoryName.__root__,
        UrlParts.SRC.value,
        creds.branch,
    )

    return self._get_files_from_dir(src_url)
|
||||||
|
|||||||
@ -14,7 +14,7 @@ GitHub client to read files with token auth
|
|||||||
import base64
|
import base64
|
||||||
import traceback
|
import traceback
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import Any, Dict
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
@ -85,3 +85,53 @@ class GitHubReader(ApiReader):
|
|||||||
raise ReadException(f"Error fetching file [{path}] from repo: {err}")
|
raise ReadException(f"Error fetching file [{path}] from repo: {err}")
|
||||||
|
|
||||||
raise ReadException(f"Could not fetch file [{path}] from repo")
|
raise ReadException(f"Could not fetch file [{path}] from repo")
|
||||||
|
|
||||||
|
def _get_default_branch(self) -> str:
    """
    Ask the repository endpoint for the `default_branch` of the repo.

    Raises if the API does not answer with a 200.
    """
    creds = self.credentials
    repo_url = self._build_url(
        HOST,
        UrlParts.REPOS.value,
        creds.repositoryOwner.__root__,
        creds.repositoryName.__root__,
    )
    res = requests.get(repo_url, headers=self.auth_headers, timeout=30)

    if res.status_code != 200:
        # If we don't get a 200, raise
        res.raise_for_status()
        raise RuntimeError("Could not fetch the default branch")

    return res.json().get("default_branch")
|
||||||
|
|
||||||
|
def _get_tree(self) -> Optional[List[str]]:
    """
    Use the GitHub Tree API to list the paths of the default branch.

    Returns:
        The `path` of every entry in the recursive tree, or None if the
        default branch cannot be determined or the API answers with a
        non-200 status that is not an error.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    # First, get the default branch
    branch = self._get_default_branch()

    # If we don't find a branch, return None
    if not branch:
        return None

    res = requests.get(
        self._build_url(
            HOST,
            UrlParts.REPOS.value,
            self.credentials.repositoryOwner.__root__,
            self.credentials.repositoryName.__root__,
            "git",
            "trees",
            f"{branch}?recursive=1",
        ),
        headers=self.auth_headers,
        timeout=30,
    )

    if res.status_code == 200:
        # Tolerate a payload without `tree` and skip entries without a
        # path, so that callers only ever get strings to match against
        entries = res.json().get("tree") or []
        return [entry["path"] for entry in entries if entry.get("path")]

    # If we don't get a 200, raise
    res.raise_for_status()

    return None
|
||||||
|
|||||||
@ -13,6 +13,7 @@ Local Reader
|
|||||||
"""
|
"""
|
||||||
import traceback
|
import traceback
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
from metadata.readers.base import Reader, ReadException
|
from metadata.readers.base import Reader, ReadException
|
||||||
from metadata.utils.constants import UTF_8
|
from metadata.utils.constants import UTF_8
|
||||||
@ -40,3 +41,12 @@ class LocalReader(Reader):
|
|||||||
except Exception as err:
|
except Exception as err:
|
||||||
logger.debug(traceback.format_exc())
|
logger.debug(traceback.format_exc())
|
||||||
raise ReadException(f"Error reading file [{path}] locally: {err}")
|
raise ReadException(f"Error reading file [{path}] locally: {err}")
|
||||||
|
|
||||||
|
def _get_tree(self) -> Optional[List[str]]:
    """
    Return the tree with the files relative to the base path.

    Every entry found recursively under `self.base_path` (files and
    directories) is returned as a `/`-separated path relative to it.
    """
    base = Path(self.base_path)

    # `relative_to` only strips the leading base path. Textual replacement
    # breaks when the base path ends with a slash or when its text shows up
    # again further down the path.
    return [path.relative_to(base).as_posix() for path in base.rglob("*")]
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
include: "views/cats.view.lkml"
|
include: "*/cats.view"
|
||||||
include: "views/dogs.view.lkml"
|
include: "views/dogs.view.lkml"
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
0
ingestion/tests/unit/topology/dashboard/__init__.py
Normal file
0
ingestion/tests/unit/topology/dashboard/__init__.py
Normal file
@ -154,3 +154,15 @@ class TestLkmlParser(TestCase):
|
|||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
get_path_from_link(link_no_files), "hello/explores/my_explore.explore.lkml"
|
get_path_from_link(link_no_files), "hello/explores/my_explore.explore.lkml"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_expand(self):
    """
    A single wildcard include can be expanded: "*/cats.view" carries no
    LookML extension, but it should resolve to the "cats.view.lkml" file
    found in the resources directory.
    """
    parser = LkmlParser(LocalReader(BASE_PATH))

    matches = parser._expand(Includes("*/cats.view"))
    first_match = matches[0]

    self.assertIn("cats.view.lkml", first_match)
|
||||||
|
|||||||
@ -0,0 +1,60 @@
|
|||||||
|
# Copyright 2021 Collate
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
"""
|
||||||
|
Test BitBucket Reader
|
||||||
|
"""
|
||||||
|
from unittest import TestCase
|
||||||
|
|
||||||
|
from metadata.generated.schema.security.credentials.bitbucketCredentials import (
|
||||||
|
BitBucketCredentials,
|
||||||
|
)
|
||||||
|
from metadata.ingestion.source.dashboard.looker.models import Includes, ViewName
|
||||||
|
from metadata.ingestion.source.dashboard.looker.parser import LkmlParser
|
||||||
|
from metadata.readers.bitbucket import BitBucketReader
|
||||||
|
|
||||||
|
|
||||||
|
class TestLookMLBitBucketReader(TestCase):
    """
    Validate the BitBucket reader against a sample LookML repo
    """

    creds = BitBucketCredentials(
        repositoryName="api",
        repositoryOwner="pmbrull-trial-api",
        branch="main",
    )

    reader = BitBucketReader(creds)
    parser = LkmlParser(reader)

    def test_lookml_read_and_parse(self):
        """
        The explore file can be read and parsed, and the views it includes
        can be resolved.

        Files come from https://bitbucket.org/pmbrull-trial-api/api/src/main
        """
        explore_path = Includes("cats.explore.lkml")
        self.parser.parse_file(explore_path)

        # Check file contents
        file_text = self.parser.parsed_files.get(explore_path)
        self.assertIn("explore: cats", file_text)

        # We can get views that are resolved even if the include does not contain `.lkml`
        cats_view = self.parser.find_view(
            view_name=ViewName("cats"), path=explore_path
        )
        self.assertIsNotNone(cats_view)
        self.assertEqual(cats_view.name, "cats")
|
||||||
@ -0,0 +1,59 @@
|
|||||||
|
# Copyright 2021 Collate
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
"""
|
||||||
|
Test GitHub Reader
|
||||||
|
"""
|
||||||
|
from unittest import TestCase
|
||||||
|
|
||||||
|
from metadata.generated.schema.security.credentials.githubCredentials import (
|
||||||
|
GitHubCredentials,
|
||||||
|
)
|
||||||
|
from metadata.ingestion.source.dashboard.looker.models import Includes, ViewName
|
||||||
|
from metadata.ingestion.source.dashboard.looker.parser import LkmlParser
|
||||||
|
from metadata.readers.github import GitHubReader
|
||||||
|
|
||||||
|
|
||||||
|
class TestLookMLGitHubReader(TestCase):
    """
    Validate the GitHub reader against the open-metadata/lookml-sample repo
    """

    creds = GitHubCredentials(
        repositoryName="lookml-sample",
        repositoryOwner="open-metadata",
    )

    reader = GitHubReader(creds)
    parser = LkmlParser(reader)

    def test_lookml_read_and_parse(self):
        """
        The explore file can be read and parsed, and the views it includes
        can be resolved.

        Files come from https://github.com/open-metadata/lookml-sample/blob/main/cats.explore.lkml
        """
        explore_path = Includes("cats.explore.lkml")
        self.parser.parse_file(explore_path)

        # Check file contents
        file_text = self.parser.parsed_files.get(explore_path)
        self.assertIn("explore: cats", file_text)

        # We can get views that are resolved even if the include does not contain `.lkml`
        cats_view = self.parser.find_view(
            view_name=ViewName("cats"), path=explore_path
        )
        self.assertIsNotNone(cats_view)
        self.assertEqual(cats_view.name, "cats")
|
||||||
Loading…
x
Reference in New Issue
Block a user