Fix Looker explore git link & Add BitBucket reader (#11837)

* Add looker test connection step

* Add looker test connection step

* Update Credentials

* Fix explore link and add bitbucket reader

* Format

* Fix test

* Fix spline linting

* Fix import
This commit is contained in:
Pere Miquel Brull 2023-06-02 07:19:32 +02:00 committed by GitHub
parent dae4b64326
commit fdeea71671
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
27 changed files with 739 additions and 92 deletions

View File

@ -2,3 +2,13 @@
-- Remove the nested `secretsManagerCredentials` key stored under
-- `openMetadataServerConnection` for the row named 'OpenMetadata'.
UPDATE metadata_service_entity
SET json = JSON_REMOVE(json, '$.openMetadataServerConnection.secretsManagerCredentials')
where name = 'OpenMetadata';
-- Rename githubCredentials to gitCredentials
-- Only Looker services that actually carry `githubCredentials` are touched.
-- JSON_REMOVE drops the old key and JSON_INSERT adds the value, read with
-- JSON_EXTRACT from the stored document, under the new key.
UPDATE dashboard_service_entity
SET json = JSON_INSERT(
JSON_REMOVE(json, '$.connection.config.githubCredentials'),
'$.connection.config.gitCredentials',
JSON_EXTRACT(json, '$.connection.config.githubCredentials')
)
WHERE serviceType = 'Looker'
AND JSON_EXTRACT(json, '$.connection.config.githubCredentials') IS NOT NULL;

View File

@ -2,3 +2,9 @@
-- Remove the nested `secretsManagerCredentials` key stored under
-- `openMetadataServerConnection` for the row named 'OpenMetadata'.
-- Path elements of the text[] literal are comma-separated: written with a dot,
-- the whole string is taken as a single top-level key and nothing is removed.
UPDATE metadata_service_entity
SET json = json::jsonb #- '{openMetadataServerConnection,secretsManagerCredentials}'
where name = 'OpenMetadata';
-- Rename githubCredentials to gitCredentials
-- Only Looker services that actually carry `githubCredentials` are touched.
-- The old key is removed with `#-` before the value is set under the new key,
-- so the document does not end up holding both keys (same outcome as the
-- MySQL migration). Every reference to `json` on the right-hand side reads the
-- stored value, so the extracted credentials are still available.
UPDATE dashboard_service_entity
SET json = jsonb_set(
    (json::jsonb #- '{connection,config,githubCredentials}'),
    '{connection,config,gitCredentials}',
    json#>'{connection,config,githubCredentials}'
)
where serviceType = 'Looker'
and json#>'{connection,config,githubCredentials}' is not null;

View File

@ -61,8 +61,17 @@ def test_connection(
"""
assert client.all_lookml_models(limit=1)
def validate_api_version():
    """
    Check that "4.0" is listed among the API versions
    the Looker instance reports as supported.
    Fails with an AssertionError when it is not.
    """
    supported_versions = client.versions().supported_versions
    assert any(item.version == "4.0" for item in supported_versions)
test_fn = {
"CheckAccess": client.me,
"ValidateVersion": validate_api_version,
"ListDashboards": lambda: client.all_dashboards(fields="id,title"),
"ListLookMLModels": list_datamodels_test,
}

View File

@ -0,0 +1,30 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
LookML Link handler
"""
from urllib.parse import unquote, urlparse
def get_path_from_link(link: str) -> str:
    """
    Given the `lookml_link` property from an explore,
    get the source file path to fetch the file from Git.

    Note that we cannot directly use the `source_file`
    property since it does not give us the actual path,
    only the file name.

    The usual shape will be:
    /projects/<projectId>/files/<path>?params

    If the link has no `/files/` segment, the whole decoded
    path is returned.
    """
    # Parse first, decode afterwards: decoding the full link up front would let
    # an encoded `?` or `#` inside a file name be read as the query/fragment start.
    decoded_path = unquote(urlparse(link).path)
    # Split on the first `/files/` only, so that a directory called `files`
    # inside the repository is kept as part of the returned path.
    return decoded_path.split("/files/", 1)[-1]

View File

@ -21,7 +21,7 @@ Notes:
import traceback
from datetime import datetime
from typing import Iterable, List, Optional, Sequence, Set, Union, cast
from typing import Dict, Iterable, List, Optional, Sequence, Set, Type, Union, cast
from looker_sdk.sdk.api40.methods import Looker40SDK
from looker_sdk.sdk.api40.models import Dashboard as LookerDashboard
@ -31,6 +31,7 @@ from looker_sdk.sdk.api40.models import (
LookmlModel,
LookmlModelExplore,
LookmlModelNavExplore,
Project,
)
from pydantic import ValidationError
@ -61,6 +62,9 @@ from metadata.generated.schema.entity.services.dashboardService import (
from metadata.generated.schema.metadataIngestion.workflow import (
Source as WorkflowSource,
)
from metadata.generated.schema.security.credentials.bitbucketCredentials import (
BitBucketCredentials,
)
from metadata.generated.schema.security.credentials.githubCredentials import (
GitHubCredentials,
)
@ -73,12 +77,17 @@ from metadata.ingestion.source.dashboard.dashboard_service import (
DashboardUsage,
)
from metadata.ingestion.source.dashboard.looker.columns import get_columns_from_model
from metadata.ingestion.source.dashboard.looker.links import get_path_from_link
from metadata.ingestion.source.dashboard.looker.models import (
Includes,
LookMlView,
ViewName,
)
from metadata.ingestion.source.dashboard.looker.parser import LkmlParser
from metadata.readers.api_reader import ReadersCredentials
from metadata.readers.base import Reader
from metadata.readers.bitbucket import BitBucketReader
from metadata.readers.credentials import get_credentials_from_url
from metadata.readers.github import GitHubReader
from metadata.utils import fqn
from metadata.utils.filters import filter_by_chart, filter_by_datamodel
@ -118,6 +127,7 @@ def build_datamodel_name(model_name: str, explore_name: str) -> str:
return clean_dashboard_name(model_name + "_" + explore_name)
# pylint: disable=too-many-public-methods
class LookerSource(DashboardServiceSource):
"""
Looker Source Class.
@ -137,8 +147,10 @@ class LookerSource(DashboardServiceSource):
super().__init__(config, metadata_config)
self.today = datetime.now().strftime("%Y-%m-%d")
self._parser = None
self._explores_cache = {}
self._repo_credentials: Optional[ReadersCredentials] = None
self._reader_class: Optional[Type[Reader]] = None
self._project_parsers: Optional[Dict[str, LkmlParser]] = None
@classmethod
def create(
@ -153,24 +165,91 @@ class LookerSource(DashboardServiceSource):
return cls(config, metadata_config)
@property
def parser(self) -> Optional[Dict[str, LkmlParser]]:
    """
    Project name -> LkmlParser mapping built by the setter.
    Returns None when no repository credentials are available.
    """
    return self._project_parsers if self.repository_credentials else None
@parser.setter
def parser(self, all_lookml_models: Sequence[LookmlModel]) -> None:
    """
    Build one LkmlParser per Looker project.

    Every LookML model belongs to a Looker Project, and each project
    can live in a different Git repository. For each distinct project
    name we resolve its credentials, create the matching reader and
    wrap it in a parser.

    The assumption is that all repositories share the same owner and
    can be read with the same token. If the git information of a
    project cannot be obtained, the incoming credentials are used.

    Nothing is built when there are no repository credentials.
    """
    if not self.repository_credentials:
        return

    project_names: Set[str] = {model.project_name for model in all_lookml_models}

    parsers: Dict[str, LkmlParser] = {}
    for project_name in project_names:
        project_credentials = self.get_lookml_project_credentials(
            project_name=project_name
        )
        # `self.reader` is a class, instantiated here per project
        parsers[project_name] = LkmlParser(
            reader=self.reader(credentials=project_credentials)
        )

    self._project_parsers = parsers
    logger.info(f"We found the following parsers:\n {self._project_parsers}")
def get_lookml_project_credentials(self, project_name: str) -> ReadersCredentials:
    """
    Given a lookml project, get its git URL and build the credentials.

    The project is looked up through the Looker client and its
    `git_remote_url` is used to derive the repository name. On any
    error the default repository credentials are returned instead.
    """
    try:
        project: Project = self.client.project(project_id=project_name)
        return get_credentials_from_url(
            original=self.repository_credentials, url=project.git_remote_url
        )
    except Exception as err:
        # Keep the stack trace available at debug level, as the other handlers do
        logger.debug(traceback.format_exc())
        logger.error(
            f"Error trying to build project credentials - [{err}]. We'll use the default ones."
        )
        return self.repository_credentials
@property
def reader(self) -> Optional[Type[Reader]]:
    """
    Reader class matching the type of the configured git credentials:
    GitHubCredentials -> GitHubReader, BitBucketCredentials -> BitBucketReader.

    The class is resolved on first access and cached. If neither type
    matches, the cached value stays as it was (None initially).
    """
    if not self._reader_class:
        git_credentials = self.service_connection.gitCredentials
        for credentials_type, reader_type in (
            (GitHubCredentials, GitHubReader),
            (BitBucketCredentials, BitBucketReader),
        ):
            if git_credentials and isinstance(git_credentials, credentials_type):
                self._reader_class = reader_type

    return self._reader_class
@property
def repository_credentials(self) -> Optional[ReadersCredentials]:
    """
    Check if the git credentials are informed and return them.

    The connection holds either GitHubCredentials, BitBucketCredentials
    or the "no credentials" placeholder (`noGitCredentials` in the
    connection schema). Only the first two are returned; otherwise
    the result is None. The value is cached once found.
    """
    if not self._repo_credentials:
        git_credentials = self.service_connection.gitCredentials
        # Accept every credentials type a reader exists for. Checking only
        # GitHubCredentials would leave BitBucket setups without a parser.
        if git_credentials and isinstance(
            git_credentials, (GitHubCredentials, BitBucketCredentials)
        ):
            self._repo_credentials = git_credentials

    return self._repo_credentials
def list_datamodels(self) -> Iterable[LookmlModelExplore]:
"""
@ -182,6 +261,11 @@ class LookerSource(DashboardServiceSource):
all_lookml_models: Sequence[
LookmlModel
] = self.client.all_lookml_models()
# Then, gather their information and build the parser
self.parser = all_lookml_models
# Finally, iterate through them to ingest Explores and Views
yield from self.fetch_lookml_explores(all_lookml_models)
except Exception as err:
logger.debug(traceback.format_exc())
@ -255,7 +339,7 @@ class LookerSource(DashboardServiceSource):
# We can get VIEWs from the JOINs to know the dependencies
# We will only try and fetch if we have the credentials
if self.github_credentials:
if self.repository_credentials:
for view in model.joins:
if filter_by_datamodel(
self.source_config.dataModelFilterPattern, view.name
@ -290,11 +374,17 @@ class LookerSource(DashboardServiceSource):
file definition and add it here
"""
# Only look to parse if creds are in
if self.github_credentials:
if self.repository_credentials:
try:
# This will only parse if the file has not been parsed yet
self.parser.parse_file(Includes(explore.source_file))
return self.parser.parsed_files.get(Includes(explore.source_file))
project_parser = self.parser.get(explore.project_name)
if project_parser:
# This will only parse if the file has not been parsed yet
project_parser.parse_file(
Includes(get_path_from_link(explore.lookml_link))
)
return project_parser.parsed_files.get(
Includes(get_path_from_link(explore.lookml_link))
)
except Exception as err:
logger.warning(f"Exception getting the model sql: {err}")
@ -310,24 +400,29 @@ class LookerSource(DashboardServiceSource):
Every visited view, will be cached so that we don't need to process
everything again.
"""
view: Optional[LookMlView] = self.parser.find_view(
view_name=view_name, path=Includes(explore.source_file)
)
if view:
yield CreateDashboardDataModelRequest(
name=build_datamodel_name(explore.model_name, view.name),
displayName=view.name,
description=view.description,
service=self.context.dashboard_service.fullyQualifiedName.__root__,
dataModelType=DataModelType.LookMlView.value,
serviceType=DashboardServiceType.Looker.value,
columns=get_columns_from_model(view),
sql=self.parser.parsed_files.get(Includes(view.source_file)),
project_parser = self.parser.get(explore.project_name)
if project_parser:
view: Optional[LookMlView] = project_parser.find_view(
view_name=view_name,
path=Includes(get_path_from_link(explore.lookml_link)),
)
self.status.scanned(f"Data Model Scanned: {view.name}")
yield from self.add_view_lineage(view, explore)
if view:
yield CreateDashboardDataModelRequest(
name=build_datamodel_name(explore.model_name, view.name),
displayName=view.name,
description=view.description,
service=self.context.dashboard_service.fullyQualifiedName.__root__,
dataModelType=DataModelType.LookMlView.value,
serviceType=DashboardServiceType.Looker.value,
columns=get_columns_from_model(view),
sql=project_parser.parsed_files.get(Includes(view.source_file)),
)
self.status.scanned(f"Data Model Scanned: {view.name}")
yield from self.add_view_lineage(view, explore)
def add_view_lineage(
self, view: LookMlView, explore: LookmlModelExplore

View File

@ -134,3 +134,9 @@ class LkmlParser:
# We might not find the view ever
return self.get_view_from_cache(view_name)
def __repr__(self):
    """
    Customize string repr for logs.

    Readers exposing `credentials` are shown as `owner/name` of the
    repository. Any other reader is shown through its own string form.
    """
    credentials = getattr(self.reader, "credentials", None)
    if credentials is None:
        # Not every reader carries repository credentials
        return f"Parser at [{self.reader}]"

    # Owner and name are root models: unwrap them to print the plain value
    return (
        f"Parser at [{credentials.repositoryOwner.__root__}/"
        f"{credentials.repositoryName.__root__}]"
    )

View File

@ -17,16 +17,16 @@ from typing import List
from metadata.generated.schema.entity.services.connections.pipeline.splineConnection import (
SplineConnection,
)
from metadata.ingestion.ometa.client import REST, APIError, ClientConfig
from metadata.ingestion.ometa.client import REST, ClientConfig
from metadata.ingestion.source.pipeline.spline.models import (
ExecutionDetail,
ExecutionEvents,
)
from metadata.utils.constants import AUTHORIZATION_HEADER, NO_ACCESS_TOKEN
from metadata.utils.helpers import clean_uri
from metadata.utils.logger import ingestion_logger
logger = ingestion_logger()
from metadata.utils.helpers import clean_uri
class SplineClient:
@ -34,7 +34,6 @@ class SplineClient:
Wrapper on top of Spline REST API
"""
# pylint: disable=too-many-arguments
def __init__(self, config: SplineConnection):
self.config = config
client_config: ClientConfig = ClientConfig(

View File

@ -109,9 +109,9 @@ class SplineSource(PipelineServiceSource):
def _get_table_entity(
self, database_name: str, schema_name: str, table_name: str
) -> Table:
) -> Optional[Table]:
if not table_name:
return
return None
for service_name in self.source_config.dbServiceNames:
table_fqn = fqn.build(
metadata=self.metadata,
@ -122,19 +122,21 @@ class SplineSource(PipelineServiceSource):
database_name=database_name,
)
if table_fqn:
table_entity = self.metadata.get_by_name(entity=Table, fqn=table_fqn)
table_entity: Table = self.metadata.get_by_name(
entity=Table, fqn=table_fqn
)
if table_entity:
return table_entity
return None
def _get_table_from_datasource_name(self, datasource: str):
def _get_table_from_datasource_name(self, datasource: str) -> Optional[Table]:
if (
not datasource
and not datasource.startswith("dbfs")
and not datasource.startswith("jdbc")
):
return
return None
try:
schema_name = None
@ -153,6 +155,8 @@ class SplineSource(PipelineServiceSource):
logger.debug(traceback.format_exc())
logger.warning(f"failed to parse datasource details due to: {exc}")
return None
def yield_pipeline_lineage_details(
self, pipeline_details: ExecutionEvent
) -> Optional[Iterable[AddLineageRequest]]:

View File

@ -8,7 +8,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Spline connector API response models
"""
from typing import List, Optional
from pydantic import BaseModel

View File

@ -8,7 +8,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Spline source processing utilities
"""
import traceback
from typing import Optional, Tuple
@ -36,13 +38,15 @@ def parse_dbfs_path(path: str) -> Optional[str]:
return None
def clean_name(name: str) -> Optional[str]:
    """
    Return the name as-is when it has content;
    an empty (falsy) name becomes None.
    """
    return name or None
def parse_jdbc_url(url: str) -> Tuple[Optional[str], Optional[str], Optional[str]]:
"""
@ -52,7 +56,7 @@ def parse_jdbc_url(url: str) -> Tuple[Optional[str], Optional[str], Optional[str
lexer = JdbcUriLexer(InputStream(url))
stream = CommonTokenStream(lexer)
parser = JdbcUriParser(stream)
parser._errHandler = BailErrorStrategy() # pylint: disable=protected-acc ess
parser._errHandler = BailErrorStrategy() # pylint: disable=protected-access
tree = parser.jdbcUrl()
schema_table = tree.schemaTable()
if schema_table:

View File

@ -0,0 +1,60 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Base API client to read files with token auth
"""
from abc import ABC
from typing import Dict, Union
from metadata.generated.schema.security.credentials.bitbucketCredentials import (
BitBucketCredentials,
)
from metadata.generated.schema.security.credentials.githubCredentials import (
GitHubCredentials,
)
from metadata.readers.base import Reader
from metadata.utils.logger import ingestion_logger
logger = ingestion_logger()
ReadersCredentials = Union[GitHubCredentials, BitBucketCredentials]
class ApiReader(Reader, ABC):
    """
    Base for readers that fetch files through an HTTP API,
    authenticating with the token held in the credentials.
    """

    def __init__(self, credentials: ReadersCredentials):
        self.credentials = credentials
        # Built lazily on first access to `auth_headers`
        self._auth_headers = None

    @property
    def auth_headers(self) -> Dict[str, str]:
        """
        Bearer authorization headers for the API.

        They are built once from the credentials token and cached.
        Without a token nothing is built and the cached value
        (initially None) is returned as is.
        """
        if self._auth_headers is not None:
            return self._auth_headers

        if self.credentials.token:
            secret = self.credentials.token.__root__.get_secret_value()
            self._auth_headers = {"Authorization": f"Bearer {secret}"}

        return self._auth_headers

    @staticmethod
    def _build_url(*parts: str):
        """
        Join the given URL parts with `/`
        """
        separator = "/"
        return separator.join(parts)

View File

@ -0,0 +1,76 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
BitBucket client to read files with token auth
"""
import traceback
from enum import Enum
import requests
from metadata.generated.schema.security.credentials.bitbucketCredentials import (
BitBucketCredentials,
)
from metadata.readers.api_reader import ApiReader
from metadata.readers.base import ReadException
from metadata.utils.logger import ingestion_logger
logger = ingestion_logger()
HOST = "https://api.bitbucket.org/2.0"
class UrlParts(Enum):
    """
    Fixed path segments used to build the API URLs
    """

    # .../repositories/<owner>/<name>/src/<branch>/<path>
    REPOS = "repositories"
    SRC = "src"
class BitBucketReader(ApiReader):
    """
    Handle calls to the BitBucket API against a repo
    """

    credentials: BitBucketCredentials

    def read(self, path: str) -> str:
        """
        Read a file from a BitBucket repo and return its
        contents as a string.

        The request goes to
        <HOST>/repositories/<owner>/<name>/src/<branch>/<path>
        https://developer.atlassian.com/cloud/bitbucket/rest/api-group-source/

        NOTE(review): the original docstring, copied from the GitHub reader,
        states a leading `/` in the path is tolerated - confirm for BitBucket.

        :param path: file path inside the repository
        :return: the raw text of the response
        :raises ReadException: on request errors or any non-200 answer
        """
        try:
            res = requests.get(
                self._build_url(
                    HOST,
                    UrlParts.REPOS.value,
                    self.credentials.repositoryOwner.__root__,
                    self.credentials.repositoryName.__root__,
                    UrlParts.SRC.value,
                    self.credentials.branch,
                    path,
                ),
                headers=self.auth_headers,
                timeout=30,
            )
            if res.status_code == 200:
                return res.text

            # If we don't get a 200, raise
            res.raise_for_status()

        except Exception as err:
            logger.debug(traceback.format_exc())
            # Chain the cause so the original error is not lost
            raise ReadException(
                f"Error fetching file [{path}] from repo: {err}"
            ) from err

        # Reached when the status is not 200 but `raise_for_status` did not raise
        raise ReadException(f"Could not fetch file [{path}] from repo")

View File

@ -0,0 +1,62 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Helper to manage readers' credentials functionalities
"""
from metadata.generated.schema.security.credentials.gitCredentials import RepositoryName
from metadata.readers.api_reader import ReadersCredentials
from metadata.utils.logger import ingestion_logger
logger = ingestion_logger()
def update_repository_name(
    original: ReadersCredentials, name: str
) -> ReadersCredentials:
    """
    Build new credentials pointing at the repository `name`.
    The work is done on a deep copy: the `original`
    credentials are left untouched.
    """
    new_repository_name = RepositoryName(__root__=name)

    credentials_copy = original.copy(deep=True)
    credentials_copy.repositoryName = new_repository_name

    return credentials_copy
def get_credentials_from_url(
    original: ReadersCredentials, url: str
) -> ReadersCredentials:
    """
    Given a default set of credentials and a git URL, check if the
    owner of the original credentials is part of the new URL.

    If it is, return updated credentials with the new repository name.
    If not (or if there is no URL), return the original credentials.

    This is just a quick sanity check. Worst case scenario, we won't be able to pick
    up information, which would still not happen since we work with a single
    token which cannot have permissions on different owners.
    """
    # Function-scope import so the module's import block stays untouched
    import re

    owner = original.repositoryOwner.__root__

    # Your typical URL is git@bitbucket.org:owner/repo.git
    # or git@github.com:owner/repo.git
    # The owner must be a whole path segment (`:owner/` or `/owner/`): a plain
    # substring check would also accept `own` for `owner`. Only a trailing
    # `.git` is dropped, so names such as `my.github.io` are kept intact.
    match = re.search(
        rf"(?:^|[:/]){re.escape(owner)}/(?P<repo>.+?)(?:\.git)?/?$", url or ""
    )

    if not match:
        logger.warning(
            f"Default repository owner [{original.repositoryOwner.__root__}] not found in [{url}]."
            " We'll use the default reader credentials."
        )
        return original

    return update_repository_name(original=original, name=match.group("repo"))

View File

@ -21,7 +21,8 @@ import requests
from metadata.generated.schema.security.credentials.githubCredentials import (
GitHubCredentials,
)
from metadata.readers.base import Reader, ReadException
from metadata.readers.api_reader import ApiReader
from metadata.readers.base import ReadException
from metadata.utils.constants import UTF_8
from metadata.utils.logger import ingestion_logger
@ -36,35 +37,12 @@ class UrlParts(Enum):
CONTENTS = "contents"
class GitHubReader(Reader):
class GitHubReader(ApiReader):
"""
Handle calls to the GitHub API against a repo
"""
def __init__(self, credentials: GitHubCredentials):
self.credentials = credentials
self._auth_headers = None
@property
def auth_headers(self) -> Dict[str, str]:
"""
Build the headers to authenticate
to the API
"""
if self._auth_headers is None:
self._auth_headers = {
"Authorization": f"Bearer {self.credentials.token.get_secret_value()}"
}
return self._auth_headers
@staticmethod
def _build_url(*parts: str):
"""
Build URL parts
"""
return "/".join(parts)
credentials: GitHubCredentials
@staticmethod
def _decode_content(json_response: Dict[str, Any]) -> str:
@ -80,14 +58,16 @@ class GitHubReader(Reader):
Read a file from a GitHub Repo and return its
contents as a string
https://docs.github.com/en/rest/repos/contents?apiVersion=2022-11-28#get-repository-content
This does not care if the path starts with `/` or not.
"""
try:
res = requests.get(
self._build_url(
HOST,
UrlParts.REPOS.value,
self.credentials.repositoryOwner,
self.credentials.repositoryName,
self.credentials.repositoryOwner.__root__,
self.credentials.repositoryName.__root__,
UrlParts.CONTENTS.value,
path,
),

View File

View File

@ -0,0 +1,120 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Test Credentials helpers
"""
from unittest import TestCase
from metadata.generated.schema.security.credentials.bitbucketCredentials import (
BitBucketCredentials,
)
from metadata.generated.schema.security.credentials.githubCredentials import (
GitHubCredentials,
)
from metadata.ingestion.models.custom_pydantic import CustomSecretStr
from metadata.readers.credentials import (
get_credentials_from_url,
update_repository_name,
)
class TestCreds(TestCase):
    """
    Validate credentials scenarios
    """

    def test_update_repository_name(self):
        """
        Check we get new creds without updating the original
        """
        # GitHub credentials
        original = GitHubCredentials(
            repositoryOwner="owner",
            repositoryName="name",
            token="token",
        )

        updated = update_repository_name(original=original, name="new_name")

        self.assertEqual(original.repositoryName.__root__, "name")
        self.assertEqual(updated.repositoryName.__root__, "new_name")
        self.assertEqual(
            updated.repositoryOwner.__root__, original.repositoryOwner.__root__
        )
        self.assertEqual(updated.token.__root__, original.token.__root__)

        # BitBucket credentials: the branch must be carried over too
        bb_original = BitBucketCredentials(
            repositoryOwner="owner",
            repositoryName="name",
            token="token",
            branch="branch",
        )

        bb_updated = update_repository_name(original=bb_original, name="new_name")

        self.assertEqual(bb_original.repositoryName.__root__, "name")
        self.assertEqual(bb_updated.repositoryName.__root__, "new_name")
        self.assertEqual(
            bb_updated.repositoryOwner.__root__, bb_original.repositoryOwner.__root__
        )
        self.assertEqual(bb_updated.token.__root__, bb_original.token.__root__)
        self.assertEqual(bb_updated.branch, bb_original.branch)

    def test_get_credentials_from_url(self):
        """
        With and without the right owner
        """
        # GitHub: owner found in the URL -> repository name is replaced
        url = "git@github.com:owner/repo.git"
        original = GitHubCredentials(
            repositoryOwner="owner",
            repositoryName="name",
            token="token",
        )

        updated = get_credentials_from_url(original=original, url=url)
        self.assertEqual(updated.repositoryName.__root__, "repo")

        # GitHub: owner not in the URL -> credentials come back unchanged
        original_not_owner = GitHubCredentials(
            repositoryOwner="not_owner",
            repositoryName="name",
            token="token",
        )

        updated_not_owner = get_credentials_from_url(
            original=original_not_owner, url=url
        )
        self.assertEqual(updated_not_owner, original_not_owner)

        # BitBucket: same two scenarios
        bb_url = "git@bitbucket.org:owner/repo.git"
        bb_original = BitBucketCredentials(
            repositoryOwner="owner",
            repositoryName="name",
            token="token",
            branch="branch",
        )

        bb_updated = get_credentials_from_url(original=bb_original, url=bb_url)
        self.assertEqual(bb_updated.repositoryName.__root__, "repo")

        bb_original_not_owner = BitBucketCredentials(
            repositoryOwner="not_owner",
            repositoryName="name",
            token="token",
            branch="branch",
        )

        bb_updated_not_owner = get_credentials_from_url(
            original=bb_original_not_owner, url=bb_url
        )
        self.assertEqual(bb_updated_not_owner, bb_original_not_owner)

View File

@ -0,0 +1,50 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Test GitHub Reader
"""
from unittest import TestCase
from metadata.generated.schema.security.credentials.githubCredentials import (
GitHubCredentials,
)
from metadata.readers.github import GitHubReader
class TestGitHubReader(TestCase):
    """
    Validate the github reader against the OM repo
    """

    def test_headers(self):
        """
        The token ends up in a Bearer authorization header
        """
        credentials = GitHubCredentials(
            repositoryName="name", repositoryOwner="owner", token="token"
        )
        expected_headers = {"Authorization": "Bearer token"}

        self.assertEqual(GitHubReader(credentials).auth_headers, expected_headers)

    def test_read(self):
        """
        The OM README can be read without a token
        """
        credentials = GitHubCredentials(
            repositoryName="OpenMetadata",
            repositoryOwner="open-metadata",
        )

        readme = GitHubReader(credentials).read("README.md")
        self.assertIsNotNone(readme)

View File

@ -14,6 +14,7 @@ Test the lkml parser
from pathlib import Path
from unittest import TestCase
from metadata.ingestion.source.dashboard.looker.links import get_path_from_link
from metadata.ingestion.source.dashboard.looker.parser import (
Includes,
LkmlParser,
@ -136,3 +137,20 @@ class TestLkmlParser(TestCase):
"views/cats.view.lkml": [],
},
)
def test_get_path_from_link(self):
"""
Validate utility
"""
simple_link = "/projects/my_project/files/hello.explore.lkml"
self.assertEqual(get_path_from_link(simple_link), "hello.explore.lkml")
link = "/projects/my_project/files/hello%2Fexplores%2Fmy_explore.explore.lkml?line=13"
self.assertEqual(
get_path_from_link(link), "hello/explores/my_explore.explore.lkml"
)
link_no_files = "hello%2Fexplores%2Fmy_explore.explore.lkml?line=13"
self.assertEqual(
get_path_from_link(link_no_files), "hello/explores/my_explore.explore.lkml"
)

View File

@ -21,6 +21,7 @@ import org.openmetadata.schema.entity.automations.Workflow;
import org.openmetadata.schema.metadataIngestion.DbtPipeline;
import org.openmetadata.schema.metadataIngestion.dbtconfig.DbtGCSConfig;
import org.openmetadata.schema.security.credentials.GCSCredentials;
import org.openmetadata.schema.services.connections.dashboard.LookerConnection;
import org.openmetadata.schema.services.connections.dashboard.SupersetConnection;
import org.openmetadata.schema.services.connections.dashboard.TableauConnection;
import org.openmetadata.schema.services.connections.database.BigQueryConnection;
@ -49,6 +50,7 @@ public final class ClassConverterFactory {
Map.entry(GCSConfig.class, new GCSConfigClassConverter()),
Map.entry(GCSCredentials.class, new GcsCredentialsClassConverter()),
Map.entry(GcsConnection.class, new GcsConnectionClassConverter()),
Map.entry(LookerConnection.class, new LookerConnectionClassConverter()),
Map.entry(OpenMetadataConnection.class, new OpenMetadataConnectionClassConverter()),
Map.entry(SSOAuthMechanism.class, new SSOAuthMechanismClassConverter()),
Map.entry(SupersetConnection.class, new SupersetConnectionClassConverter()),

View File

@ -0,0 +1,27 @@
package org.openmetadata.service.secrets.converter;
import java.util.List;
import org.openmetadata.schema.security.credentials.BitBucketCredentials;
import org.openmetadata.schema.security.credentials.GitHubCredentials;
import org.openmetadata.schema.services.connections.dashboard.LookerConnection;
import org.openmetadata.service.util.JsonUtils;
/**
 * Converter for {@link LookerConnection}. After converting the incoming object to a LookerConnection,
 * its git credentials are passed to {@code tryToConvertOrFail} together with the supported
 * credentials classes, and the converted value, when present, is set back on the connection.
 */
public class LookerConnectionClassConverter extends ClassConverter {

  /** Credentials classes the git credentials may be converted to. */
  private static final List<Class<?>> CREDENTIALS_CLASSES =
      List.of(GitHubCredentials.class, BitBucketCredentials.class);

  public LookerConnectionClassConverter() {
    super(LookerConnection.class);
  }

  @Override
  public Object convert(Object object) {
    LookerConnection connection = (LookerConnection) JsonUtils.convertValue(object, this.clazz);

    Object rawGitCredentials = connection.getGitCredentials();
    tryToConvertOrFail(rawGitCredentials, CREDENTIALS_CLASSES).ifPresent(connection::setGitCredentials);

    return connection;
  }
}

View File

@ -11,6 +11,13 @@
"shortCircuit": true,
"mandatory": true
},
{
"name": "ValidateVersion",
"description": "Validate that the API version supports the SDK 4.0",
"errorMessage": "API Version 4.0 is not listed on your instance supported versions. Note that 4.0 is the stable version: https://cloud.google.com/looker/docs/api-sdk",
"shortCircuit": true,
"mandatory": true
},
{
"name": "ListDashboards",
"description": "The user has permissions to list a non-empty list of dashboards",

View File

@ -12,7 +12,9 @@ import org.openmetadata.schema.entity.automations.Workflow;
import org.openmetadata.schema.metadataIngestion.DbtPipeline;
import org.openmetadata.schema.metadataIngestion.dbtconfig.DbtGCSConfig;
import org.openmetadata.schema.security.credentials.GCSCredentials;
import org.openmetadata.schema.services.connections.dashboard.LookerConnection;
import org.openmetadata.schema.services.connections.dashboard.SupersetConnection;
import org.openmetadata.schema.services.connections.dashboard.TableauConnection;
import org.openmetadata.schema.services.connections.database.BigQueryConnection;
import org.openmetadata.schema.services.connections.database.DatalakeConnection;
import org.openmetadata.schema.services.connections.database.datalake.GCSConfig;
@ -26,16 +28,18 @@ public class ClassConverterFactoryTest {
@ValueSource(
classes = {
AirflowConnection.class,
BigQueryConnection.class,
DatalakeConnection.class,
DbtGCSConfig.class,
DbtPipeline.class,
GCSConfig.class,
GCSCredentials.class,
GcsConnection.class,
LookerConnection.class,
OpenMetadataConnection.class,
SSOAuthMechanism.class,
SupersetConnection.class,
GCSCredentials.class,
OpenMetadataConnection.class,
GcsConnection.class,
GCSConfig.class,
BigQueryConnection.class,
DbtGCSConfig.class,
TableauConnection.class,
TestServiceConnectionRequest.class,
Workflow.class
})
@ -45,6 +49,6 @@ public class ClassConverterFactoryTest {
@Test
void testClassConvertedMapIsNotModified() {
assertEquals(ClassConverterFactory.getConverterMap().size(), 14);
assertEquals(ClassConverterFactory.getConverterMap().size(), 15);
}
}

View File

@ -12,7 +12,7 @@
"enum": ["Looker"],
"default": "Looker"
},
"noGitHubCredentials": {
"noGitCredentials": {
"title": "No Git Credentials",
"description": "Do not set any credentials. Note that credentials are required to extract .lkml views and their lineage.",
"type": "object",
@ -44,15 +44,18 @@
"type": "string",
"format": "uri"
},
"githubCredentials": {
"gitCredentials": {
"title": "Git Credentials",
"description": "Credentials to extract the .lkml files from a repository. This is required to get all the lineage and definitions.",
"oneOf": [
{
"$ref": "#/definitions/noGitHubCredentials"
"$ref": "#/definitions/noGitCredentials"
},
{
"$ref": "../../../../security/credentials/githubCredentials.json"
},
{
"$ref": "../../../../security/credentials/bitbucketCredentials.json"
}
]
},

View File

@ -0,0 +1,40 @@
{
"$id": "https://open-metadata.org/security/credentials/bitbucketCredentials.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "BitBucketCredentials",
"description": "Credentials for a BitBucket repository",
"type": "object",
"javaType": "org.openmetadata.schema.security.credentials.BitBucketCredentials",
"definitions": {
"bitbucketType": {
"description": "BitBucket Credentials type",
"type": "string",
"enum": ["BitBucket"],
"default": "BitBucket"
}
},
"properties": {
"type": {
"title": "Credentials Type",
"description": "Credentials Type",
"$ref": "#/definitions/bitbucketType",
"default": "BitBucket"
},
"repositoryOwner": {
"$ref": "gitCredentials.json#/definitions/repositoryOwner"
},
"repositoryName": {
"$ref": "gitCredentials.json#/definitions/repositoryName"
},
"token": {
"$ref": "gitCredentials.json#/definitions/token"
},
"branch": {
"title": "Main Branch",
"description": "Main production branch of the repository. E.g., `main`",
"type": "string"
}
},
"additionalProperties": false,
"required": ["repositoryOwner", "repositoryName", "branch"]
}

View File

@ -0,0 +1,26 @@
{
"$id": "https://open-metadata.org/security/credentials/gitCredentials.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "GitCredentials",
"description": "Credentials for a generic Git repository",
"type": "object",
"javaType": "org.openmetadata.schema.security.credentials.GitCredentials",
"definitions": {
"repositoryOwner": {
"title": "Repository Owner",
"description": "The owner (user or organization) of a Git repository. For example, in https://github.com/open-metadata/OpenMetadata, the owner is `open-metadata`.",
"type": "string"
},
"repositoryName": {
"title": "Repository Name",
"description": "The name of a Git repository. For example, in https://github.com/open-metadata/OpenMetadata, the name is `OpenMetadata`.",
"type": "string"
},
"token": {
"title": "API Token",
"description": "Token to use the API. This is required for private repositories and to ensure we don't hit API limits.",
"type": "string",
"format": "password"
}
}
}

View File

@ -5,22 +5,29 @@
"description": "Credentials for a GitHub repository",
"type": "object",
"javaType": "org.openmetadata.schema.security.credentials.GitHubCredentials",
"definitions": {
"githubType": {
"description": "GitHub Credentials type",
"type": "string",
"enum": ["GitHub"],
"default": "GitHub"
}
},
"properties": {
"type": {
"title": "Credentials Type",
"description": "Credentials Type",
"$ref": "#/definitions/githubType",
"default": "GitHub"
},
"repositoryOwner": {
"title": "Repository Owner",
"description": "The owner (user or organization) of a GitHub repository. For example, in https://github.com/open-metadata/OpenMetadata, the owner is `open-metadata`.",
"type": "string"
"$ref": "gitCredentials.json#/definitions/repositoryOwner"
},
"repositoryName": {
"title": "Repository Name",
"description": "The name of a GitHub repository. For example, in https://github.com/open-metadata/OpenMetadata, the name is `OpenMetadata`.",
"type": "string"
"$ref": "gitCredentials.json#/definitions/repositoryName"
},
"token": {
"title": "API Token",
"description": "Token to use the API. This is required for private repositories and to ensure we don't hit API limits.",
"type": "string",
"format": "password"
"$ref": "gitCredentials.json#/definitions/token"
}
},
"additionalProperties": false,