mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-07-24 17:59:52 +00:00
Fixes 16010: Support Looker ingestion from Gitlab (#17284)
* introduce gitlab option to lookml ingestion * fix reader and disable test * fix copy paste in test case * fix file read and keyset pagination for tree * fix credentials to include gitlab credentials * uncomment arguments for unused credentials to fix validation error * fix credentials test * fix credentials test --------- Co-authored-by: Sriharsha Chintalapani <harshach@users.noreply.github.com>
This commit is contained in:
parent
a579431e4a
commit
11a49ef08d
@ -73,6 +73,9 @@ from metadata.generated.schema.security.credentials.bitbucketCredentials import
|
||||
from metadata.generated.schema.security.credentials.githubCredentials import (
|
||||
GitHubCredentials,
|
||||
)
|
||||
from metadata.generated.schema.security.credentials.gitlabCredentials import (
|
||||
GitlabCredentials,
|
||||
)
|
||||
from metadata.generated.schema.type.basic import (
|
||||
EntityName,
|
||||
FullyQualifiedEntityName,
|
||||
@ -203,6 +206,7 @@ class LookerSource(DashboardServiceSource):
|
||||
NoGitCredentials,
|
||||
GitHubCredentials,
|
||||
BitBucketCredentials,
|
||||
GitlabCredentials,
|
||||
]
|
||||
]
|
||||
) -> "LookMLRepo":
|
||||
@ -225,6 +229,7 @@ class LookerSource(DashboardServiceSource):
|
||||
NoGitCredentials,
|
||||
GitHubCredentials,
|
||||
BitBucketCredentials,
|
||||
GitlabCredentials,
|
||||
]
|
||||
],
|
||||
path="manifest.lkml",
|
||||
@ -284,7 +289,7 @@ class LookerSource(DashboardServiceSource):
|
||||
}
|
||||
logger.info(f"We found the following parsers:\n {self._project_parsers}")
|
||||
|
||||
def get_lookml_project_credentials(self, project_name: str) -> GitHubCredentials:
|
||||
def get_lookml_project_credentials(self, project_name: str) -> ReadersCredentials:
|
||||
"""
|
||||
Given a lookml project, get its git URL and build the credentials
|
||||
"""
|
||||
@ -305,7 +310,7 @@ class LookerSource(DashboardServiceSource):
|
||||
Depending on the type of the credentials we'll need a different reader
|
||||
"""
|
||||
if not self._reader_class and self.service_connection.gitCredentials:
|
||||
# Both credentials from Github & Bitbucket will process by LocalReader
|
||||
# Credentials from Github/Gitlab/Bitbucket will process by LocalReader
|
||||
self._reader_class = LocalReader
|
||||
|
||||
return self._reader_class
|
||||
@ -319,7 +324,7 @@ class LookerSource(DashboardServiceSource):
|
||||
"""
|
||||
if not self._repo_credentials:
|
||||
if self.service_connection.gitCredentials and isinstance(
|
||||
self.service_connection.gitCredentials, GitHubCredentials
|
||||
self.service_connection.gitCredentials, ReadersCredentials
|
||||
):
|
||||
self._repo_credentials = self.service_connection.gitCredentials
|
||||
|
||||
|
@ -28,6 +28,9 @@ from metadata.generated.schema.security.credentials.bitbucketCredentials import
|
||||
from metadata.generated.schema.security.credentials.githubCredentials import (
|
||||
GitHubCredentials,
|
||||
)
|
||||
from metadata.generated.schema.security.credentials.gitlabCredentials import (
|
||||
GitlabCredentials,
|
||||
)
|
||||
from metadata.utils.logger import ingestion_logger
|
||||
|
||||
logger = ingestion_logger()
|
||||
@ -38,9 +41,7 @@ def _clone_repo(
|
||||
path: str,
|
||||
credential: Optional[
|
||||
Union[
|
||||
NoGitCredentials,
|
||||
GitHubCredentials,
|
||||
BitBucketCredentials,
|
||||
NoGitCredentials, GitHubCredentials, BitBucketCredentials, GitlabCredentials
|
||||
]
|
||||
],
|
||||
overwrite: Optional[bool] = False,
|
||||
@ -60,6 +61,8 @@ def _clone_repo(
|
||||
elif isinstance(credential, BitBucketCredentials):
|
||||
url = f"https://x-token-auth:{credential.token.root.get_secret_value()}@bitbucket.org/{repo_name}.git"
|
||||
allow_unsafe_protocols = True
|
||||
elif isinstance(credential, GitlabCredentials):
|
||||
url = f"https://x-token-auth:{credential.token.root.get_secret_value()}@gitlab.com/{repo_name}.git"
|
||||
|
||||
assert url is not None
|
||||
|
||||
|
@ -21,12 +21,15 @@ from metadata.generated.schema.security.credentials.bitbucketCredentials import
|
||||
from metadata.generated.schema.security.credentials.githubCredentials import (
|
||||
GitHubCredentials,
|
||||
)
|
||||
from metadata.generated.schema.security.credentials.gitlabCredentials import (
|
||||
GitlabCredentials,
|
||||
)
|
||||
from metadata.readers.file.base import Reader
|
||||
from metadata.utils.logger import ingestion_logger
|
||||
|
||||
logger = ingestion_logger()
|
||||
|
||||
ReadersCredentials = Union[GitHubCredentials, BitBucketCredentials]
|
||||
ReadersCredentials = Union[GitHubCredentials, BitBucketCredentials, GitlabCredentials]
|
||||
|
||||
|
||||
class ApiReader(Reader, ABC):
|
||||
|
172
ingestion/src/metadata/readers/file/gitlab.py
Normal file
172
ingestion/src/metadata/readers/file/gitlab.py
Normal file
@ -0,0 +1,172 @@
|
||||
# Copyright 2021 Collate
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
Gitlab client to read files with token auth
|
||||
"""
|
||||
import base64
import traceback
from enum import Enum
from typing import Any, Dict, List, Optional
from urllib.parse import quote, quote_plus

import requests

from metadata.generated.schema.security.credentials.gitlabCredentials import (
    GitlabCredentials,
)
from metadata.readers.file.api_reader import ApiReader
from metadata.readers.file.base import ReadException
from metadata.utils.constants import UTF_8
from metadata.utils.logger import ingestion_logger
|
||||
|
||||
logger = ingestion_logger()
|
||||
|
||||
|
||||
HOST = "https://gitlab.com/api/v4"
|
||||
|
||||
|
||||
class UrlParts(Enum):
    """
    Literal path segments used when composing Gitlab REST API URLs
    """

    # Each value is the exact text placed between slashes in the request URL
    FILES = "files"
    PROJECTS = "projects"
    REPOSITORY = "repository"
    TREE = "tree"
|
||||
|
||||
|
||||
class GitlabReader(ApiReader):
    """
    Handle calls to the Gitlab API against a repo
    """

    credentials: GitlabCredentials

    def __init__(self, credentials):
        super().__init__(credentials)
        # Lazily computed by `encoded_project_path`
        self._encoded_project_path = None

    @property
    def auth_headers(self) -> Dict[str, str]:
        """
        Build the headers to authenticate to the API.

        The headers are built once, on first access, and only when the
        credentials carry a token. Without a token the cached value is
        returned untouched.
        """
        # NOTE(review): assumes the base class initialises `_auth_headers`
        # (presumably to None) - confirm against ApiReader.
        if self._auth_headers is None and self.credentials.token:
            self._auth_headers = {
                "PRIVATE-TOKEN": self.credentials.token.root.get_secret_value()
            }

        return self._auth_headers

    @property
    def encoded_project_path(self) -> str:
        """
        Build the URL-encoded `owner/name` project path for the Gitlab API.

        The value is computed once and cached. It stays None if either the
        repository owner or the repository name is empty.
        """
        if (
            self._encoded_project_path is None
            and self.credentials.repositoryOwner.root
            and self.credentials.repositoryName.root
        ):
            # The project path is a single URL path segment, so every
            # reserved character - including "/" - must be percent-encoded.
            # `quote_plus` is meant for form/query values and would turn
            # spaces into "+", which is a literal plus inside a path.
            self._encoded_project_path = quote(
                "/".join(
                    [
                        self.credentials.repositoryOwner.root,
                        self.credentials.repositoryName.root,
                    ]
                ),
                safe="",
            )

        return self._encoded_project_path

    @staticmethod
    def _decode_content(json_response: Dict[str, Any]) -> str:
        """
        Return the base64-decoded content of the response as text.

        If no `content` there, throw the KeyError
        """
        return base64.b64decode(json_response["content"]).decode(UTF_8)

    def read(self, path: str, **__) -> str:
        """
        Read a file from a Gitlab Repo and return its contents as a string
        https://docs.gitlab.com/ee/api/repository_files.html

        Args:
            path: file path relative to the repository root
        Returns:
            The decoded file contents
        Raises:
            ReadException: if the file request fails or does not return a 200.
            Errors from the default branch lookup are raised before the file
            request and propagate unchanged.
        """
        branch = self._get_default_branch()
        try:
            res = requests.get(
                self._build_url(
                    HOST,
                    UrlParts.PROJECTS.value,
                    self.encoded_project_path,
                    UrlParts.REPOSITORY.value,
                    UrlParts.FILES.value,
                    # Percent-encode the whole file path (slashes included)
                    # so it travels as one path segment
                    quote(path, safe=""),
                ),
                # Let requests encode the branch name in the query string
                params={"ref": branch},
                headers=self.auth_headers,
                timeout=30,
            )
            if res.status_code == 200:
                return self._decode_content(res.json())

            # If we don't get a 200, raise
            res.raise_for_status()

        except Exception as err:
            logger.debug(traceback.format_exc())
            raise ReadException(
                f"Error fetching file [{path}] from repo: {err}"
            ) from err

        # Reached only for a non-200 response that is not an HTTP error
        raise ReadException(f"Could not fetch file [{path}] from repo")

    def _get_default_branch(self) -> str:
        """
        Get repo default branch

        Raises:
            requests.HTTPError: if the API answers with an error status
            RuntimeError: for any other non-200 response
        """
        res = requests.get(
            self._build_url(HOST, UrlParts.PROJECTS.value, self.encoded_project_path),
            headers=self.auth_headers,
            timeout=30,
        )
        if res.status_code == 200:
            return res.json().get("default_branch")

        # If we don't get a 200, raise
        res.raise_for_status()
        raise RuntimeError("Could not fetch the default branch")

    def _get_tree(self, url: Optional[str] = None) -> Optional[List[str]]:
        """
        Use the Gitlab Repository Tree API to collect every path in the repo.

        Pages are followed iteratively through the `next` link of each
        response, so large repositories do not deepen the call stack.

        Args:
            url: page URL to start from. Defaults to the first page of the
                recursive tree listing.
        Returns:
            The list of paths, or None if any page answers with a non-200
            status that is not an HTTP error. A partial listing is never
            returned.
        Raises:
            requests.HTTPError: if any page answers with an error status
        """
        if url is None:
            url = self._build_url(
                HOST,
                UrlParts.PROJECTS.value,
                self.encoded_project_path,
                UrlParts.REPOSITORY.value,
                f"{UrlParts.TREE.value}?recursive=true&pagination=keyset&per_page=100&order_by=path&sort=desc",
            )

        paths: List[str] = []
        next_url: Optional[str] = url
        while next_url is not None:
            res = requests.get(
                next_url,
                headers=self.auth_headers,
                timeout=30,
            )
            if res.status_code != 200:
                # If we don't get a 200, raise
                res.raise_for_status()
                return None

            paths.extend(elem.get("path") for elem in res.json())

            # `res.links` is parsed from the Link header; no `next` entry
            # means this was the last page
            next_link = res.links.get("next")
            next_url = next_link["url"] if next_link else None

        return paths
|
@ -20,6 +20,9 @@ from metadata.generated.schema.security.credentials.bitbucketCredentials import
|
||||
from metadata.generated.schema.security.credentials.githubCredentials import (
|
||||
GitHubCredentials,
|
||||
)
|
||||
from metadata.generated.schema.security.credentials.gitlabCredentials import (
|
||||
GitlabCredentials,
|
||||
)
|
||||
from metadata.readers.file.credentials import (
|
||||
get_credentials_from_url,
|
||||
update_repository_name,
|
||||
@ -66,6 +69,21 @@ class TestCreds(TestCase):
|
||||
self.assertEqual(bb_updated.token.root, bb_original.token.root)
|
||||
self.assertEqual(bb_updated.branch, bb_original.branch)
|
||||
|
||||
gl_original = GitlabCredentials(
|
||||
repositoryOwner="owner",
|
||||
repositoryName="name",
|
||||
token="token",
|
||||
)
|
||||
|
||||
gl_updated = update_repository_name(original=gl_original, name="new_name")
|
||||
|
||||
self.assertEqual(gl_original.repositoryName.root, "name")
|
||||
self.assertEqual(gl_updated.repositoryName.root, "new_name")
|
||||
self.assertEqual(
|
||||
gl_updated.repositoryOwner.root, gl_original.repositoryOwner.root
|
||||
)
|
||||
self.assertEqual(gl_updated.token.root, gl_original.token.root)
|
||||
|
||||
def test_get_credentials_from_url(self):
|
||||
"""
|
||||
With and without the right owner
|
||||
@ -115,3 +133,25 @@ class TestCreds(TestCase):
|
||||
original=bb_original_not_owner, url=bb_url
|
||||
)
|
||||
self.assertEqual(bb_updated_not_owner, bb_original_not_owner)
|
||||
|
||||
gl_url = "git@gitlab.com:owner/repo.git"
|
||||
|
||||
gl_original = GitlabCredentials(
|
||||
repositoryOwner="owner",
|
||||
repositoryName="name",
|
||||
token="token",
|
||||
)
|
||||
|
||||
gl_updated = get_credentials_from_url(original=gl_original, url=gl_url)
|
||||
self.assertEqual(gl_updated.repositoryName.root, "repo")
|
||||
|
||||
gl_original_not_owner = GitlabCredentials(
|
||||
repositoryOwner="not_owner",
|
||||
repositoryName="name",
|
||||
token="token",
|
||||
)
|
||||
|
||||
gl_updated_not_owner = get_credentials_from_url(
|
||||
original=gl_original_not_owner, url=gl_url
|
||||
)
|
||||
self.assertEqual(gl_updated_not_owner, gl_original_not_owner)
|
||||
|
@ -0,0 +1,59 @@
|
||||
# Copyright 2021 Collate
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
Test Gitlab Reader
|
||||
"""
|
||||
from unittest import TestCase
|
||||
|
||||
from metadata.generated.schema.security.credentials.gitlabCredentials import (
|
||||
GitlabCredentials,
|
||||
)
|
||||
from metadata.ingestion.source.dashboard.looker.models import Includes, ViewName
|
||||
from metadata.ingestion.source.dashboard.looker.parser import LkmlParser
|
||||
from metadata.readers.file.gitlab import GitlabReader
|
||||
|
||||
|
||||
class TestLookMLGitlabReader(TestCase):
    """
    Validate the Gitlab reader against a LookML repository hosted on Gitlab
    """

    creds = GitlabCredentials(
        # An accessible LookML Gitlab Repository is required to run this Test
        repositoryName="test-group/test-repo",
        repositoryOwner="test-owner",
        token="token",
    )

    reader = GitlabReader(creds)
    parser = LkmlParser(reader)

    def test_lookml_read_and_parse(self):
        """
        To test the Gitlab reader, update these files with files in your test
        LookML repository and remove the `skipTest` call below
        """
        # Skipped explicitly (rather than hidden behind a renamed method) so
        # the test still shows up in reports together with the reason
        self.skipTest(
            "An accessible LookML Gitlab repository and token are required"
        )

        explore_file = "test_explore.model.lkml"
        self.parser.parse_file(Includes(explore_file))

        contents = self.parser.parsed_files.get(Includes(explore_file))

        # Check file contents
        self.assertIn("explore: test-explore", contents)

        view = self.parser.find_view(
            view_name=ViewName("test-view"), path=Includes(explore_file)
        )

        # We can get views that are resolved even if the include does not contain `.lkml`
        self.assertIsNotNone(view)
        self.assertEqual(view.name, "test-view")
|
@ -1762,6 +1762,8 @@ site_menu:
|
||||
url: /main-concepts/metadata-standard/schemas/security/credentials/gitcredentials
|
||||
- category: Main Concepts / Metadata Standard / Schemas / Security / Credentials / GithubCredentials
|
||||
url: /main-concepts/metadata-standard/schemas/security/credentials/githubcredentials
|
||||
- category: Main Concepts / Metadata Standard / Schemas / Security / Credentials / GitlabCredentials
|
||||
url: /main-concepts/metadata-standard/schemas/security/credentials/gitlabcredentials
|
||||
- category: Main Concepts / Metadata Standard / Schemas / Security / Credentials
|
||||
url: /main-concepts/metadata-standard/schemas/security/credentials
|
||||
- category: Main Concepts / Metadata Standard / Schemas / Security
|
||||
|
@ -3,13 +3,14 @@ package org.openmetadata.service.secrets.converter;
|
||||
import java.util.List;
|
||||
import org.openmetadata.schema.security.credentials.BitBucketCredentials;
|
||||
import org.openmetadata.schema.security.credentials.GitHubCredentials;
|
||||
import org.openmetadata.schema.security.credentials.GitlabCredentials;
|
||||
import org.openmetadata.schema.services.connections.dashboard.LookerConnection;
|
||||
import org.openmetadata.service.util.JsonUtils;
|
||||
|
||||
public class LookerConnectionClassConverter extends ClassConverter {
|
||||
|
||||
private static final List<Class<?>> CREDENTIALS_CLASSES =
|
||||
List.of(GitHubCredentials.class, BitBucketCredentials.class);
|
||||
List.of(GitHubCredentials.class, BitBucketCredentials.class, GitlabCredentials.class);
|
||||
|
||||
public LookerConnectionClassConverter() {
|
||||
super(LookerConnection.class);
|
||||
|
@ -13,7 +13,7 @@
|
||||
"default": "Looker"
|
||||
},
|
||||
"noGitCredentials": {
|
||||
"title": "No GitHub Credentials",
|
||||
"title": "No Git Credentials",
|
||||
"description": "Do not set any credentials. Note that credentials are required to extract .lkml views and their lineage.",
|
||||
"type": "object",
|
||||
"additionalProperties": false
|
||||
@ -56,6 +56,9 @@
|
||||
},
|
||||
{
|
||||
"$ref": "../../../../security/credentials/bitbucketCredentials.json"
|
||||
},
|
||||
{
|
||||
"$ref": "../../../../security/credentials/gitlabCredentials.json"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
@ -0,0 +1,35 @@
|
||||
{
|
||||
"$id": "https://open-metadata.org/schema/security/credentials/gitlabCredentials.json",
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"title": "GitlabCredentials",
|
||||
"description": "Credentials for a Gitlab repository",
|
||||
"type": "object",
|
||||
"javaType": "org.openmetadata.schema.security.credentials.GitlabCredentials",
|
||||
"definitions": {
|
||||
"gitlabType": {
|
||||
"description": "Gitlab Credentials type",
|
||||
"type": "string",
|
||||
"enum": ["Gitlab"],
|
||||
"default": "Gitlab"
|
||||
}
|
||||
},
|
||||
"properties": {
|
||||
"type": {
|
||||
"title": "Credentials Type",
|
||||
"description": "Credentials Type",
|
||||
"$ref": "#/definitions/gitlabType",
|
||||
"default": "Gitlab"
|
||||
},
|
||||
"repositoryOwner": {
|
||||
"$ref": "gitCredentials.json#/definitions/repositoryOwner"
|
||||
},
|
||||
"repositoryName": {
|
||||
"$ref": "gitCredentials.json#/definitions/repositoryName"
|
||||
},
|
||||
"token": {
|
||||
"$ref": "gitCredentials.json#/definitions/token"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": ["repositoryOwner", "repositoryName"]
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user