Fixes 16010: Support Looker ingestion from Gitlab (#17284)

* introduce gitlab option to lookml ingestion

* fix reader and disable test

* fix copy paste in test case

* fix file read and keyset pagination for tree

* fix credentials to include gitlab credentials

* uncomment arguments for unused credentials to fix validation error

* fix credentials test

* fix credentials test

---------

Co-authored-by: Sriharsha Chintalapani <harshach@users.noreply.github.com>
This commit is contained in:
sam-mccarty-mavenclinic 2024-08-07 00:53:44 -04:00 committed by GitHub
parent a579431e4a
commit 11a49ef08d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 332 additions and 9 deletions

View File

@ -73,6 +73,9 @@ from metadata.generated.schema.security.credentials.bitbucketCredentials import
from metadata.generated.schema.security.credentials.githubCredentials import (
GitHubCredentials,
)
from metadata.generated.schema.security.credentials.gitlabCredentials import (
GitlabCredentials,
)
from metadata.generated.schema.type.basic import (
EntityName,
FullyQualifiedEntityName,
@ -203,6 +206,7 @@ class LookerSource(DashboardServiceSource):
NoGitCredentials,
GitHubCredentials,
BitBucketCredentials,
GitlabCredentials,
]
]
) -> "LookMLRepo":
@ -225,6 +229,7 @@ class LookerSource(DashboardServiceSource):
NoGitCredentials,
GitHubCredentials,
BitBucketCredentials,
GitlabCredentials,
]
],
path="manifest.lkml",
@ -284,7 +289,7 @@ class LookerSource(DashboardServiceSource):
}
logger.info(f"We found the following parsers:\n {self._project_parsers}")
def get_lookml_project_credentials(self, project_name: str) -> GitHubCredentials:
def get_lookml_project_credentials(self, project_name: str) -> ReadersCredentials:
"""
Given a lookml project, get its git URL and build the credentials
"""
@ -305,7 +310,7 @@ class LookerSource(DashboardServiceSource):
Depending on the type of the credentials we'll need a different reader
"""
if not self._reader_class and self.service_connection.gitCredentials:
# Both credentials from Github & Bitbucket will process by LocalReader
# Credentials from Github/Gitlab/Bitbucket will process by LocalReader
self._reader_class = LocalReader
return self._reader_class
@ -319,7 +324,7 @@ class LookerSource(DashboardServiceSource):
"""
if not self._repo_credentials:
if self.service_connection.gitCredentials and isinstance(
self.service_connection.gitCredentials, GitHubCredentials
self.service_connection.gitCredentials, ReadersCredentials
):
self._repo_credentials = self.service_connection.gitCredentials

View File

@ -28,6 +28,9 @@ from metadata.generated.schema.security.credentials.bitbucketCredentials import
from metadata.generated.schema.security.credentials.githubCredentials import (
GitHubCredentials,
)
from metadata.generated.schema.security.credentials.gitlabCredentials import (
GitlabCredentials,
)
from metadata.utils.logger import ingestion_logger
logger = ingestion_logger()
@ -38,9 +41,7 @@ def _clone_repo(
path: str,
credential: Optional[
Union[
NoGitCredentials,
GitHubCredentials,
BitBucketCredentials,
NoGitCredentials, GitHubCredentials, BitBucketCredentials, GitlabCredentials
]
],
overwrite: Optional[bool] = False,
@ -60,6 +61,8 @@ def _clone_repo(
elif isinstance(credential, BitBucketCredentials):
url = f"https://x-token-auth:{credential.token.root.get_secret_value()}@bitbucket.org/{repo_name}.git"
allow_unsafe_protocols = True
elif isinstance(credential, GitlabCredentials):
url = f"https://x-token-auth:{credential.token.root.get_secret_value()}@gitlab.com/{repo_name}.git"
assert url is not None

View File

@ -21,12 +21,15 @@ from metadata.generated.schema.security.credentials.bitbucketCredentials import
from metadata.generated.schema.security.credentials.githubCredentials import (
GitHubCredentials,
)
from metadata.generated.schema.security.credentials.gitlabCredentials import (
GitlabCredentials,
)
from metadata.readers.file.base import Reader
from metadata.utils.logger import ingestion_logger
logger = ingestion_logger()
ReadersCredentials = Union[GitHubCredentials, BitBucketCredentials]
ReadersCredentials = Union[GitHubCredentials, BitBucketCredentials, GitlabCredentials]
class ApiReader(Reader, ABC):

View File

@ -0,0 +1,172 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Gitlab client to read files with token auth
"""
import base64
import traceback
from enum import Enum
from typing import Any, Dict, List, Optional
from urllib.parse import quote, quote_plus

import requests

from metadata.generated.schema.security.credentials.gitlabCredentials import (
    GitlabCredentials,
)
from metadata.readers.file.api_reader import ApiReader
from metadata.readers.file.base import ReadException
from metadata.utils.constants import UTF_8
from metadata.utils.logger import ingestion_logger
logger = ingestion_logger()

# Base URL of the gitlab.com REST API (v4). Every request issued by this module
# is built on top of it; the host is hard-coded, so only gitlab.com is targeted.
HOST = "https://gitlab.com/api/v4"
class UrlParts(Enum):
    """
    Fixed path segments used to assemble Gitlab API URLs
    """

    FILES = "files"
    PROJECTS = "projects"
    REPOSITORY = "repository"
    TREE = "tree"
class GitlabReader(ApiReader):
    """
    Handle calls to the Gitlab API against a repo
    """

    credentials: GitlabCredentials

    def __init__(self, credentials):
        super().__init__(credentials)
        # Lazily computed by `encoded_project_path`
        self._encoded_project_path = None

    @property
    def auth_headers(self) -> Optional[Dict[str, str]]:
        """
        Build the headers to authenticate
        to the API.

        Returns None when the credentials carry no token, in which
        case the requests are sent unauthenticated.
        """
        if self._auth_headers is None and self.credentials.token:
            self._auth_headers = {
                "PRIVATE-TOKEN": self.credentials.token.root.get_secret_value()
            }

        return self._auth_headers

    @property
    def encoded_project_path(self) -> Optional[str]:
        """
        Build the URL-encoded project path (`owner/name`) for the Gitlab API.

        Returns None if either the owner or the name is empty.
        """
        if (
            self._encoded_project_path is None
            and self.credentials.repositoryOwner.root
            and self.credentials.repositoryName.root
        ):
            project_path = "/".join(
                [
                    self.credentials.repositoryOwner.root,
                    self.credentials.repositoryName.root,
                ]
            )
            # The project path travels as a single URL path segment: `/` must
            # become %2F and a space must become %20. `quote_plus` would turn
            # spaces into `+`, which is a literal plus sign inside a path.
            self._encoded_project_path = quote(project_path, safe="")

        return self._encoded_project_path

    @staticmethod
    def _decode_content(json_response: Dict[str, Any]) -> str:
        """
        Return the base64-decoded `content` of the response.
        If no `content` there, throw the KeyError
        """
        return base64.b64decode(json_response["content"]).decode(UTF_8)

    def read(self, path: str, **__) -> str:
        """
        Read a file from a Gitlab Repo and return its
        contents as a string
        https://docs.gitlab.com/ee/api/repository_files.html

        Args:
            path: path of the file, relative to the repository root
        Returns:
            the decoded file contents
        Raises:
            ReadException: on any failure, including the lookup
                of the default branch
        """
        try:
            # Resolved inside the `try` so that a failing branch lookup is
            # reported as a ReadException like any other read error.
            branch = self._get_default_branch()
            res = requests.get(
                self._build_url(
                    HOST,
                    UrlParts.PROJECTS.value,
                    self.encoded_project_path,
                    UrlParts.REPOSITORY.value,
                    UrlParts.FILES.value,
                    # Single path segment: encode `/` and use %20 for spaces
                    quote(path, safe=""),
                ),
                # Let requests encode the branch name (it may contain `#`, `&`...)
                params={"ref": branch},
                headers=self.auth_headers,
                timeout=30,
            )
            if res.status_code == 200:
                return self._decode_content(res.json())

            # If we don't get a 200, raise
            res.raise_for_status()

        except Exception as err:
            logger.debug(traceback.format_exc())
            raise ReadException(
                f"Error fetching file [{path}] from repo: {err}"
            ) from err

        # Reached on a non-200 status that `raise_for_status` does not treat as an error
        raise ReadException(f"Could not fetch file [{path}] from repo")

    def _get_default_branch(self) -> str:
        """
        Get repo default branch

        Raises:
            requests.HTTPError: on a 4xx/5xx answer
            RuntimeError: on any other non-200 answer
        """
        res = requests.get(
            self._build_url(HOST, UrlParts.PROJECTS.value, self.encoded_project_path),
            headers=self.auth_headers,
            timeout=30,
        )
        if res.status_code == 200:
            return res.json().get("default_branch")

        # If we don't get a 200, raise
        res.raise_for_status()
        raise RuntimeError("Could not fetch the default branch")

    def _get_tree(self, url: Optional[str] = None) -> Optional[List[str]]:
        """
        Use the Gitlab Repository Tree API to walk every page of the tree.

        Pages are followed through the `next` link of each response in a loop,
        so the number of pages is not bounded by the recursion limit.

        Args:
            url: page to start from; defaults to the first page of the tree
        Returns:
            the `path` of every tree entry, or None if any page answers with
            a non-200 status that is not an HTTP error
        Raises:
            requests.HTTPError: on a 4xx/5xx answer
        """
        if url is None:
            url = self._build_url(
                HOST,
                UrlParts.PROJECTS.value,
                self.encoded_project_path,
                UrlParts.REPOSITORY.value,
                f"{UrlParts.TREE.value}?recursive=true&pagination=keyset&per_page=100&order_by=path&sort=desc",
            )

        paths: List[str] = []
        next_url: Optional[str] = url
        while next_url:
            res = requests.get(
                next_url,
                headers=self.auth_headers,
                timeout=30,
            )
            if res.status_code != 200:
                # If we don't get a 200, raise
                res.raise_for_status()
                # Never hand back a partial listing
                return None

            paths.extend(elem.get("path") for elem in res.json())
            next_url = res.links.get("next", {}).get("url")

        return paths

View File

@ -20,6 +20,9 @@ from metadata.generated.schema.security.credentials.bitbucketCredentials import
from metadata.generated.schema.security.credentials.githubCredentials import (
GitHubCredentials,
)
from metadata.generated.schema.security.credentials.gitlabCredentials import (
GitlabCredentials,
)
from metadata.readers.file.credentials import (
get_credentials_from_url,
update_repository_name,
@ -66,6 +69,21 @@ class TestCreds(TestCase):
self.assertEqual(bb_updated.token.root, bb_original.token.root)
self.assertEqual(bb_updated.branch, bb_original.branch)
gl_original = GitlabCredentials(
repositoryOwner="owner",
repositoryName="name",
token="token",
)
gl_updated = update_repository_name(original=gl_original, name="new_name")
self.assertEqual(gl_original.repositoryName.root, "name")
self.assertEqual(gl_updated.repositoryName.root, "new_name")
self.assertEqual(
gl_updated.repositoryOwner.root, gl_original.repositoryOwner.root
)
self.assertEqual(gl_updated.token.root, gl_original.token.root)
def test_get_credentials_from_url(self):
"""
With and without the right owner
@ -115,3 +133,25 @@ class TestCreds(TestCase):
original=bb_original_not_owner, url=bb_url
)
self.assertEqual(bb_updated_not_owner, bb_original_not_owner)
gl_url = "git@gitlab.com:owner/repo.git"
gl_original = GitlabCredentials(
repositoryOwner="owner",
repositoryName="name",
token="token",
)
gl_updated = get_credentials_from_url(original=gl_original, url=gl_url)
self.assertEqual(gl_updated.repositoryName.root, "repo")
gl_original_not_owner = GitlabCredentials(
repositoryOwner="not_owner",
repositoryName="name",
token="token",
)
gl_updated_not_owner = get_credentials_from_url(
original=gl_original_not_owner, url=gl_url
)
self.assertEqual(gl_updated_not_owner, gl_original_not_owner)

View File

@ -0,0 +1,59 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Test Gitlab Reader
"""
from unittest import TestCase
from metadata.generated.schema.security.credentials.gitlabCredentials import (
GitlabCredentials,
)
from metadata.ingestion.source.dashboard.looker.models import Includes, ViewName
from metadata.ingestion.source.dashboard.looker.parser import LkmlParser
from metadata.readers.file.gitlab import GitlabReader
class TestLookMLGitlabReader(TestCase):
    """
    Validate the Gitlab reader against a LookML Gitlab repository
    """

    creds = GitlabCredentials(
        # An accessible LookML Gitlab Repository is required to run this Test
        repositoryName="test-group/test-repo",
        repositoryOwner="test-owner",
        token="token",
    )

    # Shared by the test below: the parser reads its files through the Gitlab reader
    reader = GitlabReader(creds)
    parser = LkmlParser(reader)

    # NOTE: the `x_` prefix means the name does not start with `test`, so the
    # default unittest discovery should not collect it. Drop the prefix to enable it.
    def x_test_lookml_read_and_parse(self):
        """
        To test the Gitlab reader, update these files with files in your test LookML repository and enable the test
        """
        explore_file = "test_explore.model.lkml"
        self.parser.parse_file(Includes(explore_file))

        contents = self.parser.parsed_files.get(Includes(explore_file))

        # Check file contents
        self.assertIn("explore: test-explore", contents)

        view = self.parser.find_view(
            view_name=ViewName("test-view"), path=Includes(explore_file)
        )

        # We can get views that are resolved even if the include does not contain `.lkml`
        self.assertIsNotNone(view)
        self.assertEqual(view.name, "test-view")

View File

@ -1762,6 +1762,8 @@ site_menu:
url: /main-concepts/metadata-standard/schemas/security/credentials/gitcredentials
- category: Main Concepts / Metadata Standard / Schemas / Security / Credentials / GithubCredentials
url: /main-concepts/metadata-standard/schemas/security/credentials/githubcredentials
- category: Main Concepts / Metadata Standard / Schemas / Security / Credentials / GitlabCredentials
url: /main-concepts/metadata-standard/schemas/security/credentials/gitlabcredentials
- category: Main Concepts / Metadata Standard / Schemas / Security / Credentials
url: /main-concepts/metadata-standard/schemas/security/credentials
- category: Main Concepts / Metadata Standard / Schemas / Security

View File

@ -3,13 +3,14 @@ package org.openmetadata.service.secrets.converter;
import java.util.List;
import org.openmetadata.schema.security.credentials.BitBucketCredentials;
import org.openmetadata.schema.security.credentials.GitHubCredentials;
import org.openmetadata.schema.security.credentials.GitlabCredentials;
import org.openmetadata.schema.services.connections.dashboard.LookerConnection;
import org.openmetadata.service.util.JsonUtils;
public class LookerConnectionClassConverter extends ClassConverter {
private static final List<Class<?>> CREDENTIALS_CLASSES =
List.of(GitHubCredentials.class, BitBucketCredentials.class);
List.of(GitHubCredentials.class, BitBucketCredentials.class, GitlabCredentials.class);
public LookerConnectionClassConverter() {
super(LookerConnection.class);

View File

@ -13,7 +13,7 @@
"default": "Looker"
},
"noGitCredentials": {
"title": "No GitHub Credentials",
"title": "No Git Credentials",
"description": "Do not set any credentials. Note that credentials are required to extract .lkml views and their lineage.",
"type": "object",
"additionalProperties": false
@ -56,6 +56,9 @@
},
{
"$ref": "../../../../security/credentials/bitbucketCredentials.json"
},
{
"$ref": "../../../../security/credentials/gitlabCredentials.json"
}
]
},

View File

@ -0,0 +1,35 @@
{
"$id": "https://open-metadata.org/schema/security/credentials/gitlabCredentials.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "GitlabCredentials",
"description": "Credentials for a Gitlab repository",
"type": "object",
"javaType": "org.openmetadata.schema.security.credentials.GitlabCredentials",
"definitions": {
"gitlabType": {
"description": "Gitlab Credentials type",
"type": "string",
"enum": ["Gitlab"],
"default": "Gitlab"
}
},
"properties": {
"type": {
"title": "Credentials Type",
"description": "Credentials Type",
"$ref": "#/definitions/gitlabType",
"default": "Gitlab"
},
"repositoryOwner": {
"$ref": "gitCredentials.json#/definitions/repositoryOwner"
},
"repositoryName": {
"$ref": "gitCredentials.json#/definitions/repositoryName"
},
"token": {
"$ref": "gitCredentials.json#/definitions/token"
}
},
"additionalProperties": false,
"required": ["repositoryOwner", "repositoryName"]
}