diff --git a/ingestion/src/metadata/ingestion/source/dashboard/looker/metadata.py b/ingestion/src/metadata/ingestion/source/dashboard/looker/metadata.py index 14ab898be53..89bb367a53f 100644 --- a/ingestion/src/metadata/ingestion/source/dashboard/looker/metadata.py +++ b/ingestion/src/metadata/ingestion/source/dashboard/looker/metadata.py @@ -73,6 +73,9 @@ from metadata.generated.schema.security.credentials.bitbucketCredentials import from metadata.generated.schema.security.credentials.githubCredentials import ( GitHubCredentials, ) +from metadata.generated.schema.security.credentials.gitlabCredentials import ( + GitlabCredentials, +) from metadata.generated.schema.type.basic import ( EntityName, FullyQualifiedEntityName, @@ -203,6 +206,7 @@ class LookerSource(DashboardServiceSource): NoGitCredentials, GitHubCredentials, BitBucketCredentials, + GitlabCredentials, ] ] ) -> "LookMLRepo": @@ -225,6 +229,7 @@ class LookerSource(DashboardServiceSource): NoGitCredentials, GitHubCredentials, BitBucketCredentials, + GitlabCredentials, ] ], path="manifest.lkml", @@ -284,7 +289,7 @@ class LookerSource(DashboardServiceSource): } logger.info(f"We found the following parsers:\n {self._project_parsers}") - def get_lookml_project_credentials(self, project_name: str) -> GitHubCredentials: + def get_lookml_project_credentials(self, project_name: str) -> ReadersCredentials: """ Given a lookml project, get its git URL and build the credentials """ @@ -305,7 +310,7 @@ class LookerSource(DashboardServiceSource): Depending on the type of the credentials we'll need a different reader """ if not self._reader_class and self.service_connection.gitCredentials: - # Both credentials from Github & Bitbucket will process by LocalReader + # Credentials from Github/Gitlab/Bitbucket will process by LocalReader self._reader_class = LocalReader return self._reader_class @@ -319,7 +324,7 @@ class LookerSource(DashboardServiceSource): """ if not self._repo_credentials: if 
self.service_connection.gitCredentials and isinstance( - self.service_connection.gitCredentials, GitHubCredentials + self.service_connection.gitCredentials, ReadersCredentials ): self._repo_credentials = self.service_connection.gitCredentials diff --git a/ingestion/src/metadata/ingestion/source/dashboard/looker/utils.py b/ingestion/src/metadata/ingestion/source/dashboard/looker/utils.py index 5bc663c187d..60d7046c9f8 100644 --- a/ingestion/src/metadata/ingestion/source/dashboard/looker/utils.py +++ b/ingestion/src/metadata/ingestion/source/dashboard/looker/utils.py @@ -28,6 +28,9 @@ from metadata.generated.schema.security.credentials.bitbucketCredentials import from metadata.generated.schema.security.credentials.githubCredentials import ( GitHubCredentials, ) +from metadata.generated.schema.security.credentials.gitlabCredentials import ( + GitlabCredentials, +) from metadata.utils.logger import ingestion_logger logger = ingestion_logger() @@ -38,9 +41,7 @@ def _clone_repo( path: str, credential: Optional[ Union[ - NoGitCredentials, - GitHubCredentials, - BitBucketCredentials, + NoGitCredentials, GitHubCredentials, BitBucketCredentials, GitlabCredentials ] ], overwrite: Optional[bool] = False, @@ -60,6 +61,8 @@ def _clone_repo( elif isinstance(credential, BitBucketCredentials): url = f"https://x-token-auth:{credential.token.root.get_secret_value()}@bitbucket.org/{repo_name}.git" allow_unsafe_protocols = True + elif isinstance(credential, GitlabCredentials): + url = f"https://x-token-auth:{credential.token.root.get_secret_value()}@gitlab.com/{repo_name}.git" assert url is not None diff --git a/ingestion/src/metadata/readers/file/api_reader.py b/ingestion/src/metadata/readers/file/api_reader.py index 53668a21b5a..62dc2b1bcae 100644 --- a/ingestion/src/metadata/readers/file/api_reader.py +++ b/ingestion/src/metadata/readers/file/api_reader.py @@ -21,12 +21,15 @@ from metadata.generated.schema.security.credentials.bitbucketCredentials import from 
metadata.generated.schema.security.credentials.githubCredentials import ( GitHubCredentials, ) +from metadata.generated.schema.security.credentials.gitlabCredentials import ( + GitlabCredentials, +) from metadata.readers.file.base import Reader from metadata.utils.logger import ingestion_logger logger = ingestion_logger() -ReadersCredentials = Union[GitHubCredentials, BitBucketCredentials] +ReadersCredentials = Union[GitHubCredentials, BitBucketCredentials, GitlabCredentials] class ApiReader(Reader, ABC): diff --git a/ingestion/src/metadata/readers/file/gitlab.py b/ingestion/src/metadata/readers/file/gitlab.py new file mode 100644 index 00000000000..67f0b912146 --- /dev/null +++ b/ingestion/src/metadata/readers/file/gitlab.py @@ -0,0 +1,172 @@ +# Copyright 2021 Collate +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""
Gitlab client to read files with token auth
"""
import base64
import traceback
from enum import Enum
from typing import Any, Dict, List, Optional
from urllib.parse import quote

import requests

from metadata.generated.schema.security.credentials.gitlabCredentials import (
    GitlabCredentials,
)
from metadata.readers.file.api_reader import ApiReader
from metadata.readers.file.base import ReadException
from metadata.utils.constants import UTF_8
from metadata.utils.logger import ingestion_logger

logger = ingestion_logger()


HOST = "https://gitlab.com/api/v4"


class UrlParts(Enum):
    """
    Fixed path segments used to build Gitlab API URLs
    """

    FILES = "files"
    PROJECTS = "projects"
    REPOSITORY = "repository"
    TREE = "tree"


class GitlabReader(ApiReader):
    """
    Handle calls to the Gitlab API against a repo
    """

    credentials: GitlabCredentials

    def __init__(self, credentials: GitlabCredentials):
        super().__init__(credentials)
        # Both values are computed lazily on first use and then reused
        self._encoded_project_path: Optional[str] = None
        self._default_branch: Optional[str] = None

    @property
    def auth_headers(self) -> Dict[str, str]:
        """
        Build the headers to authenticate to the API.

        The headers are built once, the first time a token is available.
        If the credentials carry no token, whatever the parent class left
        in `_auth_headers` is returned unchanged.
        """
        if self._auth_headers is None and self.credentials.token:
            self._auth_headers = {
                "PRIVATE-TOKEN": self.credentials.token.root.get_secret_value()
            }

        return self._auth_headers

    @property
    def encoded_project_path(self) -> str:
        """
        Build the URL-encoded `<owner>/<name>` project path for the Gitlab API.

        The value is used as a single URL path segment, so every reserved
        character (including `/`) is percent-encoded.
        """
        if (
            self._encoded_project_path is None
            and self.credentials.repositoryOwner.root
            and self.credentials.repositoryName.root
        ):
            project_path = "/".join(
                [
                    self.credentials.repositoryOwner.root,
                    self.credentials.repositoryName.root,
                ]
            )
            # `quote` and not `quote_plus`: in a URL path a `+` is a literal
            # plus sign, not an encoded space.
            self._encoded_project_path = quote(project_path, safe="")

        return self._encoded_project_path

    @staticmethod
    def _decode_content(json_response: Dict[str, Any]) -> str:
        """
        Return the decoded content of a repository file response.

        The `content` field is base64-decoded and then decoded as UTF-8.
        If there is no `content` key, the KeyError is propagated.
        """
        return base64.b64decode(json_response["content"]).decode(UTF_8)

    def read(self, path: str, **__) -> str:
        """
        Read a file from a Gitlab Repo and return its
        contents as a string
        https://docs.gitlab.com/ee/api/repository_files.html

        Args:
            path: path of the file inside the repository
        Returns:
            The decoded file contents
        Raises:
            ReadException: if the default branch or the file cannot be fetched
        """
        try:
            res = requests.get(
                self._build_url(
                    HOST,
                    UrlParts.PROJECTS.value,
                    self.encoded_project_path,
                    UrlParts.REPOSITORY.value,
                    UrlParts.FILES.value,
                    # The file path travels as one path segment: encode `/`
                    # and encode spaces as %20 rather than `+`.
                    quote(path, safe=""),
                ),
                # Let requests escape the branch name in the query string.
                # The branch lookup sits inside the `try` so that its
                # failures are reported as ReadException as well.
                params={"ref": self._get_default_branch()},
                headers=self.auth_headers,
                timeout=30,
            )
            if res.status_code == 200:
                return self._decode_content(res.json())

            # If we don't get a 200, raise
            res.raise_for_status()

        except Exception as err:
            logger.debug(traceback.format_exc())
            raise ReadException(
                f"Error fetching file [{path}] from repo: {err}"
            ) from err

        # Non-200 status that is not an HTTP error either
        raise ReadException(f"Could not fetch file [{path}] from repo")

    def _get_default_branch(self) -> str:
        """
        Get the repo default branch.

        The branch is requested once and cached on the reader, so reading
        several files does not repeat the project lookup.

        Raises:
            requests.HTTPError: if the API answers with an error status
            RuntimeError: if the response carries no default branch
        """
        if self._default_branch is None:
            res = requests.get(
                self._build_url(
                    HOST, UrlParts.PROJECTS.value, self.encoded_project_path
                ),
                headers=self.auth_headers,
                timeout=30,
            )
            if res.status_code != 200:
                # If we don't get a 200, raise
                res.raise_for_status()
                raise RuntimeError("Could not fetch the default branch")

            branch = res.json().get("default_branch")
            if not branch:
                # Without this check a missing branch would be sent as `ref=None`
                raise RuntimeError("Could not fetch the default branch")
            self._default_branch = branch

        return self._default_branch

    def _get_tree(self, url: Optional[str] = None) -> Optional[List[str]]:
        """
        Use the Gitlab Repository Tree API to collect the paths of all tree pages.

        Args:
            url: page to start from. Defaults to the first page of the
                recursive tree listing of the project.
        Returns:
            The `path` of every returned element, or None if a page answers
            with a non-200 status that is not an HTTP error.
        Raises:
            requests.HTTPError: if a page answers with an error status
        """
        if url is None:
            url = self._build_url(
                HOST,
                UrlParts.PROJECTS.value,
                self.encoded_project_path,
                UrlParts.REPOSITORY.value,
                f"{UrlParts.TREE.value}?recursive=true&pagination=keyset&per_page=100&order_by=path&sort=desc",
            )

        paths: List[str] = []
        # Follow the `next` links iteratively: recursing once per page would
        # grow the call stack with the size of the repository.
        while url:
            res = requests.get(
                url,
                headers=self.auth_headers,
                timeout=30,
            )
            if res.status_code != 200:
                # If we don't get a 200, raise
                res.raise_for_status()
                return None

            paths.extend(elem.get("path") for elem in res.json())
            url = res.links.get("next", {}).get("url")

        return paths
b/ingestion/tests/unit/readers/test_credentials.py index 66381a5c73d..864dffda732 100644 --- a/ingestion/tests/unit/readers/test_credentials.py +++ b/ingestion/tests/unit/readers/test_credentials.py @@ -20,6 +20,9 @@ from metadata.generated.schema.security.credentials.bitbucketCredentials import from metadata.generated.schema.security.credentials.githubCredentials import ( GitHubCredentials, ) +from metadata.generated.schema.security.credentials.gitlabCredentials import ( + GitlabCredentials, +) from metadata.readers.file.credentials import ( get_credentials_from_url, update_repository_name, @@ -66,6 +69,21 @@ class TestCreds(TestCase): self.assertEqual(bb_updated.token.root, bb_original.token.root) self.assertEqual(bb_updated.branch, bb_original.branch) + gl_original = GitlabCredentials( + repositoryOwner="owner", + repositoryName="name", + token="token", + ) + + gl_updated = update_repository_name(original=gl_original, name="new_name") + + self.assertEqual(gl_original.repositoryName.root, "name") + self.assertEqual(gl_updated.repositoryName.root, "new_name") + self.assertEqual( + gl_updated.repositoryOwner.root, gl_original.repositoryOwner.root + ) + self.assertEqual(gl_updated.token.root, gl_original.token.root) + def test_get_credentials_from_url(self): """ With and without the right owner @@ -115,3 +133,25 @@ class TestCreds(TestCase): original=bb_original_not_owner, url=bb_url ) self.assertEqual(bb_updated_not_owner, bb_original_not_owner) + + gl_url = "git@gitlab.com:owner/repo.git" + + gl_original = GitlabCredentials( + repositoryOwner="owner", + repositoryName="name", + token="token", + ) + + gl_updated = get_credentials_from_url(original=gl_original, url=gl_url) + self.assertEqual(gl_updated.repositoryName.root, "repo") + + gl_original_not_owner = GitlabCredentials( + repositoryOwner="not_owner", + repositoryName="name", + token="token", + ) + + gl_updated_not_owner = get_credentials_from_url( + original=gl_original_not_owner, url=gl_url + ) + 
self.assertEqual(gl_updated_not_owner, gl_original_not_owner) diff --git a/ingestion/tests/unit/topology/dashboard/test_lookml_gitlab_reader.py b/ingestion/tests/unit/topology/dashboard/test_lookml_gitlab_reader.py new file mode 100644 index 00000000000..47eecdc3976 --- /dev/null +++ b/ingestion/tests/unit/topology/dashboard/test_lookml_gitlab_reader.py @@ -0,0 +1,59 @@ +# Copyright 2021 Collate +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Test Gitlab Reader +""" +from unittest import TestCase + +from metadata.generated.schema.security.credentials.gitlabCredentials import ( + GitlabCredentials, +) +from metadata.ingestion.source.dashboard.looker.models import Includes, ViewName +from metadata.ingestion.source.dashboard.looker.parser import LkmlParser +from metadata.readers.file.gitlab import GitlabReader + + +class TestLookMLGitlabReader(TestCase): + """ + Validate the Gitlab reader against a test LookML repo + """ + + creds = GitlabCredentials( + # An accessible LookML Gitlab Repository is required to run this Test + repositoryName="test-group/test-repo", + repositoryOwner="test-owner", + token="token", + ) + + reader = GitlabReader(creds) + parser = LkmlParser(reader) + + def x_test_lookml_read_and_parse(self): + """ + To test the Gitlab reader, update these files with files in your test LookML repository and enable the test + """ + + explore_file = "test_explore.model.lkml" + self.parser.parse_file(Includes(explore_file)) + + contents = 
self.parser.parsed_files.get(Includes(explore_file)) + + # Check file contents + self.assertIn("explore: test-explore", contents) + + view = self.parser.find_view( + view_name=ViewName("test-view"), path=Includes(explore_file) + ) + + # We can get views that are resolved even if the include does not contain `.lkml` + self.assertIsNotNone(view) + self.assertEqual(view.name, "test-view") diff --git a/openmetadata-docs/content/v1.5.x-SNAPSHOT/menu.md b/openmetadata-docs/content/v1.5.x-SNAPSHOT/menu.md index 2b5a09c6d0d..33a9a5891cb 100644 --- a/openmetadata-docs/content/v1.5.x-SNAPSHOT/menu.md +++ b/openmetadata-docs/content/v1.5.x-SNAPSHOT/menu.md @@ -1762,6 +1762,8 @@ site_menu: url: /main-concepts/metadata-standard/schemas/security/credentials/gitcredentials - category: Main Concepts / Metadata Standard / Schemas / Security / Credentials / GithubCredentials url: /main-concepts/metadata-standard/schemas/security/credentials/githubcredentials + - category: Main Concepts / Metadata Standard / Schemas / Security / Credentials / GitlabCredentials + url: /main-concepts/metadata-standard/schemas/security/credentials/gitlabcredentials - category: Main Concepts / Metadata Standard / Schemas / Security / Credentials url: /main-concepts/metadata-standard/schemas/security/credentials - category: Main Concepts / Metadata Standard / Schemas / Security diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/secrets/converter/LookerConnectionClassConverter.java b/openmetadata-service/src/main/java/org/openmetadata/service/secrets/converter/LookerConnectionClassConverter.java index 08862fb6581..6abc1e1cdbc 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/secrets/converter/LookerConnectionClassConverter.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/secrets/converter/LookerConnectionClassConverter.java @@ -3,13 +3,14 @@ package org.openmetadata.service.secrets.converter; import java.util.List; import 
org.openmetadata.schema.security.credentials.BitBucketCredentials; import org.openmetadata.schema.security.credentials.GitHubCredentials; +import org.openmetadata.schema.security.credentials.GitlabCredentials; import org.openmetadata.schema.services.connections.dashboard.LookerConnection; import org.openmetadata.service.util.JsonUtils; public class LookerConnectionClassConverter extends ClassConverter { private static final List> CREDENTIALS_CLASSES = - List.of(GitHubCredentials.class, BitBucketCredentials.class); + List.of(GitHubCredentials.class, BitBucketCredentials.class, GitlabCredentials.class); public LookerConnectionClassConverter() { super(LookerConnection.class); diff --git a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/dashboard/lookerConnection.json b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/dashboard/lookerConnection.json index 09726306d35..7c0de8a3490 100644 --- a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/dashboard/lookerConnection.json +++ b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/dashboard/lookerConnection.json @@ -13,7 +13,7 @@ "default": "Looker" }, "noGitCredentials": { - "title": "No GitHub Credentials", + "title": "No Git Credentials", "description": "Do not set any credentials. 
Note that credentials are required to extract .lkml views and their lineage.", "type": "object", "additionalProperties": false @@ -56,6 +56,9 @@ }, { "$ref": "../../../../security/credentials/bitbucketCredentials.json" + }, + { + "$ref": "../../../../security/credentials/gitlabCredentials.json" } ] }, diff --git a/openmetadata-spec/src/main/resources/json/schema/security/credentials/gitlabCredentials.json b/openmetadata-spec/src/main/resources/json/schema/security/credentials/gitlabCredentials.json new file mode 100644 index 00000000000..86f132fa1ab --- /dev/null +++ b/openmetadata-spec/src/main/resources/json/schema/security/credentials/gitlabCredentials.json @@ -0,0 +1,35 @@ +{ + "$id": "https://open-metadata.org/schema/security/credentials/gitlabCredentials.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "GitlabCredentials", + "description": "Credentials for a Gitlab repository", + "type": "object", + "javaType": "org.openmetadata.schema.security.credentials.GitlabCredentials", + "definitions": { + "gitlabType": { + "description": "Gitlab Credentials type", + "type": "string", + "enum": ["Gitlab"], + "default": "Gitlab" + } + }, + "properties": { + "type": { + "title": "Credentials Type", + "description": "Credentials Type", + "$ref": "#/definitions/gitlabType", + "default": "Gitlab" + }, + "repositoryOwner": { + "$ref": "gitCredentials.json#/definitions/repositoryOwner" + }, + "repositoryName": { + "$ref": "gitCredentials.json#/definitions/repositoryName" + }, + "token": { + "$ref": "gitCredentials.json#/definitions/token" + } + }, + "additionalProperties": false, + "required": ["repositoryOwner", "repositoryName"] +}