mirror of
https://github.com/datahub-project/datahub.git
synced 2025-07-03 15:17:58 +00:00

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: harshal.sheth@acryl.io <harshal.sheth@acryl.io>
157 lines
5.0 KiB
Python
157 lines
5.0 KiB
Python
import os
|
|
import pathlib
|
|
|
|
import pytest
|
|
from pydantic import SecretStr
|
|
|
|
import datahub.ingestion.source.git.git_import
|
|
from datahub.configuration.common import ConfigurationWarning
|
|
from datahub.configuration.git import GitInfo, GitReference
|
|
from datahub.ingestion.source.git.git_import import GitClone
|
|
from datahub.testing.doctest import assert_doctest
|
|
|
|
# Optional SSH key read from the environment; stays None when the variable is
# unset, which the private-clone test uses as its skip condition.
LOOKML_TEST_SSH_KEY = os.getenv("DATAHUB_LOOKML_GIT_TEST_SSH_KEY")
|
|
|
|
|
|
def test_base_url_guessing() -> None:
    """Check the file URLs and SSH locators derived from various ``repo`` values.

    Covers a full GitHub URL, a bare ``org/repo`` shorthand, GitLab URLs
    (with a trailing slash and with a nested group), explicit overrides of
    ``url_template`` / ``repo_ssh_locator``, and the deprecated ``base_url``
    option.
    """
    github_ssh = "git@github.com:datahub-project/datahub.git"

    # Full GitHub URL.
    github_full = GitInfo(
        repo="https://github.com/datahub-project/datahub",
        branch="master",
    )
    assert github_full.repo_ssh_locator == github_ssh

    # Bare "org/repo" shorthand: the expected values below are GitHub ones.
    github_short = GitInfo(repo="datahub-project/datahub", branch="master")
    readme_url = github_short.get_url_for_file_path("docker/README.md")
    assert (
        readme_url
        == "https://github.com/datahub-project/datahub/blob/master/docker/README.md"
    )
    assert github_short.repo_ssh_locator == github_ssh

    # GitLab repo; the repo URL deliberately ends with a slash.
    gitlab_ref = GitReference(
        repo="https://gitlab.com/gitlab-tests/sample-project/",
        branch="master",
    )
    hello_url = gitlab_ref.get_url_for_file_path("hello_world.md")
    assert (
        hello_url
        == "https://gitlab.com/gitlab-tests/sample-project/-/blob/master/hello_world.md"
    )

    # GitLab repo nested three path segments deep.
    gitlab_nested = GitInfo(
        repo="https://gitlab.com/gitlab-com/gl-infra/reliability",
        branch="master",
    )
    nix_url = gitlab_nested.get_url_for_file_path("onboarding/gitlab.nix")
    assert (
        nix_url
        == "https://gitlab.com/gitlab-com/gl-infra/reliability/-/blob/master/onboarding/gitlab.nix"
    )
    assert (
        gitlab_nested.repo_ssh_locator
        == "git@gitlab.com:gitlab-com/gl-infra/reliability.git"
    )

    # Explicit url_template / repo_ssh_locator values are used as given.
    gitea_locator = "https://gitea.com/gitea/tea.git"
    gitea_info = GitInfo(
        repo="https://gitea.com/gitea/tea",
        branch="main",
        url_template="https://gitea.com/gitea/tea/src/branch/{branch}/{file_path}",
        repo_ssh_locator=gitea_locator,
    )
    admin_url = gitea_info.get_url_for_file_path("cmd/admin.go")
    assert admin_url == "https://gitea.com/gitea/tea/src/branch/main/cmd/admin.go"
    assert gitea_info.repo_ssh_locator == gitea_locator

    # Deprecated base_url: only the emitted warning is checked, so the parsed
    # object is not kept.
    deprecated_settings = {
        "repo": "https://github.com/datahub-project/datahub",
        "branch": "master",
        "base_url": "http://mygithubmirror.local",
    }
    with pytest.warns(ConfigurationWarning, match="base_url is deprecated"):
        GitInfo.parse_obj(deprecated_settings)
|
|
|
|
|
|
def test_github_branch() -> None:
    """``branch_for_clone`` is None when no branch is given, else the given branch."""
    repo_name = "owner/repo"

    # No branch supplied.
    without_branch = GitInfo(repo=repo_name)
    assert without_branch.branch_for_clone is None

    # Explicit branch supplied.
    with_branch = GitInfo(repo=repo_name, branch="main")
    assert with_branch.branch_for_clone == "main"
|
|
|
|
|
|
def test_url_subdir() -> None:
    """Check how ``url_subdir`` shows up in the URL generated for a file.

    No branch is passed to ``GitReference``; every expected URL uses ``main``.
    """
    # (repo, url_subdir, expected URL for "model.sql")
    scenarios = [
        (
            "https://github.com/org/repo",
            "dbt",
            "https://github.com/org/repo/blob/main/dbt/model.sql",
        ),
        (
            "https://gitlab.com/org/repo",
            "dbt",
            "https://gitlab.com/org/repo/-/blob/main/dbt/model.sql",
        ),
        # An empty subdir adds no extra path segment.
        (
            "https://github.com/org/repo",
            "",
            "https://github.com/org/repo/blob/main/model.sql",
        ),
        # A nested subdir is kept as-is.
        (
            "https://github.com/org/repo",
            "dbt/models",
            "https://github.com/org/repo/blob/main/dbt/models/model.sql",
        ),
    ]

    for repo_url, subdir, expected_url in scenarios:
        git_ref = GitReference(repo=repo_url, url_subdir=subdir)
        assert git_ref.get_url_for_file_path("model.sql") == expected_url
|
|
|
|
|
|
def test_sanitize_repo_url() -> None:
    """Pass the ``git_import`` module to ``assert_doctest``.

    NOTE(review): presumably this runs the module's doctests, including those
    for repo URL sanitizing -- confirm against ``assert_doctest``.
    """
    module_under_test = datahub.ingestion.source.git.git_import
    assert_doctest(module_under_test)
|
|
|
|
|
|
def test_git_clone_public(tmp_path: pathlib.Path) -> None:
    """Clone a public GitLab repo without an SSH key and check the checkout contents."""
    expected_entries = {
        ".git",
        "README.md",
        "hello_world.md",
        "fork-sample-project.png",
    }

    cloner = GitClone(str(tmp_path))
    # NOTE(review): the ``branch`` value looks like a commit SHA -- presumably
    # pinned so the expected file listing stays stable; confirm.
    checkout_dir = cloner.clone(
        ssh_key=None,
        repo_url="https://gitlab.com/gitlab-tests/sample-project",
        branch="90c439634077a85bcf42d38c2c79cd94664a94ad",
    )

    assert checkout_dir.exists()
    found_entries = set(os.listdir(checkout_dir))
    assert found_entries == expected_entries
|
|
|
|
|
|
@pytest.mark.skipif(
    LOOKML_TEST_SSH_KEY is None,
    reason="DATAHUB_LOOKML_GIT_TEST_SSH_KEY env variable is not configured",
)
def test_git_clone_private(tmp_path: pathlib.Path) -> None:
    """Clone a private GitHub repo with the configured SSH key and check its contents.

    Skipped when ``LOOKML_TEST_SSH_KEY`` is None.
    """
    expected_entries = {
        ".datahub",
        "models",
        "README.md",
        ".github",
        ".git",
        "views",
        "manifest_lock.lkml",
        "manifest.lkml",
    }

    # Wrap the key in SecretStr only when it is non-empty; otherwise pass None.
    secret_key = None
    if LOOKML_TEST_SSH_KEY:
        secret_key = SecretStr(LOOKML_TEST_SSH_KEY)

    cloner = GitClone(str(tmp_path))
    checkout_dir = cloner.clone(
        ssh_key=secret_key,
        repo_url="git@github.com:acryldata/long-tail-companions-looker",
        branch="d380a2b777ec6f4653626f39c68dba85893faa74",
    )

    assert checkout_dir.exists()
    found_entries = set(os.listdir(checkout_dir))
    assert found_entries == expected_entries
|