125 lines
3.8 KiB
Python

import os
import pytest
from pydantic import SecretStr
from datahub.configuration.common import ConfigurationWarning
from datahub.configuration.git import GitInfo, GitReference
from datahub.ingestion.source.git.git_import import GitClone
LOOKML_TEST_SSH_KEY = os.environ.get("DATAHUB_LOOKML_GIT_TEST_SSH_KEY")
def test_base_url_guessing():
# Basic GitHub repo.
config = GitInfo(repo="https://github.com/datahub-project/datahub", branch="master")
assert config.repo_ssh_locator == "git@github.com:datahub-project/datahub.git"
# Defaults to GitHub.
config = GitInfo(repo="datahub-project/datahub", branch="master")
assert (
config.get_url_for_file_path("docker/README.md")
== "https://github.com/datahub-project/datahub/blob/master/docker/README.md"
)
assert config.repo_ssh_locator == "git@github.com:datahub-project/datahub.git"
# GitLab repo (notice the trailing slash).
config_ref = GitReference(
repo="https://gitlab.com/gitlab-tests/sample-project/", branch="master"
)
assert (
config_ref.get_url_for_file_path("hello_world.md")
== "https://gitlab.com/gitlab-tests/sample-project/-/blob/master/hello_world.md"
)
# Three-tier GitLab repo.
config = GitInfo(
repo="https://gitlab.com/gitlab-com/gl-infra/reliability", branch="master"
)
assert (
config.get_url_for_file_path("onboarding/gitlab.nix")
== "https://gitlab.com/gitlab-com/gl-infra/reliability/-/blob/master/onboarding/gitlab.nix"
)
assert (
config.repo_ssh_locator == "git@gitlab.com:gitlab-com/gl-infra/reliability.git"
)
# Overrides.
config = GitInfo(
repo="https://gitea.com/gitea/tea",
branch="main",
url_template="https://gitea.com/gitea/tea/src/branch/{branch}/{file_path}",
repo_ssh_locator="https://gitea.com/gitea/tea.git",
)
config.get_url_for_file_path(
"cmd/admin.go"
) == "https://gitea.com/gitea/tea/src/branch/main/cmd/admin.go"
config.repo_ssh_locator == "https://gitea.com/gitea/tea.git"
# Deprecated: base_url.
with pytest.warns(ConfigurationWarning, match="base_url is deprecated"):
config = GitInfo.parse_obj(
dict(
repo="https://github.com/datahub-project/datahub",
branch="master",
base_url="http://mygithubmirror.local",
)
)
def test_github_branch():
config = GitInfo(
repo="owner/repo",
)
assert config.branch_for_clone is None
config = GitInfo(
repo="owner/repo",
branch="main",
)
assert config.branch_for_clone == "main"
def test_git_clone_public(tmp_path):
git_clone = GitClone(str(tmp_path))
checkout_dir = git_clone.clone(
ssh_key=None,
repo_url="https://gitlab.com/gitlab-tests/sample-project",
branch="90c439634077a85bcf42d38c2c79cd94664a94ad",
)
assert checkout_dir.exists()
assert set(os.listdir(checkout_dir)) == {
".git",
"README.md",
"hello_world.md",
"fork-sample-project.png",
}
@pytest.mark.skipif(
LOOKML_TEST_SSH_KEY is None,
reason="DATAHUB_LOOKML_GIT_TEST_SSH_KEY env variable is not configured",
)
def test_git_clone_private(tmp_path):
git_clone = GitClone(str(tmp_path))
secret_key = SecretStr(LOOKML_TEST_SSH_KEY) if LOOKML_TEST_SSH_KEY else None
checkout_dir = git_clone.clone(
ssh_key=secret_key,
repo_url="git@github.com:acryldata/long-tail-companions-looker",
branch="d380a2b777ec6f4653626f39c68dba85893faa74",
)
assert checkout_dir.exists()
assert set(os.listdir(checkout_dir)) == set(
[
".datahub",
"models",
"README.md",
".github",
".git",
"views",
"manifest_lock.lkml",
"manifest.lkml",
]
)