import contextlib
import logging
import os
import tarfile
import zipfile
from subprocess import CalledProcessError

import docker
import pytest
from testcontainers.postgres import PostgresContainer

from metadata.generated.schema.api.services.createDatabaseService import (
    CreateDatabaseServiceRequest,
)
from metadata.generated.schema.entity.services.connections.database.common.basicAuth import (
    BasicAuth,
)
from metadata.generated.schema.entity.services.connections.database.postgresConnection import (
    PostgresConnection,
)
from metadata.generated.schema.entity.services.databaseService import (
    DatabaseConnection,
    DatabaseService,
    DatabaseServiceType,
)
from metadata.generated.schema.metadataIngestion.workflow import (
    OpenMetadataWorkflowConfig,
    Sink,
    Source,
    SourceConfig,
    WorkflowConfig,
)
from metadata.ingestion.lineage.sql_lineage import search_cache
from metadata.ingestion.models.custom_pydantic import CustomSecretStr
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.workflow.metadata import MetadataWorkflow


@pytest.fixture(autouse=True, scope="session")
def config_logging():
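    """Silence sqlfluff's chatty loggers for the entire test session."""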
    logging.getLogger("sqlfluff").setLevel(logging.CRITICAL)


@contextlib.contextmanager
def try_bind(container, container_port, host_port):
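    """Bind container_port to a fixed host_port, falling back to a random free port.

    Docker raises an APIError when the requested host port is already in use;
    in that case we rebind with host_port=None so Docker assigns a free one.
    """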
    try:
        with container.with_bind_ports(container_port, host_port) as container:
            yield container
    except docker.errors.APIError:
        logging.warning("Port %s is already in use, trying another port", host_port)
        with container.with_bind_ports(container_port, None) as container:
            yield container


@pytest.fixture(scope="session")
def postgres_container(tmp_path_factory):
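    """Start a Postgres 15 container with pg_stat_statements enabled and the
    dvdrental sample database restored into it."""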
    data_dir = tmp_path_factory.mktemp("data")
    dvd_rental_zip = os.path.join(os.path.dirname(__file__), "data", "dvdrental.zip")
    zipfile.ZipFile(dvd_rental_zip, "r").extractall(str(data_dir))
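
    # put_archive() below needs a tar stream, so wrap the extracted dvdrental.tar
    # dump inside an outer tar archive before copying it into the container.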
    with tarfile.open(data_dir / "dvdrental_data.tar", "w") as tar:
        tar.add(data_dir / "dvdrental.tar", arcname="dvdrental.tar")

    container = PostgresContainer("postgres:15", dbname="dvdrental")
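    # Configure server flags via the (private) _command attribute: preload
    # pg_stat_statements and track every statement.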
    container._command = [
        "-c",
        "shared_preload_libraries=pg_stat_statements",
        "-c",
        "pg_stat_statements.max=10000",
        "-c",
        "pg_stat_statements.track=all",
    ]
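
    # Outside CI, prefer the well-known host port 5432 for easier local debugging;
    # on CI (or when 5432 is taken) let Docker assign a free port instead.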
    with try_bind(container, 5432, 5432) if not os.getenv(
        "CI"
    ) else container as container:
        docker_container = container.get_wrapped_container()
        docker_container.exec_run(["mkdir", "/data"])
        with open(data_dir / "dvdrental_data.tar", "rb") as archive:
            docker_container.put_archive("/data/", archive)
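        # Bootstrap via psql: create the superuser and activate pg_stat_statements
        # before restoring the data.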
        for query in (
            "CREATE USER postgres SUPERUSER;",
            "CREATE EXTENSION pg_stat_statements;",
        ):
            res = docker_container.exec_run(
                ["psql", "-U", container.username, "-d", container.dbname, "-c", query]
            )
            if res[0] != 0:
                raise CalledProcessError(
                    returncode=res[0], cmd=res, output=res[1].decode("utf-8")
                )
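        # Restore the dvdrental sample database from the tar dump copied above.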
        res = docker_container.exec_run(
            [
                "pg_restore",
                "-U",
                container.username,
                "-d",
                container.dbname,
                "/data/dvdrental.tar",
            ]
        )
        if res[0] != 0:
            raise CalledProcessError(
                returncode=res[0], cmd=res, output=res[1].decode("utf-8")
            )
        yield container


@pytest.fixture(scope="module")
def db_service(metadata, postgres_container):
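    """Register the containerized Postgres as an OpenMetadata database service."""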
    service = CreateDatabaseServiceRequest(
        name="docker_test_db",
        serviceType=DatabaseServiceType.Postgres,
        connection=DatabaseConnection(
            config=PostgresConnection(
                username=postgres_container.username,
                authType=BasicAuth(password=postgres_container.password),
                hostPort="localhost:"
                + postgres_container.get_exposed_port(postgres_container.port),
                database="dvdrental",
            )
        ),
    )
    service_entity = metadata.create_or_update(data=service)
    # Since we're using admin JWT (not ingestion-bot), the secret is not sent by the API
    service_entity.connection.config.authType.password = CustomSecretStr(
        postgres_container.password
    )
    yield service_entity
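    # Teardown: hard-delete the service and every entity created under it.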
    metadata.delete(
        DatabaseService, service_entity.id, recursive=True, hard_delete=True
    )


@pytest.fixture(scope="module")
def ingest_metadata(db_service, metadata: OpenMetadata):
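    """Run a metadata ingestion workflow against the Postgres test service."""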
    workflow_config = OpenMetadataWorkflowConfig(
        source=Source(
            type=db_service.connection.config.type.value.lower(),
            serviceName=db_service.fullyQualifiedName.root,
            serviceConnection=db_service.connection,
            sourceConfig=SourceConfig(config={}),
        ),
        sink=Sink(
            type="metadata-rest",
            config={},
        ),
        workflowConfig=WorkflowConfig(openMetadataServerConfig=metadata.config),
    )
    metadata_ingestion = MetadataWorkflow.create(workflow_config)
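    # Clear the shared lineage search cache so state from earlier runs
    # does not leak into this ingestion.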
    search_cache.clear()
    metadata_ingestion.execute()
    metadata_ingestion.raise_from_status()