mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-29 02:48:24 +00:00
test(ingest/minio): Configure delta lake minio tests for arm64 (#8364)
Co-authored-by: Tamas Nemeth <treff7es@gmail.com>
This commit is contained in:
parent
64bcc132a3
commit
851c5bab9e
1
metadata-ingestion/.gitignore
vendored
1
metadata-ingestion/.gitignore
vendored
@ -6,7 +6,6 @@ pvenv36/
|
||||
/venv*/
|
||||
bq_credentials.json
|
||||
junit.*.xml
|
||||
tests/integrations/delta_lake/minio/minio
|
||||
/tmp
|
||||
*.bak
|
||||
|
||||
|
||||
@ -0,0 +1,12 @@
|
||||
version: "3"
|
||||
services:
|
||||
minio:
|
||||
image: minio/minio:RELEASE.2023-07-07T07-13-57Z
|
||||
container_name: "minio_test"
|
||||
environment:
|
||||
MINIO_ROOT_USER: "miniouser"
|
||||
MINIO_ROOT_PASSWORD: "miniopassword"
|
||||
ports:
|
||||
- 9000:9000 # S3 API
|
||||
- 9001:9001 # Web UI
|
||||
command: server /data --console-address ":9001"
|
||||
@ -1,3 +0,0 @@
|
||||
kill -9 `cat minio_pid.txt`
|
||||
rm ./tests/integrations/delta_lake/minio/minio_pid.txt
|
||||
rm -rf ./tests/integrations/delta_lake/minio/data
|
||||
@ -1,6 +0,0 @@
|
||||
#ref https://hub.docker.com/r/minio/minio/
|
||||
|
||||
wget https://dl.min.io/server/minio/release/linux-amd64/minio -P ./tests/integrations/delta_lake/minio/
|
||||
chmod +x ./tests/integrations/delta_lake/minio/minio
|
||||
nohup ./tests/integrations/delta_lake/minio/minio server ./tests/integrations/delta_lake/minio/data > temp.log 2>&1 &
|
||||
echo $! > ./tests/integrations/delta_lake/minio/minio_pid.txt
|
||||
@ -1,4 +0,0 @@
|
||||
{"commitInfo":{"timestamp":1655724801453,"operation":"WRITE","operationParameters":{},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"local Delta-Standalone/0.4.0"}}
|
||||
{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}
|
||||
{"metaData":{"id":"468553a9-3b52-4e50-b855-e64d2d070cb8","name":"test-table","description":"test delta table","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"foo\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"bar\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"zip\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["foo","bar"],"configuration":{},"createdTime":1655724801057}}
|
||||
{"add":{"path":"0","partitionValues":{"bar":"0","foo":"0"},"size":100,"modificationTime":1655724801375,"dataChange":true,"tags":{"someTagKey":"someTagVal"}}}
|
||||
@ -1,2 +0,0 @@
|
||||
{"commitInfo":{"timestamp":1655724802209,"operation":"WRITE","operationParameters":{},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"local Delta-Standalone/0.4.0"}}
|
||||
{"add":{"path":"1","partitionValues":{"bar":"1","foo":"1"},"size":100,"modificationTime":1655724802208,"dataChange":true,"tags":{"someTagKey":"someTagVal"}}}
|
||||
@ -1,2 +0,0 @@
|
||||
{"commitInfo":{"timestamp":1655724802227,"operation":"WRITE","operationParameters":{},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"local Delta-Standalone/0.4.0"}}
|
||||
{"add":{"path":"2","partitionValues":{"bar":"0","foo":"2"},"size":100,"modificationTime":1655724802226,"dataChange":true,"tags":{"someTagKey":"someTagVal"}}}
|
||||
@ -1,2 +0,0 @@
|
||||
{"commitInfo":{"timestamp":1655724802244,"operation":"WRITE","operationParameters":{},"readVersion":2,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"local Delta-Standalone/0.4.0"}}
|
||||
{"add":{"path":"3","partitionValues":{"bar":"1","foo":"0"},"size":100,"modificationTime":1655724802243,"dataChange":true,"tags":{"someTagKey":"someTagVal"}}}
|
||||
@ -1,2 +0,0 @@
|
||||
{"commitInfo":{"timestamp":1655724802264,"operation":"WRITE","operationParameters":{},"readVersion":3,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"local Delta-Standalone/0.4.0"}}
|
||||
{"add":{"path":"4","partitionValues":{"bar":"0","foo":"1"},"size":100,"modificationTime":1655724802263,"dataChange":true,"tags":{"someTagKey":"someTagVal"}}}
|
||||
@ -2,47 +2,58 @@ import os
|
||||
import subprocess
|
||||
|
||||
import boto3
|
||||
import freezegun
|
||||
import pytest
|
||||
|
||||
from datahub.ingestion.run.pipeline import Pipeline
|
||||
from tests.test_helpers import mce_helpers
|
||||
from tests.test_helpers.docker_helpers import wait_for_port
|
||||
|
||||
FROZEN_TIME = "2020-04-14 07:00:00"
|
||||
MINIO_PORT = 9000
|
||||
|
||||
|
||||
@pytest.fixture(scope="module", autouse=True)
|
||||
def minio_startup():
|
||||
cmd = "./tests/integration/delta_lake/minio/setup_minio.sh"
|
||||
def is_minio_up(container_name: str) -> bool:
|
||||
"""A cheap way to figure out if MinIO is responsive on a container"""
|
||||
|
||||
cmd = f"docker logs {container_name} 2>&1 | grep '1 Online'"
|
||||
ret = subprocess.run(
|
||||
cmd,
|
||||
shell=True,
|
||||
)
|
||||
assert ret.returncode == 0
|
||||
yield
|
||||
return ret.returncode == 0
|
||||
|
||||
# Shutdown minio server
|
||||
cmd = "./tests/integration/delta_lake/minio/kill_minio.sh"
|
||||
ret = subprocess.run(
|
||||
cmd,
|
||||
shell=True,
|
||||
)
|
||||
assert ret.returncode == 0
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def test_resources_dir(pytestconfig):
|
||||
return pytestconfig.rootpath / "tests/integration/delta_lake"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def minio_runner(docker_compose_runner, pytestconfig, test_resources_dir):
|
||||
container_name = "minio_test"
|
||||
with docker_compose_runner(
|
||||
test_resources_dir / "docker-compose.yml", container_name
|
||||
) as docker_services:
|
||||
wait_for_port(
|
||||
docker_services,
|
||||
container_name,
|
||||
MINIO_PORT,
|
||||
timeout=120,
|
||||
checker=lambda: is_minio_up(container_name),
|
||||
)
|
||||
yield docker_services
|
||||
|
||||
|
||||
@pytest.fixture(scope="module", autouse=True)
|
||||
def bucket_name():
|
||||
return "my-test-bucket"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module", autouse=True)
|
||||
def s3_bkt(bucket_name, minio_startup):
|
||||
def s3_bkt(minio_runner):
|
||||
s3 = boto3.resource(
|
||||
"s3",
|
||||
endpoint_url="http://localhost:9000",
|
||||
aws_access_key_id="minioadmin",
|
||||
aws_secret_access_key="minioadmin",
|
||||
endpoint_url=f"http://localhost:{MINIO_PORT}",
|
||||
aws_access_key_id="miniouser",
|
||||
aws_secret_access_key="miniopassword",
|
||||
)
|
||||
bkt = s3.Bucket(bucket_name)
|
||||
bkt = s3.Bucket("my-test-bucket")
|
||||
bkt.create()
|
||||
return bkt
|
||||
|
||||
@ -62,9 +73,8 @@ def populate_minio(pytestconfig, s3_bkt):
|
||||
|
||||
|
||||
@pytest.mark.slow_integration
|
||||
def test_delta_lake_ingest(pytestconfig, tmp_path, mock_time):
|
||||
test_resources_dir = pytestconfig.rootpath / "tests/integration/delta_lake/"
|
||||
|
||||
@freezegun.freeze_time("2023-01-01 00:00:00+00:00")
|
||||
def test_delta_lake_ingest(pytestconfig, tmp_path, test_resources_dir):
|
||||
# Run the metadata ingestion pipeline.
|
||||
pipeline = Pipeline.create(
|
||||
{
|
||||
@ -76,9 +86,9 @@ def test_delta_lake_ingest(pytestconfig, tmp_path, mock_time):
|
||||
"base_path": "s3://my-test-bucket/delta_tables/sales",
|
||||
"s3": {
|
||||
"aws_config": {
|
||||
"aws_access_key_id": "minioadmin",
|
||||
"aws_secret_access_key": "minioadmin",
|
||||
"aws_endpoint_url": "http://localhost:9000",
|
||||
"aws_access_key_id": "miniouser",
|
||||
"aws_secret_access_key": "miniopassword",
|
||||
"aws_endpoint_url": f"http://localhost:{MINIO_PORT}",
|
||||
"aws_region": "us-east-1",
|
||||
},
|
||||
},
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user