test(ingest/minio): Configure delta lake minio tests for arm64 (#8364)

Co-authored-by: Tamas Nemeth <treff7es@gmail.com>
This commit is contained in:
Andrew Sikowitz 2023-07-13 02:41:07 -04:00 committed by GitHub
parent 64bcc132a3
commit 851c5bab9e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 50 additions and 50 deletions

View File

@ -6,7 +6,6 @@ pvenv36/
/venv*/
bq_credentials.json
junit.*.xml
tests/integrations/delta_lake/minio/minio
/tmp
*.bak

View File

@ -0,0 +1,12 @@
version: "3"
services:
minio:
image: minio/minio:RELEASE.2023-07-07T07-13-57Z
container_name: "minio_test"
environment:
MINIO_ROOT_USER: "miniouser"
MINIO_ROOT_PASSWORD: "miniopassword"
ports:
- 9000:9000 # S3 API
- 9001:9001 # Web UI
command: server /data --console-address ":9001"

View File

@ -1,3 +0,0 @@
kill -9 `cat minio_pid.txt`
rm ./tests/integrations/delta_lake/minio/minio_pid.txt
rm -rf ./tests/integrations/delta_lake/minio/data

View File

@ -1,6 +0,0 @@
#ref https://hub.docker.com/r/minio/minio/
wget https://dl.min.io/server/minio/release/linux-amd64/minio -P ./tests/integrations/delta_lake/minio/
chmod +x ./tests/integrations/delta_lake/minio/minio
nohup ./tests/integrations/delta_lake/minio/minio server ./tests/integrations/delta_lake/minio/data > temp.log 2>&1 &
echo $! > ./tests/integrations/delta_lake/minio/minio_pid.txt

View File

@ -1,4 +0,0 @@
{"commitInfo":{"timestamp":1655724801453,"operation":"WRITE","operationParameters":{},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"local Delta-Standalone/0.4.0"}}
{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}
{"metaData":{"id":"468553a9-3b52-4e50-b855-e64d2d070cb8","name":"test-table","description":"test delta table","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"foo\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"bar\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"zip\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["foo","bar"],"configuration":{},"createdTime":1655724801057}}
{"add":{"path":"0","partitionValues":{"bar":"0","foo":"0"},"size":100,"modificationTime":1655724801375,"dataChange":true,"tags":{"someTagKey":"someTagVal"}}}

View File

@ -1,2 +0,0 @@
{"commitInfo":{"timestamp":1655724802209,"operation":"WRITE","operationParameters":{},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"local Delta-Standalone/0.4.0"}}
{"add":{"path":"1","partitionValues":{"bar":"1","foo":"1"},"size":100,"modificationTime":1655724802208,"dataChange":true,"tags":{"someTagKey":"someTagVal"}}}

View File

@ -1,2 +0,0 @@
{"commitInfo":{"timestamp":1655724802227,"operation":"WRITE","operationParameters":{},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"local Delta-Standalone/0.4.0"}}
{"add":{"path":"2","partitionValues":{"bar":"0","foo":"2"},"size":100,"modificationTime":1655724802226,"dataChange":true,"tags":{"someTagKey":"someTagVal"}}}

View File

@ -1,2 +0,0 @@
{"commitInfo":{"timestamp":1655724802244,"operation":"WRITE","operationParameters":{},"readVersion":2,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"local Delta-Standalone/0.4.0"}}
{"add":{"path":"3","partitionValues":{"bar":"1","foo":"0"},"size":100,"modificationTime":1655724802243,"dataChange":true,"tags":{"someTagKey":"someTagVal"}}}

View File

@ -1,2 +0,0 @@
{"commitInfo":{"timestamp":1655724802264,"operation":"WRITE","operationParameters":{},"readVersion":3,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"local Delta-Standalone/0.4.0"}}
{"add":{"path":"4","partitionValues":{"bar":"0","foo":"1"},"size":100,"modificationTime":1655724802263,"dataChange":true,"tags":{"someTagKey":"someTagVal"}}}

View File

@ -2,47 +2,58 @@ import os
import subprocess
import boto3
import freezegun
import pytest
from datahub.ingestion.run.pipeline import Pipeline
from tests.test_helpers import mce_helpers
from tests.test_helpers.docker_helpers import wait_for_port
FROZEN_TIME = "2020-04-14 07:00:00"
MINIO_PORT = 9000
@pytest.fixture(scope="module", autouse=True)
def minio_startup():
cmd = "./tests/integration/delta_lake/minio/setup_minio.sh"
def is_minio_up(container_name: str) -> bool:
"""A cheap way to figure out if minio is responsive on a container"""
cmd = f"docker logs {container_name} 2>&1 | grep '1 Online'"
ret = subprocess.run(
cmd,
shell=True,
)
assert ret.returncode == 0
yield
return ret.returncode == 0
# Shutdown minio server
cmd = "./tests/integration/delta_lake/minio/kill_minio.sh"
ret = subprocess.run(
cmd,
shell=True,
)
assert ret.returncode == 0
@pytest.fixture(scope="module")
def test_resources_dir(pytestconfig):
return pytestconfig.rootpath / "tests/integration/delta_lake"
@pytest.fixture(scope="module")
def minio_runner(docker_compose_runner, pytestconfig, test_resources_dir):
container_name = "minio_test"
with docker_compose_runner(
test_resources_dir / "docker-compose.yml", container_name
) as docker_services:
wait_for_port(
docker_services,
container_name,
MINIO_PORT,
timeout=120,
checker=lambda: is_minio_up(container_name),
)
yield docker_services
@pytest.fixture(scope="module", autouse=True)
def bucket_name():
return "my-test-bucket"
@pytest.fixture(scope="module", autouse=True)
def s3_bkt(bucket_name, minio_startup):
def s3_bkt(minio_runner):
s3 = boto3.resource(
"s3",
endpoint_url="http://localhost:9000",
aws_access_key_id="minioadmin",
aws_secret_access_key="minioadmin",
endpoint_url=f"http://localhost:{MINIO_PORT}",
aws_access_key_id="miniouser",
aws_secret_access_key="miniopassword",
)
bkt = s3.Bucket(bucket_name)
bkt = s3.Bucket("my-test-bucket")
bkt.create()
return bkt
@ -62,9 +73,8 @@ def populate_minio(pytestconfig, s3_bkt):
@pytest.mark.slow_integration
def test_delta_lake_ingest(pytestconfig, tmp_path, mock_time):
test_resources_dir = pytestconfig.rootpath / "tests/integration/delta_lake/"
@freezegun.freeze_time("2023-01-01 00:00:00+00:00")
def test_delta_lake_ingest(pytestconfig, tmp_path, test_resources_dir):
# Run the metadata ingestion pipeline.
pipeline = Pipeline.create(
{
@ -76,9 +86,9 @@ def test_delta_lake_ingest(pytestconfig, tmp_path, mock_time):
"base_path": "s3://my-test-bucket/delta_tables/sales",
"s3": {
"aws_config": {
"aws_access_key_id": "minioadmin",
"aws_secret_access_key": "minioadmin",
"aws_endpoint_url": "http://localhost:9000",
"aws_access_key_id": "miniouser",
"aws_secret_access_key": "miniopassword",
"aws_endpoint_url": f"http://localhost:{MINIO_PORT}",
"aws_region": "us-east-1",
},
},