fix(ingest/ci): fix integration test failures (#15284)

This commit is contained in:
kyungsoo-datahub 2025-11-12 21:56:17 -08:00 committed by GitHub
parent ac20d06865
commit ae1406a3a7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 100 additions and 17 deletions

View File

@ -1,4 +1,3 @@
version: '3'
services:
hex-mock-api:
image: python:3.9-alpine
@ -8,12 +7,28 @@ services:
volumes:
- ./hex_projects_response.json:/app/hex_projects_response.json
- ./mock_hex_server.py:/app/mock_hex_server.py
command: ["python", "/app/mock_hex_server.py"]
# Container start-up script: installs wget, launches the mock Hex server in
# the background, then probes http://localhost:8000/health once per second
# for up to 30 attempts. On the first successful probe it blocks on the
# server process and exits 0 when that process ends; if no probe succeeds it
# kills the server and exits 1.
# NOTE(review): `$$` is presumably Compose's escape for a literal `$` so the
# shell (not Compose interpolation) sees `$!` / `$(...)` — confirm.
command:
- sh
- -c
- |
apk add --no-cache wget
python /app/mock_hex_server.py &
SERVER_PID=$$!
for i in $$(seq 1 30); do
if wget --no-verbose --tries=1 --spider http://localhost:8000/health 2>/dev/null; then
wait $$SERVER_PID
exit 0
fi
sleep 1
done
kill $$SERVER_PID 2>/dev/null || true
exit 1
healthcheck:
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8000/health"]
test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:8000/health || exit 1"]
interval: 5s
timeout: 5s
retries: 3
timeout: 10s
retries: 60
start_period: 30s
datahub-mock-api:
image: python:3.9-alpine
container_name: datahub-mock-api
@ -25,9 +40,25 @@ services:
- ./datahub_get_urns_by_filter_page1.json:/app/datahub_get_urns_by_filter_page1.json
- ./datahub_get_urns_by_filter_page2.json:/app/datahub_get_urns_by_filter_page2.json
- ./mock_datahub_server.py:/app/mock_datahub_server.py
command: ["python", "/app/mock_datahub_server.py"]
# Container start-up script: installs wget, launches the mock DataHub server
# in the background, then probes http://localhost:8010/health once per second
# for up to 30 attempts. On the first successful probe it blocks on the
# server process and exits 0 when that process ends; if no probe succeeds it
# kills the server and exits 1.
# NOTE(review): `$$` is presumably Compose's escape for a literal `$` so the
# shell (not Compose interpolation) sees `$!` / `$(...)` — confirm.
command:
- sh
- -c
- |
apk add --no-cache wget
python /app/mock_datahub_server.py &
SERVER_PID=$$!
for i in $$(seq 1 30); do
if wget --no-verbose --tries=1 --spider http://localhost:8010/health 2>/dev/null; then
wait $$SERVER_PID
exit 0
fi
sleep 1
done
kill $$SERVER_PID 2>/dev/null || true
exit 1
healthcheck:
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8010/health"]
test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:8010/health || exit 1"]
interval: 5s
timeout: 5s
retries: 3
timeout: 10s
retries: 60
start_period: 30s

View File

@ -59,6 +59,31 @@ class MockDataHubAPIHandler(http.server.SimpleHTTPRequestHandler):
self.end_headers()
self.wfile.write(json.dumps({"error": "Not found", "path": self.path}).encode())
def do_HEAD(self):
    """Reply to HEAD requests with a status line and headers only.

    wget --spider (used by the container health checks) issues HEAD, so
    the routes answered here are matched on the URL path with any query
    string ignored:

    * ``/health``           -> 200, ``text/plain``
    * paths under ``/config`` -> 200, ``application/json`` plus a CORS header
    * anything else         -> 404, ``application/json``
    """
    route = urlparse(self.path).path

    # Pick the status and header list first, then emit them in one place.
    if route == "/health":
        status = HTTPStatus.OK
        headers = [("Content-type", "text/plain")]
    elif route.startswith("/config"):
        status = HTTPStatus.OK
        headers = [
            ("Content-type", "application/json"),
            ("Access-Control-Allow-Origin", "*"),
        ]
    else:
        status = HTTPStatus.NOT_FOUND
        headers = [("Content-type", "application/json")]

    self.send_response(status)
    for header_name, header_value in headers:
        self.send_header(header_name, header_value)
    self.end_headers()
def do_POST(self):
parsed_url = urlparse(self.path)
path = parsed_url.path

View File

@ -45,6 +45,31 @@ class MockHexAPIHandler(http.server.SimpleHTTPRequestHandler):
self.end_headers()
self.wfile.write(json.dumps({"error": "Not found", "path": self.path}).encode())
def do_HEAD(self):
    """Reply to HEAD requests with a status line and headers only.

    wget --spider (used by the container health checks) issues HEAD, so
    the routes answered here are matched on the URL path with any query
    string ignored:

    * ``/health``                     -> 200, ``text/plain``
    * paths under ``/api/v1/projects`` -> 200, ``application/json`` plus a
      CORS header
    * anything else                   -> 404, ``application/json``
    """
    route = urlparse(self.path).path

    # Pick the status and header list first, then emit them in one place.
    if route == "/health":
        status = HTTPStatus.OK
        headers = [("Content-type", "text/plain")]
    elif route.startswith("/api/v1/projects"):
        status = HTTPStatus.OK
        headers = [
            ("Content-type", "application/json"),
            ("Access-Control-Allow-Origin", "*"),
        ]
    else:
        status = HTTPStatus.NOT_FOUND
        headers = [("Content-type", "application/json")]

    self.send_response(status)
    for header_name, header_value in headers:
        self.send_header(header_name, header_value)
    self.end_headers()
# Set up the server
handler = MockHexAPIHandler

View File

@ -8,6 +8,7 @@ from tests.test_helpers.docker_helpers import wait_for_port
FROZEN_TIME = "2025-03-25 12:00:00"
pytestmark = pytest.mark.integration_batch_2

View File

@ -66,7 +66,14 @@ services:
/usr/bin/mc rm -r --force minio/warehouse;
/usr/bin/mc mb minio/warehouse;
/usr/bin/mc anonymous set public minio/warehouse;
exit 0;
touch /tmp/mc_done;
tail -f /dev/null;
"
# Reports healthy once /tmp/mc_done exists. The preceding command string
# touches that file after its mc bucket commands and then keeps the
# container running with `tail -f /dev/null`.
healthcheck:
test: ["CMD", "test", "-f", "/tmp/mc_done"]
interval: 5s
timeout: 5s
retries: 5
start_period: 10s
networks:
iceberg_net:

View File

@ -204,13 +204,7 @@ services:
environment:
- initialBuckets=test-bucket
ports:
- "9090:9090"
healthcheck:
test: ["CMD-SHELL", "curl -s -f http://localhost:9090/ || exit 1"]
interval: 5s
timeout: 3s
retries: 3
start_period: 10s
- "9090:9191"
restart: on-failure:3
deploy:
resources: