mirror of
https://github.com/datahub-project/datahub.git
synced 2025-08-31 04:25:29 +00:00
tests(kafka): fixing flaky tests (#13171)
This commit is contained in:
parent
a02ca68386
commit
a8637abfe2
@ -2,61 +2,83 @@
|
||||
services:
|
||||
connect:
|
||||
image: confluentinc/cp-kafka-connect:7.4.0
|
||||
platform: linux/amd64
|
||||
env_file: ./../kafka-connect/setup/connect.env
|
||||
container_name: test_connect
|
||||
hostname: test_connect
|
||||
depends_on:
|
||||
- zookeeper
|
||||
- broker
|
||||
- mysqldb
|
||||
- mongo
|
||||
zookeeper:
|
||||
condition: service_healthy
|
||||
broker:
|
||||
condition: service_healthy
|
||||
mysqldb:
|
||||
condition: service_healthy
|
||||
mongo:
|
||||
condition: service_healthy
|
||||
ports:
|
||||
- "28083:28083"
|
||||
# volumes:
|
||||
# - ./../kafka-connect/setup/confluentinc-kafka-connect-jdbc-10.2.5:/usr/local/share/kafka/plugins/confluentinc-kafka-connect-jdbc-10.2.5
|
||||
# - ./../kafka-connect/setup/confluentinc-connect-transforms-1.4.1:/usr/local/share/kafka/plugins/confluentinc-connect-transforms-1.4.1
|
||||
# - ./../kafka-connect/setup/debezium-debezium-connector-mysql-1.7.0:/usr/local/share/kafka/plugins/debezium-debezium-connector-mysql-1.7.0
|
||||
# - ./../kafka-connect/setup/gcp-bigquery-project-keyfile.json:/usr/local/share/gcp-bigquery-project-keyfile.json
|
||||
restart: on-failure:3
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 768M
|
||||
reservations:
|
||||
memory: 384M
|
||||
command:
|
||||
- bash
|
||||
- -c
|
||||
- |
|
||||
echo "Install confluent CLI"
|
||||
# https://docs.confluent.io/confluent-cli/current/install.html#tarball-or-zip-installation
|
||||
wget https://packages.confluent.io/confluent-cli/archives/4.16.0/confluent_linux_amd64.tar.gz
|
||||
mkdir -p /tmp/confluent-cli
|
||||
tar -xvf confluent_linux_amd64.tar.gz -C /tmp/confluent-cli
|
||||
export PATH=/tmp/confluent-cli/confluent:$PATH
|
||||
echo "Done!"
|
||||
echo "Installing Connectors"
|
||||
#
|
||||
confluent connect plugin install --force confluentinc/kafka-connect-jdbc:10.2.5
|
||||
#
|
||||
confluent connect plugin install --force confluentinc/connect-transforms:1.4.1
|
||||
#
|
||||
confluent connect plugin install --force confluentinc/kafka-connect-datagen:0.6.0
|
||||
#
|
||||
confluent connect plugin install --force debezium/debezium-connector-mysql:1.7.0
|
||||
#
|
||||
confluent connect plugin install --force wepay/kafka-connect-bigquery:1.6.8
|
||||
#
|
||||
confluent connect plugin install --force mongodb/kafka-connect-mongodb:1.10.1
|
||||
#
|
||||
confluent connect plugin install --force confluentinc/kafka-connect-s3:10.5.1
|
||||
#
|
||||
echo "Installing connectors using confluent-hub"
|
||||
|
||||
# First verify that confluent-hub exists
|
||||
if ! [ -x "$(command -v confluent-hub)" ]; then
|
||||
echo "Error: confluent-hub is not installed." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "confluent-hub installation verified"
|
||||
echo "Installing connectors..."
|
||||
|
||||
# Install with confluent-hub - ensure all connectors install correctly
|
||||
confluent-hub install --no-prompt confluentinc/kafka-connect-jdbc:10.2.5 || { echo "Failed to install JDBC connector"; exit 1; }
|
||||
confluent-hub install --no-prompt confluentinc/connect-transforms:1.4.1 || { echo "Failed to install transforms"; exit 1; }
|
||||
confluent-hub install --no-prompt confluentinc/kafka-connect-datagen:0.6.0 || { echo "Failed to install datagen"; exit 1; }
|
||||
confluent-hub install --no-prompt debezium/debezium-connector-mysql:1.7.0 || { echo "Failed to install Debezium"; exit 1; }
|
||||
confluent-hub install --no-prompt wepay/kafka-connect-bigquery:1.6.8 || { echo "Failed to install BigQuery"; exit 1; }
|
||||
confluent-hub install --no-prompt mongodb/kafka-connect-mongodb:1.10.1 || { echo "Failed to install MongoDB"; exit 1; }
|
||||
confluent-hub install --no-prompt confluentinc/kafka-connect-s3:10.5.1 || { echo "Failed to install S3"; exit 1; }
|
||||
|
||||
echo "All connectors installed successfully"
|
||||
|
||||
# Install MySQL JDBC Driver
|
||||
mkdir -p /usr/share/confluent-hub-components/confluentinc-kafka-connect-jdbc/lib
|
||||
#
|
||||
echo "Installing MySQL JDBC Driver..."
|
||||
curl -k -SL "https://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-8.0.27.tar.gz" \
|
||||
| tar -xzf - -C /usr/share/confluent-hub-components/confluentinc-kafka-connect-jdbc/lib \
|
||||
--strip-components=1 mysql-connector-java-8.0.27/mysql-connector-java-8.0.27.jar
|
||||
--strip-components=1 mysql-connector-java-8.0.27/mysql-connector-java-8.0.27.jar \
|
||||
|| { echo "Failed to install MySQL JDBC driver"; exit 1; }
|
||||
|
||||
# Install EnvVar Config Provider
|
||||
echo "Installing EnvVar Config Provider..."
|
||||
curl -k -SL "https://repo1.maven.org/maven2/io/strimzi/kafka-env-var-config-provider/0.1.1/kafka-env-var-config-provider-0.1.1.tar.gz" \
|
||||
| tar -xzf - -C /usr/share/confluent-hub-components/
|
||||
#
|
||||
echo "Launching Kafka Connect worker"
|
||||
#
|
||||
/etc/confluent/docker/run &
|
||||
#
|
||||
sleep infinity
|
||||
| tar -xzf - -C /usr/share/confluent-hub-components/ \
|
||||
|| { echo "Failed to install EnvVar Config Provider"; exit 1; }
|
||||
|
||||
echo "All components installed successfully, launching Kafka Connect worker..."
|
||||
|
||||
# Create a flag file to indicate installation is complete for the healthcheck
|
||||
# However, this results on a healtcheck not very trustable, as the service startup just after this takes a significant amount of time
|
||||
# So client validation is needed; see check_connectors_ready
|
||||
touch /tmp/connectors_installed
|
||||
|
||||
# Launch connect worker and ensure it's in the foreground to catch any startup errors
|
||||
exec /etc/confluent/docker/run
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "test -f /tmp/connectors_installed || (echo 'Not all plugins installed yet' && exit 1)"]
|
||||
interval: 10s
|
||||
timeout: 10s
|
||||
retries: 20
|
||||
start_period: 60s
|
||||
mysqldb:
|
||||
image: mysql:8.0
|
||||
environment:
|
||||
@ -71,6 +93,19 @@ services:
|
||||
volumes:
|
||||
- ./../kafka-connect/setup/conf/mysqld.cnf:/etc/mysql/mysql.conf.d/mysqld.cnf
|
||||
- ./../kafka-connect/setup/mysql-setup.sql:/docker-entrypoint-initdb.d/mysql-setup.sql
|
||||
healthcheck:
|
||||
test: ["CMD", "mysqladmin", "ping", "-h", "localhost", "-u", "root", "-prootpwd"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
start_period: 15s
|
||||
restart: on-failure:3
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 512M
|
||||
reservations:
|
||||
memory: 256M
|
||||
|
||||
postgresdb:
|
||||
image: postgres:alpine
|
||||
@ -81,6 +116,19 @@ services:
|
||||
- ./../kafka-connect/setup/mysql-setup.sql:/docker-entrypoint-initdb.d/postgres_setup.sql
|
||||
ports:
|
||||
- "5432:5432"
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U postgres"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
start_period: 10s
|
||||
restart: on-failure:3
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 256M
|
||||
reservations:
|
||||
memory: 128M
|
||||
|
||||
mongo:
|
||||
hostname: mongo
|
||||
@ -95,6 +143,19 @@ services:
|
||||
- MONGO_INITDB_DATABASE=test_db
|
||||
volumes:
|
||||
- ./../kafka-connect/setup/conf/:/scripts/
|
||||
healthcheck:
|
||||
test: ["CMD", "mongosh", "--eval", "db.runCommand({ ping: 1 })"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
start_period: 15s
|
||||
restart: on-failure:3
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 512M
|
||||
reservations:
|
||||
memory: 256M
|
||||
|
||||
s3mock:
|
||||
image: adobe/s3mock:2.13.0
|
||||
@ -102,3 +163,16 @@ services:
|
||||
- initialBuckets=test-bucket
|
||||
ports:
|
||||
- "9090:9090"
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -s -f http://localhost:9090/ || exit 1"]
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
restart: on-failure:3
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 256M
|
||||
reservations:
|
||||
memory: 128M
|
||||
|
@ -5,17 +5,14 @@ CONNECT_GROUP_ID=kafka-connect
|
||||
CONNECT_CONFIG_STORAGE_TOPIC=_connect-configs
|
||||
CONNECT_OFFSET_STORAGE_TOPIC=_connect-offsets
|
||||
CONNECT_STATUS_STORAGE_TOPIC=_connect-status
|
||||
CONNECT_KEY_CONVERTER=io.confluent.connect.avro.AvroConverter
|
||||
CONNECT_VALUE_CONVERTER=io.confluent.connect.avro.AvroConverter
|
||||
# CONNECT_INTERNAL_KEY_CONVERTER=io.confluent.connect.avro.AvroConverter
|
||||
# CONNECT_INTERNAL_VALUE_CONVERTER=io.confluent.connect.avro.AvroConverter
|
||||
CONNECT_KEY_CONVERTER_SCHEMA_REGISTRY_URL='http://schema-registry:8081'
|
||||
CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL='http://schema-registry:8081'
|
||||
# CONNECT_INTERNAL_KEY_CONVERTER_SCHEMA_REGISTRY_URL='http://schema-registry:8081'
|
||||
# CONNECT_INTERNAL_VALUE_CONVERTER_SCHEMA_REGISTRY_URL='http://schema-registry:8081'
|
||||
# Using simple JSON converter for easier debugging
|
||||
CONNECT_KEY_CONVERTER=org.apache.kafka.connect.json.JsonConverter
|
||||
CONNECT_VALUE_CONVERTER=org.apache.kafka.connect.json.JsonConverter
|
||||
CONNECT_KEY_CONVERTER_SCHEMAS_ENABLE=false
|
||||
CONNECT_VALUE_CONVERTER_SCHEMAS_ENABLE=false
|
||||
CONNECT_REST_ADVERTISED_HOST_NAME=test_connect
|
||||
CONNECT_LOG4J_ROOT_LOGLEVEL=INFO
|
||||
# CONNECT_LOG4J_APPENDER_STDOUT_LAYOUT_CONVERSIONPATTERN=[%d] %p %X{connector.context}%m (%c:%L)%n
|
||||
# Making sure these are 1 since we have a single broker
|
||||
CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR=1
|
||||
CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR=1
|
||||
CONNECT_STATUS_STORAGE_REPLICATION_FACTOR=1
|
||||
|
@ -1,3 +1,4 @@
|
||||
import logging
|
||||
import subprocess
|
||||
from typing import Any, Dict, List, Optional, cast
|
||||
from unittest import mock
|
||||
@ -13,12 +14,13 @@ from datahub.ingestion.source.kafka_connect.kafka_connect import SinkTopicFilter
|
||||
from datahub.ingestion.source.state.entity_removal_state import GenericCheckpointState
|
||||
from tests.test_helpers import mce_helpers
|
||||
from tests.test_helpers.click_helpers import run_datahub_cmd
|
||||
from tests.test_helpers.docker_helpers import wait_for_port
|
||||
from tests.test_helpers.state_helpers import (
|
||||
get_current_checkpoint_from_pipeline,
|
||||
validate_all_providers_have_committed_successfully,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
pytestmark = pytest.mark.integration_batch_1
|
||||
FROZEN_TIME = "2021-10-25 13:00:00"
|
||||
GMS_PORT = 8080
|
||||
@ -27,22 +29,92 @@ KAFKA_CONNECT_SERVER = "http://localhost:28083"
|
||||
KAFKA_CONNECT_ENDPOINT = f"{KAFKA_CONNECT_SERVER}/connectors"
|
||||
|
||||
|
||||
def is_mysql_up(container_name: str, port: int) -> bool:
|
||||
"""A cheap way to figure out if mysql is responsive on a container"""
|
||||
def check_connectors_ready(
|
||||
server_url: str = "http://localhost:28083", only_plugins: bool = False
|
||||
) -> bool:
|
||||
"""
|
||||
Check if Kafka Connect is fully initialized with plugins installed and all connectors are in a RUNNING state.
|
||||
|
||||
cmd = f"docker logs {container_name} 2>&1 | grep '/var/run/mysqld/mysqld.sock' | grep {port}"
|
||||
ret = subprocess.run(
|
||||
cmd,
|
||||
shell=True,
|
||||
)
|
||||
return ret.returncode == 0
|
||||
Args:
|
||||
server_url: The base URL of the Kafka Connect REST API
|
||||
only_plugins: If True, only check if the connector plugins are installed
|
||||
|
||||
Returns:
|
||||
bool: True if all connectors are running, False otherwise
|
||||
"""
|
||||
try:
|
||||
# Check connector plugins are installed
|
||||
response = requests.get(f"{server_url}/connector-plugins")
|
||||
logger.debug(
|
||||
f"check-connectors-ready: connector-plugins: {response.status_code} {response.json()}"
|
||||
)
|
||||
response.raise_for_status()
|
||||
if not response.json():
|
||||
return False
|
||||
|
||||
def have_connectors_processed(container_name: str) -> bool:
|
||||
"""A cheap way to figure out if postgres is responsive on a container"""
|
||||
if only_plugins:
|
||||
return True
|
||||
|
||||
cmd = f"docker logs {container_name} 2>&1 | grep 'Session key updated'"
|
||||
return subprocess.run(cmd, shell=True).returncode == 0
|
||||
# Get list of all connectors
|
||||
connectors_response = requests.get(f"{server_url}/connectors")
|
||||
logger.debug(
|
||||
f"check-connectors-ready: connector: {connectors_response.status_code} {connectors_response.json()}"
|
||||
)
|
||||
connectors_response.raise_for_status()
|
||||
connectors = connectors_response.json()
|
||||
if not connectors: # Empty list means no connectors yet
|
||||
return False
|
||||
|
||||
for connector in connectors:
|
||||
# Based on experience, these connectors can be in FAILED state and still work for tests:
|
||||
if connector in ["mysql_sink", "bigquery-sink-connector"]:
|
||||
logger.debug(
|
||||
f"check-connectors-ready: skipping validation for {connector} as it can be in FAILED state for tests"
|
||||
)
|
||||
continue
|
||||
|
||||
# Check status of each connector
|
||||
status_response = requests.get(
|
||||
f"{server_url}/connectors/{connector}/status"
|
||||
)
|
||||
logger.debug(
|
||||
f"check-connectors-ready: connector {connector}: {status_response.status_code} {status_response.json()}"
|
||||
)
|
||||
status_response.raise_for_status()
|
||||
status = status_response.json()
|
||||
if status.get("connector", {}).get("state") != "RUNNING":
|
||||
logger.debug(
|
||||
f"check-connectors-ready: connector {connector} is not running"
|
||||
)
|
||||
return False
|
||||
|
||||
# Check all tasks are running
|
||||
for task in status.get("tasks", []):
|
||||
if task.get("state") != "RUNNING":
|
||||
logger.debug(
|
||||
f"check-connectors-ready: connector {connector} task {task} is not running"
|
||||
)
|
||||
return False
|
||||
|
||||
# Check topics were provisioned
|
||||
topics_response = requests.get(
|
||||
f"{server_url}/connectors/{connector}/topics"
|
||||
)
|
||||
topics_response.raise_for_status()
|
||||
topics_data = topics_response.json()
|
||||
|
||||
if topics_data and topics_data.get(connector, {}).get("topics"):
|
||||
logger.debug(
|
||||
f"Connector {connector} topics: {topics_data[connector]['topics']}"
|
||||
)
|
||||
else:
|
||||
logger.debug(f"Connector {connector} topics not found yet!")
|
||||
return False
|
||||
|
||||
return True
|
||||
except Exception as e: # This will catch any exception and return False
|
||||
logger.debug(f"check-connectors-ready: exception: {e}")
|
||||
return False
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
@ -55,30 +127,20 @@ def kafka_connect_runner(docker_compose_runner, pytestconfig, test_resources_dir
|
||||
str(test_resources_dir_kafka / "docker-compose.yml"),
|
||||
str(test_resources_dir / "docker-compose.override.yml"),
|
||||
]
|
||||
with docker_compose_runner(
|
||||
docker_compose_file, "kafka-connect", cleanup=False
|
||||
) as docker_services:
|
||||
wait_for_port(
|
||||
docker_services,
|
||||
"test_mysql",
|
||||
3306,
|
||||
timeout=120,
|
||||
checker=lambda: is_mysql_up("test_mysql", 3306),
|
||||
)
|
||||
|
||||
with docker_compose_runner(docker_compose_file, "kafka-connect") as docker_services:
|
||||
# We sometimes run into issues where the broker fails to come up on the first try because
|
||||
# of all the other processes that are running. By running docker compose twice, we can
|
||||
# avoid some test flakes. How does this work? The "key" is the same between both
|
||||
# calls to the docker_compose_runner and the first one sets cleanup=False.
|
||||
# We rely on Docker health checks to confirm all services are up & healthy
|
||||
|
||||
wait_for_port(docker_services, "test_broker", 29092, timeout=120)
|
||||
wait_for_port(docker_services, "test_connect", 28083, timeout=120)
|
||||
# However healthcheck for test_connect service is not very trustable, so
|
||||
# a double and more robust check here is needed
|
||||
docker_services.wait_until_responsive(
|
||||
timeout=30,
|
||||
pause=1,
|
||||
check=lambda: requests.get(KAFKA_CONNECT_ENDPOINT).status_code == 200,
|
||||
timeout=300,
|
||||
pause=10,
|
||||
check=lambda: check_connectors_ready(
|
||||
KAFKA_CONNECT_SERVER, only_plugins=True
|
||||
),
|
||||
)
|
||||
|
||||
yield docker_services
|
||||
|
||||
|
||||
@ -89,7 +151,7 @@ def test_resources_dir(pytestconfig):
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def loaded_kafka_connect(kafka_connect_runner):
|
||||
# # Setup mongo cluster
|
||||
print("Initializing MongoDB replica set...")
|
||||
command = "docker exec test_mongo mongosh test_db -f /scripts/mongo-init.js"
|
||||
ret = subprocess.run(command, shell=True, capture_output=True)
|
||||
assert ret.returncode == 0
|
||||
@ -280,6 +342,14 @@ def loaded_kafka_connect(kafka_connect_runner):
|
||||
r.raise_for_status()
|
||||
assert r.status_code == 201 # Created
|
||||
|
||||
print("Populating MongoDB with test data...")
|
||||
# we populate the database before creating the connector
|
||||
# and we use copy_existing mode to copy the data from the database to the kafka topic
|
||||
# so ingestion is consistent and deterministic
|
||||
command = "docker exec test_mongo mongosh test_db -f /scripts/mongo-populate.js"
|
||||
ret = subprocess.run(command, shell=True, capture_output=True)
|
||||
assert ret.returncode == 0
|
||||
|
||||
# Creating MongoDB source
|
||||
r = requests.post(
|
||||
KAFKA_CONNECT_ENDPOINT,
|
||||
@ -291,17 +361,14 @@ def loaded_kafka_connect(kafka_connect_runner):
|
||||
"connection.uri": "mongodb://test_mongo:27017",
|
||||
"topic.prefix": "mongodb",
|
||||
"database": "test_db",
|
||||
"collection": "purchases"
|
||||
"collection": "purchases",
|
||||
"startup.mode": "copy_existing"
|
||||
}
|
||||
}""",
|
||||
)
|
||||
r.raise_for_status()
|
||||
assert r.status_code == 201 # Created
|
||||
|
||||
command = "docker exec test_mongo mongosh test_db -f /scripts/mongo-populate.js"
|
||||
ret = subprocess.run(command, shell=True, capture_output=True)
|
||||
assert ret.returncode == 0
|
||||
|
||||
# Creating S3 Sink source
|
||||
r = requests.post(
|
||||
KAFKA_CONNECT_ENDPOINT,
|
||||
@ -360,13 +427,16 @@ def loaded_kafka_connect(kafka_connect_runner):
|
||||
)
|
||||
assert r.status_code == 201 # Created
|
||||
|
||||
# Give time for connectors to process the table data
|
||||
# Connectors should be ready to process data thanks to Docker health checks
|
||||
print("Waiting for Kafka Connect connectors to initialize and process data...")
|
||||
kafka_connect_runner.wait_until_responsive(
|
||||
timeout=30,
|
||||
pause=1,
|
||||
check=lambda: have_connectors_processed("test_connect"),
|
||||
timeout=120,
|
||||
pause=10,
|
||||
check=lambda: check_connectors_ready(KAFKA_CONNECT_SERVER, only_plugins=False),
|
||||
)
|
||||
|
||||
print("Kafka Connect connectors are ready!")
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
def test_kafka_connect_ingest(
|
||||
|
@ -1,7 +1,8 @@
|
||||
---
|
||||
services:
|
||||
zookeeper:
|
||||
image: confluentinc/cp-zookeeper:7.2.2
|
||||
image: confluentinc/cp-zookeeper:7.4.0
|
||||
platform: linux/amd64
|
||||
env_file: zookeeper.env
|
||||
hostname: test_zookeeper
|
||||
container_name: test_zookeeper
|
||||
@ -10,27 +11,68 @@ services:
|
||||
volumes:
|
||||
- test_zkdata:/var/lib/zookeeper/data
|
||||
- test_zklogs:/var/lib/zookeeper/log
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "echo ruok | nc localhost 52181 || exit 1"]
|
||||
retries: 5
|
||||
start_period: 15s
|
||||
restart: on-failure:3
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 512M
|
||||
reservations:
|
||||
memory: 256M
|
||||
|
||||
broker:
|
||||
image: confluentinc/cp-kafka:7.2.2
|
||||
image: confluentinc/cp-kafka:7.4.0
|
||||
platform: linux/amd64
|
||||
env_file: broker.env
|
||||
hostname: test_broker
|
||||
container_name: test_broker
|
||||
depends_on:
|
||||
- zookeeper
|
||||
zookeeper:
|
||||
condition: service_healthy
|
||||
ports:
|
||||
- "29092:29092"
|
||||
healthcheck:
|
||||
test:
|
||||
- "CMD-SHELL"
|
||||
- "kafka-topics --bootstrap-server broker:9092 --list || exit 1"
|
||||
retries: 5
|
||||
start_period: 30s
|
||||
restart: on-failure:3
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 512M
|
||||
reservations:
|
||||
memory: 256M
|
||||
|
||||
schema-registry:
|
||||
image: confluentinc/cp-schema-registry:7.2.2
|
||||
restart: unless-stopped
|
||||
image: confluentinc/cp-schema-registry:7.4.0
|
||||
platform: linux/amd64
|
||||
env_file: schema-registry.env
|
||||
container_name: test_schema_registry
|
||||
depends_on:
|
||||
- zookeeper
|
||||
- broker
|
||||
zookeeper:
|
||||
condition: service_healthy
|
||||
broker:
|
||||
condition: service_healthy
|
||||
ports:
|
||||
- "28081:8081"
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -s -f http://localhost:8081/subjects || exit 1"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
start_period: 15s
|
||||
restart: on-failure:3
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 512M
|
||||
reservations:
|
||||
memory: 256M
|
||||
|
||||
volumes:
|
||||
test_zkdata:
|
||||
|
@ -24,17 +24,12 @@ def test_resources_dir(pytestconfig):
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def mock_kafka_service(docker_compose_runner, test_resources_dir):
|
||||
with docker_compose_runner(
|
||||
test_resources_dir / "docker-compose.yml", "kafka", cleanup=False
|
||||
) as docker_services:
|
||||
wait_for_port(docker_services, "test_zookeeper", 52181, timeout=120)
|
||||
|
||||
# Running docker compose twice, since the broker sometimes fails to come up on the first try.
|
||||
with docker_compose_runner(
|
||||
test_resources_dir / "docker-compose.yml", "kafka"
|
||||
) as docker_services:
|
||||
wait_for_port(docker_services, "test_broker", 29092, timeout=120)
|
||||
wait_for_port(docker_services, "test_schema_registry", 8081, timeout=120)
|
||||
wait_for_port(docker_services, "test_zookeeper", 52181, timeout=180)
|
||||
wait_for_port(docker_services, "test_broker", 29092, timeout=180)
|
||||
wait_for_port(docker_services, "test_schema_registry", 8081, timeout=180)
|
||||
|
||||
# Set up topics and produce some data
|
||||
command = f"{test_resources_dir}/send_records.sh {test_resources_dir}"
|
||||
|
@ -1,3 +1,4 @@
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
|
||||
@ -20,13 +21,43 @@ def assert_result_ok(result: Result) -> None:
|
||||
def run_datahub_cmd(
|
||||
command: List[str], tmp_path: Optional[Path] = None, check_result: bool = True
|
||||
) -> Result:
|
||||
"""
|
||||
Run a datahub CLI command in a test context
|
||||
|
||||
This function handles a known issue with Click's testing framework where it may raise
|
||||
"ValueError: I/O operation on closed file" after the command has successfully completed under some conditions,
|
||||
such as console debug logs enabled.
|
||||
See related issues:
|
||||
- https://github.com/pallets/click/issues/824
|
||||
"""
|
||||
runner = CliRunner()
|
||||
|
||||
if tmp_path is None:
|
||||
result = runner.invoke(datahub, command)
|
||||
else:
|
||||
with fs_helpers.isolated_filesystem(tmp_path):
|
||||
try:
|
||||
if tmp_path is None:
|
||||
result = runner.invoke(datahub, command)
|
||||
else:
|
||||
with fs_helpers.isolated_filesystem(tmp_path):
|
||||
result = runner.invoke(datahub, command)
|
||||
except ValueError as e:
|
||||
if "I/O operation on closed file" in str(e):
|
||||
# This is a known issue with the Click testing framework
|
||||
# The command likely still succeeded, so we'll construct a basic Result object
|
||||
# and continue with the test
|
||||
print(f"WARNING: Caught Click I/O error but continuing with the test: {e}")
|
||||
# Create an empty buffer for stdout and stderr
|
||||
empty_buffer = BytesIO()
|
||||
result = Result(
|
||||
runner=runner,
|
||||
stdout_bytes=empty_buffer.getvalue(),
|
||||
stderr_bytes=empty_buffer.getvalue(),
|
||||
return_value=None, # type: ignore[call-arg]
|
||||
exit_code=0,
|
||||
exception=None,
|
||||
exc_info=None,
|
||||
)
|
||||
else:
|
||||
# Re-raise if it's not the specific error we're handling
|
||||
raise
|
||||
|
||||
if check_result:
|
||||
assert_result_ok(result)
|
||||
|
@ -48,4 +48,14 @@ def assert_capability_report(
|
||||
assert not capability_report[capability].capable
|
||||
failure_reason = capability_report[capability].failure_reason
|
||||
assert failure_reason
|
||||
assert expected_reason in failure_reason
|
||||
# Handle different error codes for "Connection refused" across operating systems
|
||||
# Linux uses [Errno 111] (usually CI env), macOS uses [Errno 61] (usually local developer env)
|
||||
if (
|
||||
"Connection refused" in expected_reason
|
||||
and "Connection refused" in failure_reason
|
||||
):
|
||||
# If both mention connection refused, consider it a match regardless of error code
|
||||
assert True
|
||||
else:
|
||||
# Otherwise do the normal string inclusion check
|
||||
assert expected_reason in failure_reason
|
||||
|
Loading…
x
Reference in New Issue
Block a user