mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-27 09:58:14 +00:00
feat: datahub check local-docker (#2295)
This commit is contained in:
parent
1de47d6a6a
commit
07f4cb1199
@ -94,6 +94,10 @@ services:
|
||||
- "9200:9200"
|
||||
volumes:
|
||||
- esdata:/usr/share/elasticsearch/data
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s' || exit 1"]
|
||||
start_period: 2m
|
||||
retries: 4
|
||||
|
||||
kibana:
|
||||
image: kibana:7.9.3
|
||||
|
||||
@ -1,6 +1,12 @@
|
||||
# Debugging Guide
|
||||
|
||||
## How can I confirm if all Docker containers are running as expected after a quickstart?
|
||||
|
||||
If you set up the `datahub` CLI tool (see [here](../metadata-ingestion/README.md)), you can use the built-in check utility:
|
||||
```sh
|
||||
datahub check local-docker
|
||||
```
|
||||
|
||||
You can list all Docker containers on your local machine by running `docker container ls`. You should expect to see a log similar to the below:
|
||||
|
||||
```
|
||||
|
||||
@ -48,6 +48,8 @@ ignore_missing_imports = yes
|
||||
ignore_missing_imports = yes
|
||||
[mypy-pymongo.*]
|
||||
ignore_missing_imports = yes
|
||||
[mypy-docker.*]
|
||||
ignore_missing_imports = yes
|
||||
|
||||
[isort]
|
||||
profile = black
|
||||
|
||||
@ -37,6 +37,7 @@ framework_common = {
|
||||
"click>=7.1.1",
|
||||
"pyyaml>=5.4.1",
|
||||
"toml>=0.10.0",
|
||||
"docker>=4.4",
|
||||
"avro-gen3==0.3.8",
|
||||
"avro-python3>=1.8.2",
|
||||
}
|
||||
|
||||
0
metadata-ingestion/src/datahub/check/__init__.py
Normal file
0
metadata-ingestion/src/datahub/check/__init__.py
Normal file
34
metadata-ingestion/src/datahub/check/check_cli.py
Normal file
34
metadata-ingestion/src/datahub/check/check_cli.py
Normal file
@ -0,0 +1,34 @@
|
||||
import sys
|
||||
|
||||
import click
|
||||
|
||||
from datahub.check.docker import check_local_docker_containers
|
||||
from datahub.check.json_file import check_mce_file
|
||||
|
||||
|
||||
# Top-level `datahub check` command group; concrete checks (mce-file,
# local-docker) are registered as subcommands below.
@click.group()
def check() -> None:
    pass
|
||||
|
||||
|
||||
@check.command()
@click.argument("json-file", type=click.Path(exists=True, dir_okay=False))
def mce_file(json_file: str) -> None:
    """Check the schema of a MCE JSON file"""

    # check_mce_file raises on an invalid file; on success it returns a
    # one-line summary which we print verbatim.
    click.echo(check_mce_file(json_file))
|
||||
|
||||
|
||||
@check.command()
def local_docker() -> None:
    """Check that the local Docker containers are healthy"""

    issues = check_local_docker_containers()
    if issues:
        # Report every problem, then exit non-zero so scripts can detect it.
        click.secho("The following issues were detected:", fg="bright_red")
        for issue in issues:
            click.echo(f"- {issue}")
        sys.exit(1)
    click.secho("✔ No issues detected", fg="green")
|
||||
62
metadata-ingestion/src/datahub/check/docker.py
Normal file
62
metadata-ingestion/src/datahub/check/docker.py
Normal file
@ -0,0 +1,62 @@
|
||||
from typing import List
|
||||
|
||||
import docker
|
||||
|
||||
# Container names that must exist in the `datahub` docker-compose project
# for a quickstart deployment to be considered complete.
REQUIRED_CONTAINERS = [
    "elasticsearch-setup",
    "elasticsearch",
    "datahub-gms",
    "datahub-mce-consumer",
    "datahub-frontend-react",
    "datahub-mae-consumer",
    "kafka-topics-ui",
    "kafka-rest-proxy",
    "kafka-setup",
    "schema-registry-ui",
    "schema-registry",
    "broker",
    "kibana",
    "mysql",
    "neo4j",
    "zookeeper",
]

# One-shot setup containers: they exit after finishing their job, so a
# non-running status is expected rather than an error.
ALLOW_STOPPED = [
    "kafka-setup",
    "elasticsearch-setup",
]
|
||||
|
||||
|
||||
def check_local_docker_containers() -> List[str]:
    """Inspect the local `datahub` docker-compose project for problems.

    Returns a list of human-readable issue descriptions; an empty list
    means everything required is present, running, and healthy.
    """
    problems: List[str] = []

    docker_client = docker.from_env()
    datahub_containers = docker_client.containers.list(
        all=True,
        filters={"label": "com.docker.compose.project=datahub"},
    )

    # Nothing at all → the compose stack was never started.
    if not datahub_containers:
        problems.append("quickstart.sh or dev.sh is not running")
    else:
        present_names = {c.name for c in datahub_containers}
        for name in set(REQUIRED_CONTAINERS) - present_names:
            problems.append(f"{name} container is not present")

    # Verify each container is running and, when a healthcheck exists, healthy.
    for c in datahub_containers:
        if c.name in ALLOW_STOPPED:
            continue
        if c.status != "running":
            problems.append(f"{c.name} is not running")
            continue
        state = c.attrs["State"]
        if "Health" not in state:
            continue
        health_status = state["Health"]["Status"]
        if health_status == "starting":
            problems.append(f"{c.name} is still starting")
        elif health_status != "healthy":
            problems.append(f"{c.name} is running but not healthy")

    return problems
|
||||
8
metadata-ingestion/src/datahub/check/json_file.py
Normal file
8
metadata-ingestion/src/datahub/check/json_file.py
Normal file
@ -0,0 +1,8 @@
|
||||
from datahub.ingestion.source.mce_file import MetadataFileSource
|
||||
|
||||
|
||||
def check_mce_file(filepath: str) -> str:
    """Validate all MCEs in the JSON file at *filepath*.

    Draining the source's workunit generator presumably forces each MCE to
    be parsed, so a malformed record surfaces as an exception here. On
    success, returns a one-line summary of how many MCEs were found.
    """
    source = MetadataFileSource.create({"filename": filepath}, None)
    for _ in source.get_workunits():
        pass
    count = source.get_report().workunits_produced
    return f"{count} MCEs found - all valid"
|
||||
@ -6,12 +6,12 @@ import sys
|
||||
import click
|
||||
from pydantic import ValidationError
|
||||
|
||||
from datahub.check.check_cli import check
|
||||
from datahub.configuration.common import ConfigurationError, ConfigurationMechanism
|
||||
from datahub.configuration.toml import TomlConfigurationMechanism
|
||||
from datahub.configuration.yaml import YamlConfigurationMechanism
|
||||
from datahub.ingestion.run.pipeline import Pipeline
|
||||
from datahub.ingestion.sink.sink_registry import sink_registry
|
||||
from datahub.ingestion.source.mce_file import check_mce_file
|
||||
from datahub.ingestion.source.source_registry import source_registry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -97,15 +97,4 @@ def ingest_list_plugins() -> None:
|
||||
click.echo('If a plugin is disabled, try running: pip install ".[<plugin>]"')
|
||||
|
||||
|
||||
@datahub.group()
|
||||
def check() -> None:
|
||||
pass
|
||||
|
||||
|
||||
@check.command()
|
||||
@click.argument("json-file", type=click.Path(exists=True, dir_okay=False))
|
||||
def mce_file(json_file: str) -> None:
|
||||
"""Check the schema of a MCE JSON file"""
|
||||
|
||||
report = check_mce_file(json_file)
|
||||
click.echo(report)
|
||||
datahub.add_command(check)
|
||||
|
||||
@ -41,10 +41,3 @@ class MetadataFileSource(Source):
|
||||
|
||||
def close(self):
    # NOTE(review): no cleanup performed — this source appears to hold no
    # open resources; confirm against the full class definition.
    pass
|
||||
|
||||
|
||||
def check_mce_file(filepath: str) -> str:
    """Validate all MCEs in the JSON file at *filepath*.

    Iterating the workunits presumably forces each MCE to be parsed, so an
    invalid record raises here; on success returns a one-line summary.
    """
    mce_source = MetadataFileSource.create({"filename": filepath}, None)
    # Drain the generator purely for its validation side effect.
    for _ in mce_source.get_workunits():
        pass
    return f"{mce_source.get_report().workunits_produced} MCEs found - all valid"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user