feat: datahub check local-docker (#2295)

Harshal Sheth 2021-03-26 10:03:51 -07:00 committed by GitHub
parent 1de47d6a6a
commit 07f4cb1199
10 changed files with 119 additions and 20 deletions


@@ -94,6 +94,10 @@ services:
      - "9200:9200"
    volumes:
      - esdata:/usr/share/elasticsearch/data
    healthcheck:
      test: ["CMD-SHELL", "curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s' || exit 1"]
      start_period: 2m
      retries: 4

  kibana:
    image: kibana:7.9.3
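
The healthcheck added above is what the new `datahub check local-docker` command keys off: the Docker engine records each probe result in the container's state, and the docker SDK (added as a dependency later in this commit) exposes that state. A minimal sketch of reading it directly, assuming the quickstart is running and the container is named `elasticsearch` as in this compose file:

```python
# Sketch: read the health status produced by the compose healthcheck above.
# Assumes the local quickstart is running and the container is named "elasticsearch".
import docker

client = docker.from_env()
container = client.containers.get("elasticsearch")
state = container.attrs["State"]

# "Health" is only present for containers that define a healthcheck.
health = state.get("Health", {}).get("Status", "no healthcheck")
print(f"{container.name}: status={container.status}, health={health}")
```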


@@ -1,6 +1,12 @@
# Debugging Guide
## How can I confirm if all Docker containers are running as expected after a quickstart?
If you set up the `datahub` CLI tool (see [here](../metadata-ingestion/README.md)), you can use the built-in check utility:
```sh
datahub check local-docker
```
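
Based on the `check_local_docker_containers` implementation added in this commit, the command prints `✔ No issues detected` and exits 0 when everything is healthy; otherwise it lists the problems it found and exits with a non-zero status. A hypothetical failure might look like:

```
The following issues were detected:
- datahub-gms is still starting
- kibana is not running
```
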
You can list all Docker containers in your local environment by running `docker container ls`. You should expect to see output similar to the one below:
```


@@ -48,6 +48,8 @@ ignore_missing_imports = yes
ignore_missing_imports = yes
[mypy-pymongo.*]
ignore_missing_imports = yes
[mypy-docker.*]
ignore_missing_imports = yes
[isort]
profile = black


@@ -37,6 +37,7 @@ framework_common = {
    "click>=7.1.1",
    "pyyaml>=5.4.1",
    "toml>=0.10.0",
    "docker>=4.4",
    "avro-gen3==0.3.8",
    "avro-python3>=1.8.2",
}


@@ -0,0 +1,34 @@
import sys

import click

from datahub.check.docker import check_local_docker_containers
from datahub.check.json_file import check_mce_file


@click.group()
def check() -> None:
    pass


@check.command()
@click.argument("json-file", type=click.Path(exists=True, dir_okay=False))
def mce_file(json_file: str) -> None:
    """Check the schema of a MCE JSON file"""
    report = check_mce_file(json_file)
    click.echo(report)


@check.command()
def local_docker() -> None:
    """Check that the local Docker containers are healthy"""
    issues = check_local_docker_containers()
    if not issues:
        click.secho("✔ No issues detected", fg="green")
    else:
        click.secho("The following issues were detected:", fg="bright_red")
        for issue in issues:
            click.echo(f"- {issue}")
        sys.exit(1)
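
Click derives subcommand names from function names by replacing underscores with dashes, so `local_docker` above is invoked as `datahub check local-docker`. A quick way to exercise the group in isolation is click's test runner; a sketch, assuming the `datahub` package is installed:

```python
# Sketch: invoke the new check group directly with click's test runner.
from click.testing import CliRunner

from datahub.check.check_cli import check

runner = CliRunner()
result = runner.invoke(check, ["local-docker"])

# Exit code 0 means no issues were detected; 1 means issues were listed in the output.
print(result.exit_code)
print(result.output)
```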


@@ -0,0 +1,62 @@
from typing import List

import docker

REQUIRED_CONTAINERS = [
    "elasticsearch-setup",
    "elasticsearch",
    "datahub-gms",
    "datahub-mce-consumer",
    "datahub-frontend-react",
    "datahub-mae-consumer",
    "kafka-topics-ui",
    "kafka-rest-proxy",
    "kafka-setup",
    "schema-registry-ui",
    "schema-registry",
    "broker",
    "kibana",
    "mysql",
    "neo4j",
    "zookeeper",
]

ALLOW_STOPPED = [
    "kafka-setup",
    "elasticsearch-setup",
]


def check_local_docker_containers() -> List[str]:
    issues: List[str] = []
    client = docker.from_env()
    containers = client.containers.list(
        all=True,
        filters={
            "label": "com.docker.compose.project=datahub",
        },
    )

    # Check number of containers.
    if len(containers) == 0:
        issues.append("quickstart.sh or dev.sh is not running")
    else:
        existing_containers = set(container.name for container in containers)
        missing_containers = set(REQUIRED_CONTAINERS) - existing_containers
        for missing in missing_containers:
            issues.append(f"{missing} container is not present")

    # Check that the containers are running and healthy.
    for container in containers:
        if container.name in ALLOW_STOPPED:
            continue
        elif container.status != "running":
            issues.append(f"{container.name} is not running")
        elif "Health" in container.attrs["State"]:
            if container.attrs["State"]["Health"]["Status"] == "starting":
                issues.append(f"{container.name} is still starting")
            elif container.attrs["State"]["Health"]["Status"] != "healthy":
                issues.append(f"{container.name} is running but not healthy")

    return issues
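
Because `check_local_docker_containers` is plain Python with no CLI dependency, it can also be reused programmatically, for example to wait for the quickstart to become healthy. A minimal sketch under that assumption:

```python
# Sketch: poll check_local_docker_containers() until the quickstart reports healthy.
import time

from datahub.check.docker import check_local_docker_containers

for attempt in range(30):
    issues = check_local_docker_containers()
    if not issues:
        print("All DataHub containers are healthy.")
        break
    print(f"attempt {attempt + 1}: {len(issues)} issue(s) remaining")
    time.sleep(10)
else:
    # Still unhealthy after ~5 minutes; surface the remaining issues.
    raise RuntimeError("DataHub quickstart did not become healthy: " + "; ".join(issues))
```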


@@ -0,0 +1,8 @@
from datahub.ingestion.source.mce_file import MetadataFileSource


def check_mce_file(filepath: str) -> str:
    mce_source = MetadataFileSource.create({"filename": filepath}, None)
    for _ in mce_source.get_workunits():
        pass
    return f"{mce_source.get_report().workunits_produced} MCEs found - all valid"


@@ -6,12 +6,12 @@ import sys
import click
from pydantic import ValidationError
from datahub.check.check_cli import check
from datahub.configuration.common import ConfigurationError, ConfigurationMechanism
from datahub.configuration.toml import TomlConfigurationMechanism
from datahub.configuration.yaml import YamlConfigurationMechanism
from datahub.ingestion.run.pipeline import Pipeline
from datahub.ingestion.sink.sink_registry import sink_registry
from datahub.ingestion.source.mce_file import check_mce_file
from datahub.ingestion.source.source_registry import source_registry
logger = logging.getLogger(__name__)
@@ -97,15 +97,4 @@ def ingest_list_plugins() -> None:
    click.echo('If a plugin is disabled, try running: pip install ".[<plugin>]"')


@datahub.group()
def check() -> None:
    pass


@check.command()
@click.argument("json-file", type=click.Path(exists=True, dir_okay=False))
def mce_file(json_file: str) -> None:
    """Check the schema of a MCE JSON file"""
    report = check_mce_file(json_file)
    click.echo(report)


datahub.add_command(check)


@@ -41,10 +41,3 @@ class MetadataFileSource(Source):
    def close(self):
        pass


def check_mce_file(filepath: str) -> str:
    mce_source = MetadataFileSource.create({"filename": filepath}, None)
    for _ in mce_source.get_workunits():
        pass
    return f"{mce_source.get_report().workunits_produced} MCEs found - all valid"