fix(quickstart): support for user-level sockets, arch option for forcing quickstart architecture (#6279)

This commit is contained in:
Shirshanka Das 2022-10-25 11:45:27 -07:00 committed by GitHub
parent aeed817783
commit abf1b11a5e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 111 additions and 14 deletions

View File

@ -4,15 +4,25 @@
To deploy a new instance of DataHub, perform the following steps.
1. Install [docker](https://docs.docker.com/install/), [jq](https://stedolan.github.io/jq/download/) and [docker-compose v1 ](https://github.com/docker/compose/blob/master/INSTALL.md) (if
using Linux). Make sure to allocate enough hardware resources for Docker engine. Tested & confirmed config: 2 CPUs,
8GB RAM, 2GB Swap area, and 10GB disk space.
2. Launch the Docker Engine from command line or the desktop app.
1. Install Docker for your platform.
- On Windows or Mac, install [Docker Desktop](https://www.docker.com/products/docker-desktop/).
- On Linux, install [Docker for Linux](https://docs.docker.com/desktop/install/linux-install/).
:::note
Make sure to allocate enough hardware resources for Docker engine.
Tested & confirmed config: 2 CPUs, 8GB RAM, 2GB Swap area, and 10GB disk space.
:::
2. Install [jq](https://stedolan.github.io/jq/download/)
3. Launch the Docker Engine from command line or the desktop app.
3. Install the DataHub CLI
a. Ensure you have Python 3.6+ installed & configured. (Check using `python3 --version`)
a. Ensure you have Python 3.7+ installed & configured. (Check using `python3 --version`).
b. Run the following commands in your terminal
@ -37,11 +47,44 @@ To deploy a new instance of DataHub, perform the following steps.
```
This will deploy a DataHub instance using [docker-compose](https://docs.docker.com/compose/).
If you are curious, the `docker-compose.yaml` file is downloaded to your home directory under the `.datahub/quickstart` directory.
If things go well, you should see messages like the ones below:
```
Fetching docker-compose file https://raw.githubusercontent.com/datahub-project/datahub/master/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml from GitHub
Pulling docker images...
Finished pulling docker images!
[+] Running 11/11
⠿ Container zookeeper Running 0.0s
⠿ Container elasticsearch Running 0.0s
⠿ Container broker Running 0.0s
⠿ Container schema-registry Running 0.0s
⠿ Container elasticsearch-setup Started 0.7s
⠿ Container kafka-setup Started 0.7s
⠿ Container mysql Running 0.0s
⠿ Container datahub-gms Running 0.0s
⠿ Container mysql-setup Started 0.7s
⠿ Container datahub-datahub-actions-1 Running 0.0s
⠿ Container datahub-frontend-react Running 0.0s
.......
✔ DataHub is now running
Ingest some demo data using `datahub docker ingest-sample-data`,
or head to http://localhost:9002 (username: datahub, password: datahub) to play around with the frontend.
Need support? Get in touch on Slack: https://slack.datahubproject.io/
```
Upon completion of this step, you should be able to navigate to the DataHub UI
at [http://localhost:9002](http://localhost:9002) in your browser. You can sign in using `datahub` as both the
username and password.
:::note
On Mac computers with Apple Silicon (M1, M2, etc.), you might see an error like `no matching manifest for linux/arm64/v8 in the manifest list entries`. This typically means that the DataHub CLI was not able to detect that you are running it on Apple Silicon. To resolve this issue, override the default architecture detection by running `datahub docker quickstart --arch m1`.
:::
5. To ingest the sample metadata, run the following CLI command from your terminal
@ -100,6 +143,13 @@ By default the quickstart deploy will require the following ports to be free on
</details>
<details>
<summary>
no matching manifest for linux/arm64/v8 in the manifest list entries
</summary>
On Mac computers with Apple Silicon (M1, M2, etc.), you might see an error like `no matching manifest for linux/arm64/v8 in the manifest list entries`. This typically means that the DataHub CLI was not able to detect that you are running it on Apple Silicon. To resolve this issue, override the default architecture detection by running `datahub docker quickstart --arch m1`.
</details>
<details>
<summary>
Miscellaneous Docker issues

View File

@ -1,3 +1,5 @@
# Get Started With DataHub
This page is under construction - more details coming soon!
To get started with running the open-source DataHub locally on your system (a.k.a. Self-Hosted DataHub), head over to the [QuickStart section](../quickstart.md).
To get started with the [Acryl Data](https://acryldata.io) provided SaaS instance of DataHub (a.k.a. Managed DataHub), head over to the [Managed DataHub section](../managed-datahub/saas-slack-setup.md).

View File

@ -1,3 +1,4 @@
import os
from contextlib import contextmanager
from typing import Iterator, List, Optional, Tuple
@ -45,11 +46,21 @@ MIN_MEMORY_NEEDED = 3.8 # GB
def get_client_with_error() -> Iterator[
Tuple[docker.DockerClient, Optional[Exception]]
]:
docker_cli = None
try:
docker_cli = docker.from_env()
except docker.errors.DockerException as error:
yield None, error
else:
try:
# newer docker versions create the socket in a user directory, try that before giving up
maybe_sock_path = os.path.expanduser("~/.docker/run/docker.sock")
if os.path.exists(maybe_sock_path):
docker_cli = docker.DockerClient(base_url=f"unix://{maybe_sock_path}")
else:
yield None, error
except docker.errors.DockerException as error:
yield None, error
if docker_cli is not None:
try:
yield docker_cli, None
finally:

View File

@ -9,6 +9,7 @@ import subprocess
import sys
import tempfile
import time
from enum import Enum
from pathlib import Path
from typing import Dict, List, NoReturn, Optional
@ -58,6 +59,13 @@ GITHUB_M1_QUICKSTART_COMPOSE_URL = f"{GITHUB_BASE_URL}/{M1_QUICKSTART_COMPOSE_FI
GITHUB_BOOTSTRAP_MCES_URL = f"{GITHUB_BASE_URL}/{BOOTSTRAP_MCES_FILE}"
class Architectures(Enum):
    """Architecture names accepted when selecting quickstart images.

    Each member's value is the lowercase string it is matched against.
    """

    # Intel/AMD 64-bit hosts.
    x86 = "x86"

    # Generic 64-bit ARM hosts.
    arm64 = "arm64"

    # Apple Silicon spellings, kept as separate members so that users can
    # type the chip name directly.
    m1 = "m1"
    m2 = "m2"
@functools.lru_cache()
def _docker_subprocess_env() -> Dict[str, str]:
try:
@ -125,6 +133,10 @@ def is_m1() -> bool:
return False
def is_arch_m1(arch: Architectures) -> bool:
    """Return True when ``arch`` is one of the ARM-family architectures.

    ``arm64``, ``m1`` and ``m2`` all count; anything else yields False.
    """
    arm_family = (Architectures.arm64, Architectures.m1, Architectures.m2)
    return arch in arm_family
def should_use_neo4j_for_graph_service(graph_service_override: Optional[str]) -> bool:
if graph_service_override is not None:
if graph_service_override == "elasticsearch":
@ -383,6 +395,24 @@ DATAHUB_MAE_CONSUMER_PORT=9091
return result.returncode
def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
    """Pick the architecture whose quickstart images should be used.

    The default comes from ``is_m1()``: ``Architectures.arm64`` when it
    reports True, ``Architectures.x86`` otherwise. A non-empty ``arch``
    overrides that default; it is matched case-insensitively against the
    values of ``Architectures``.

    Args:
        arch: Optional user override (the value of the ``--arch`` option).
            ``None`` or an empty string keeps the auto-detected default.

    Returns:
        The selected ``Architectures`` member.

    Raises:
        click.BadParameter: If ``arch`` is given but matches no
            ``Architectures`` value.
    """
    running_on_m1 = is_m1()
    if running_on_m1:
        click.secho("Detected M1 machine", fg="yellow")

    if not arch:
        # No override requested: use the auto-detected default silently.
        return Architectures.arm64 if running_on_m1 else Architectures.x86

    requested = arch.lower()
    matched_arch = [a for a in Architectures if a.value == requested]
    if not matched_arch:
        # Fail with a clean CLI error. Previously the message was printed and
        # execution fell through to ``matched_arch[0]``, raising IndexError.
        raise click.BadParameter(
            f"Failed to match arch {arch} with list of architectures supported {[a.value for a in Architectures]}",
            param_hint="--arch",
        )

    quickstart_arch = matched_arch[0]
    click.secho(f"Using architecture {quickstart_arch}", fg="yellow")
    return quickstart_arch
@docker.command()
@click.option(
"--version",
@ -518,6 +548,11 @@ DATAHUB_MAE_CONSUMER_PORT=9091
default=False,
help="Launches MAE & MCE consumers as stand alone docker containers",
)
@click.option(
"--arch",
required=False,
help="Specify the architecture for the quickstart images to use. Options are x86, arm64, m1 etc.",
)
@upgrade.check_upgrade
@telemetry.with_telemetry
def quickstart(
@ -540,6 +575,7 @@ def quickstart(
restore_indices: bool,
no_restore_indices: bool,
standalone_consumers: bool,
arch: Optional[str],
) -> None:
"""Start an instance of DataHub locally using docker-compose.
@ -567,9 +603,7 @@ def quickstart(
)
return
running_on_m1 = is_m1()
if running_on_m1:
click.secho("Detected M1 machine", fg="yellow")
quickstart_arch = detect_quickstart_arch(arch)
# Run pre-flight checks.
issues = check_local_docker_containers(preflight_only=True)
@ -590,16 +624,16 @@ def quickstart(
elif not quickstart_compose_file:
# download appropriate quickstart file
should_use_neo4j = should_use_neo4j_for_graph_service(graph_service_impl)
if should_use_neo4j and running_on_m1:
if should_use_neo4j and is_arch_m1(quickstart_arch):
click.secho(
"Running with neo4j on M1 is not currently supported, will be using elasticsearch as graph",
fg="red",
)
github_file = (
GITHUB_NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_URL
if should_use_neo4j and not running_on_m1
if should_use_neo4j and not is_arch_m1(quickstart_arch)
else GITHUB_ELASTIC_QUICKSTART_COMPOSE_URL
if not running_on_m1
if not is_arch_m1(quickstart_arch)
else GITHUB_M1_QUICKSTART_COMPOSE_URL
)