mirror of
https://github.com/datahub-project/datahub.git
synced 2025-06-27 05:03:31 +00:00
feat(elastic-as-graph): defaulting to elastic in quickstart (#2753)
This commit is contained in:
parent
82468016ae
commit
62ba937bb7
@ -34,12 +34,15 @@ The main components are powered by 4 external dependencies:
|
||||
- Kafka
|
||||
- Local DB (MySQL, Postgres, MariaDB)
|
||||
- Search Index (Elasticsearch)
|
||||
- Graph Index (Supports only Neo4j)
|
||||
- Graph Index (Supports either Neo4j or Elasticsearch)
|
||||
|
||||
The dependencies must be deployed before deploying DataHub. We created a separate
|
||||
[chart](https://github.com/linkedin/datahub/tree/master/datahub-kubernetes/prerequisites)
|
||||
for deploying the dependencies with example configuration. They could also be deployed
|
||||
separately on-prem or leveraged as managed services.
|
||||
separately on-prem or leveraged as managed services. To remove your dependency on Neo4j,
|
||||
set Neo4j's `enabled` flag to `false` in the `datahub-kubernetes/prerequisites/values.yaml` file.
|
||||
Then, override the `graph_service_impl` field in `datahub-kubernetes/datahub/values.yaml` to
|
||||
have the value `elasticsearch` instead of `neo4j`.
|
||||
|
||||
## Quickstart
|
||||
Assuming your kubectl context points to the correct Kubernetes cluster, first create Kubernetes secrets that contain the MySQL and Neo4j passwords.
|
||||
@ -130,5 +133,3 @@ to expose the 9002 port to the public.
|
||||
| helm uninstall datahub | Remove DataHub |
|
||||
| helm ls | List of Helm charts |
|
||||
| helm history | Fetch a release history |
|
||||
|
||||
|
||||
|
@ -32,7 +32,7 @@ Dependencies:
|
||||
* [Kafka, Zookeeper, and Schema Registry](kafka-setup)
|
||||
* [Elasticsearch](elasticsearch-setup)
|
||||
* [MySQL](mysql)
|
||||
* [Neo4j](neo4j)
|
||||
* [(Optional) Neo4j](neo4j)
|
||||
|
||||
### Ingesting demo data.
|
||||
|
||||
|
@ -15,4 +15,17 @@ DEFAULT_VERSION=$(echo $TAG_VERSION | sed 's/undefined/head/')
|
||||
export DATAHUB_VERSION=${DATAHUB_VERSION:-${DEFAULT_VERSION}}
|
||||
|
||||
echo "Quickstarting DataHub: version ${DATAHUB_VERSION}"
|
||||
cd $DIR && docker-compose pull && docker-compose -p datahub up
|
||||
# Choose the graph backend from existing local state: if a Docker volume whose
# name contains "datahub_neo4jdata" is listed, start the default compose setup
# (neo4j as graph service); otherwise start the compose file without neo4j.
# grep -q already suppresses output and signals a match via its exit status,
# so no -c is needed.
if docker volume ls | grep -q datahub_neo4jdata
then
  echo "Datahub Neo4j volume found, starting with neo4j as graph service"
  # "$DIR" is quoted so a checkout path containing spaces does not word-split.
  cd "$DIR" && docker-compose pull && docker-compose -p datahub up
else
  echo "No Datahub Neo4j volume found, starting with elasticsearch as graph service"
  cd "$DIR" && \
  docker-compose \
    -f quickstart/docker-compose-without-neo4j.quickstart.yml \
    pull && \
  docker-compose -p datahub \
    -f quickstart/docker-compose-without-neo4j.quickstart.yml \
    up
fi
|
||||
|
@ -17,11 +17,21 @@ from datahub.cli.docker_check import (
|
||||
)
|
||||
from datahub.ingestion.run.pipeline import Pipeline
|
||||
|
||||
SIMPLE_QUICKSTART_COMPOSE_FILE = "docker/quickstart/docker-compose.quickstart.yml"
|
||||
NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_FILE = (
|
||||
"docker/quickstart/docker-compose.quickstart.yml"
|
||||
)
|
||||
ELASTIC_QUICKSTART_COMPOSE_FILE = (
|
||||
"docker/quickstart/docker-compose-without-neo4j.quickstart.yml"
|
||||
)
|
||||
BOOTSTRAP_MCES_FILE = "metadata-ingestion/examples/mce_files/bootstrap_mce.json"
|
||||
|
||||
GITHUB_BASE_URL = "https://raw.githubusercontent.com/linkedin/datahub/master"
|
||||
GITHUB_QUICKSTART_COMPOSE_URL = f"{GITHUB_BASE_URL}/{SIMPLE_QUICKSTART_COMPOSE_FILE}"
|
||||
GITHUB_NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_URL = (
|
||||
f"{GITHUB_BASE_URL}/{NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_FILE}"
|
||||
)
|
||||
GITHUB_ELASTIC_QUICKSTART_COMPOSE_URL = (
|
||||
f"{GITHUB_BASE_URL}/{ELASTIC_QUICKSTART_COMPOSE_FILE}"
|
||||
)
|
||||
GITHUB_BOOTSTRAP_MCES_URL = f"{GITHUB_BASE_URL}/{BOOTSTRAP_MCES_FILE}"
|
||||
|
||||
|
||||
@ -58,6 +68,30 @@ def check() -> None:
|
||||
docker_check_impl()
|
||||
|
||||
|
||||
def check_neo4j_volume_exists() -> bool:
    """Report whether a DataHub Neo4j Docker volume is present.

    Obtains a Docker client through ``get_client_with_error`` and lists the
    volumes matching the name filter ``datahub_neo4jdata``. A message
    explaining which graph service will be used is printed either way.

    Returns:
        True if the filtered volume list is non-empty. False if no volume
        matches, or if the Docker client reported an error (in which case a
        red warning is printed instead of the backend message).
    """
    with get_client_with_error() as (client, error):
        if error:
            click.secho(
                "Docker doesn't seem to be running. Did you start it?", fg="red"
            )
            # Explicit False (instead of a bare return yielding None) keeps
            # the return type a bool; callers that test truthiness see no
            # difference.
            return False

        if client.volumes.list(filters={"name": "datahub_neo4jdata"}):
            click.echo(
                "Datahub Neo4j volume found, starting with neo4j as graph service.\n"
                "If you want to run using elastic, run `datahub docker nuke` and re-ingest your data.\n"
            )
            return True

        click.echo(
            "No Datahub Neo4j volume found, starting with elasticsearch as graph service.\n"
            "To use neo4j as a graph backend, run \n"
            "`datahub docker quickstart --quickstart-compose-file ./docker/quickstart/docker-compose.quickstart.yml`"
            "\nfrom the root of the datahub repo\n"
        )
        return False
|
||||
|
||||
|
||||
@docker.command()
|
||||
@click.option(
|
||||
"--version",
|
||||
@ -115,7 +149,11 @@ def quickstart(
|
||||
quickstart_compose_file.append(path)
|
||||
|
||||
# Download the quickstart docker-compose file from GitHub.
|
||||
quickstart_download_response = requests.get(GITHUB_QUICKSTART_COMPOSE_URL)
|
||||
quickstart_download_response = requests.get(
|
||||
GITHUB_NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_URL
|
||||
if check_neo4j_volume_exists()
|
||||
else GITHUB_ELASTIC_QUICKSTART_COMPOSE_URL
|
||||
)
|
||||
quickstart_download_response.raise_for_status()
|
||||
tmp_file.write(quickstart_download_response.content)
|
||||
|
||||
|
@ -12,7 +12,6 @@ REQUIRED_CONTAINERS = [
|
||||
"schema-registry",
|
||||
"broker",
|
||||
"mysql",
|
||||
"neo4j",
|
||||
"zookeeper",
|
||||
# These two containers are not necessary - only helpful in debugging.
|
||||
# "kafka-topics-ui",
|
||||
@ -33,6 +32,7 @@ CONTAINERS_TO_CHECK_IF_PRESENT = [
|
||||
# We only add this container in some cases, but if it's present, we
|
||||
# definitely want to check that it exits properly.
|
||||
"mysql-setup",
|
||||
"neo4j",
|
||||
]
|
||||
|
||||
# Docker seems to under-report memory allocated, so we also need a bit of buffer to account for it.
|
||||
|
Loading…
x
Reference in New Issue
Block a user