mirror of
https://github.com/datahub-project/datahub.git
synced 2025-08-31 12:52:13 +00:00
feat(quickstart): Adding env variables and cli options for customizing mapped ports in quickstart (#5353)
Co-authored-by: Shirshanka Das <shirshanka@apache.org>
This commit is contained in:
parent
489b5bb5b4
commit
f3e5afdba9
@ -9,7 +9,7 @@ services:
|
|||||||
env_file: mysql/env/docker.env
|
env_file: mysql/env/docker.env
|
||||||
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin
|
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin
|
||||||
ports:
|
ports:
|
||||||
- "3306:3306"
|
- ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306
|
||||||
volumes:
|
volumes:
|
||||||
- ./mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
|
- ./mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
|
||||||
- mysqldata:/var/lib/mysql
|
- mysqldata:/var/lib/mysql
|
||||||
|
@ -12,7 +12,7 @@ services:
|
|||||||
hostname: zookeeper
|
hostname: zookeeper
|
||||||
container_name: zookeeper
|
container_name: zookeeper
|
||||||
ports:
|
ports:
|
||||||
- "2181:2181"
|
- ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181
|
||||||
volumes:
|
volumes:
|
||||||
- zkdata:/var/opt/zookeeper
|
- zkdata:/var/opt/zookeeper
|
||||||
|
|
||||||
@ -24,8 +24,7 @@ services:
|
|||||||
depends_on:
|
depends_on:
|
||||||
- zookeeper
|
- zookeeper
|
||||||
ports:
|
ports:
|
||||||
- "29092:29092"
|
- ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092
|
||||||
- "9092:9092"
|
|
||||||
volumes:
|
volumes:
|
||||||
- broker:/var/lib/kafka/data/
|
- broker:/var/lib/kafka/data/
|
||||||
|
|
||||||
@ -50,7 +49,7 @@ services:
|
|||||||
- zookeeper
|
- zookeeper
|
||||||
- broker
|
- broker
|
||||||
ports:
|
ports:
|
||||||
- "8081:8081"
|
- ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081
|
||||||
|
|
||||||
elasticsearch:
|
elasticsearch:
|
||||||
image: elasticsearch:7.9.3
|
image: elasticsearch:7.9.3
|
||||||
@ -58,7 +57,7 @@ services:
|
|||||||
container_name: elasticsearch
|
container_name: elasticsearch
|
||||||
hostname: elasticsearch
|
hostname: elasticsearch
|
||||||
ports:
|
ports:
|
||||||
- "9200:9200"
|
- ${DATAHUB_MAPPED_ELASTIC_PORT:-9200}:9200
|
||||||
environment:
|
environment:
|
||||||
- discovery.type=single-node
|
- discovery.type=single-node
|
||||||
- xpack.security.enabled=false
|
- xpack.security.enabled=false
|
||||||
@ -75,8 +74,8 @@ services:
|
|||||||
hostname: neo4j
|
hostname: neo4j
|
||||||
container_name: neo4j
|
container_name: neo4j
|
||||||
ports:
|
ports:
|
||||||
- "7474:7474"
|
- ${DATAHUB_MAPPED_NEO4J_HTTP_PORT:-7474}:7474
|
||||||
- "7687:7687"
|
- ${DATAHUB_MAPPED_NEO4J_BOLT_PORT:-7687}:7687
|
||||||
volumes:
|
volumes:
|
||||||
- neo4jdata:/data
|
- neo4jdata:/data
|
||||||
|
|
||||||
@ -100,7 +99,7 @@ services:
|
|||||||
hostname: datahub-gms
|
hostname: datahub-gms
|
||||||
container_name: datahub-gms
|
container_name: datahub-gms
|
||||||
ports:
|
ports:
|
||||||
- "8080:8080"
|
- ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080
|
||||||
depends_on:
|
depends_on:
|
||||||
- elasticsearch-setup
|
- elasticsearch-setup
|
||||||
- kafka-setup
|
- kafka-setup
|
||||||
@ -116,7 +115,7 @@ services:
|
|||||||
hostname: datahub-frontend-react
|
hostname: datahub-frontend-react
|
||||||
container_name: datahub-frontend-react
|
container_name: datahub-frontend-react
|
||||||
ports:
|
ports:
|
||||||
- "9002:9002"
|
- ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002
|
||||||
depends_on:
|
depends_on:
|
||||||
- datahub-gms
|
- datahub-gms
|
||||||
volumes:
|
volumes:
|
||||||
|
@ -17,8 +17,7 @@ services:
|
|||||||
hostname: broker
|
hostname: broker
|
||||||
image: kymeric/cp-kafka:latest
|
image: kymeric/cp-kafka:latest
|
||||||
ports:
|
ports:
|
||||||
- 29092:29092
|
- ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092
|
||||||
- 9092:9092
|
|
||||||
datahub-actions:
|
datahub-actions:
|
||||||
depends_on:
|
depends_on:
|
||||||
- datahub-gms
|
- datahub-gms
|
||||||
@ -57,7 +56,7 @@ services:
|
|||||||
hostname: datahub-frontend-react
|
hostname: datahub-frontend-react
|
||||||
image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head}
|
image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head}
|
||||||
ports:
|
ports:
|
||||||
- 9002:9002
|
- ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002
|
||||||
volumes:
|
volumes:
|
||||||
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
|
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
|
||||||
datahub-gms:
|
datahub-gms:
|
||||||
@ -86,7 +85,7 @@ services:
|
|||||||
hostname: datahub-gms
|
hostname: datahub-gms
|
||||||
image: linkedin/datahub-gms:${DATAHUB_VERSION:-head}
|
image: linkedin/datahub-gms:${DATAHUB_VERSION:-head}
|
||||||
ports:
|
ports:
|
||||||
- 8080:8080
|
- ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080
|
||||||
volumes:
|
volumes:
|
||||||
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
|
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
|
||||||
elasticsearch:
|
elasticsearch:
|
||||||
@ -106,7 +105,7 @@ services:
|
|||||||
image: elasticsearch:7.9.3
|
image: elasticsearch:7.9.3
|
||||||
mem_limit: 1g
|
mem_limit: 1g
|
||||||
ports:
|
ports:
|
||||||
- 9200:9200
|
- ${DATAHUB_MAPPED_ELASTIC_PORT:-9200}:9200
|
||||||
volumes:
|
volumes:
|
||||||
- esdata:/usr/share/elasticsearch/data
|
- esdata:/usr/share/elasticsearch/data
|
||||||
elasticsearch-setup:
|
elasticsearch-setup:
|
||||||
@ -140,7 +139,7 @@ services:
|
|||||||
hostname: mysql
|
hostname: mysql
|
||||||
image: mariadb:10.5.8
|
image: mariadb:10.5.8
|
||||||
ports:
|
ports:
|
||||||
- 3306:3306
|
- ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306
|
||||||
volumes:
|
volumes:
|
||||||
- ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
|
- ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
|
||||||
- mysqldata:/var/lib/mysql
|
- mysqldata:/var/lib/mysql
|
||||||
@ -167,7 +166,7 @@ services:
|
|||||||
hostname: schema-registry
|
hostname: schema-registry
|
||||||
image: eugenetea/schema-registry-arm64:latest
|
image: eugenetea/schema-registry-arm64:latest
|
||||||
ports:
|
ports:
|
||||||
- 8081:8081
|
- ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081
|
||||||
zookeeper:
|
zookeeper:
|
||||||
container_name: zookeeper
|
container_name: zookeeper
|
||||||
environment:
|
environment:
|
||||||
@ -176,7 +175,7 @@ services:
|
|||||||
hostname: zookeeper
|
hostname: zookeeper
|
||||||
image: kymeric/cp-zookeeper:latest
|
image: kymeric/cp-zookeeper:latest
|
||||||
ports:
|
ports:
|
||||||
- 2181:2181
|
- ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181
|
||||||
volumes:
|
volumes:
|
||||||
- zkdata:/var/opt/zookeeper
|
- zkdata:/var/opt/zookeeper
|
||||||
version: '2.3'
|
version: '2.3'
|
||||||
|
@ -17,8 +17,7 @@ services:
|
|||||||
hostname: broker
|
hostname: broker
|
||||||
image: confluentinc/cp-kafka:5.4.0
|
image: confluentinc/cp-kafka:5.4.0
|
||||||
ports:
|
ports:
|
||||||
- 29092:29092
|
- ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092
|
||||||
- 9092:9092
|
|
||||||
volumes:
|
volumes:
|
||||||
- broker:/var/lib/kafka/data/
|
- broker:/var/lib/kafka/data/
|
||||||
datahub-actions:
|
datahub-actions:
|
||||||
@ -59,7 +58,7 @@ services:
|
|||||||
hostname: datahub-frontend-react
|
hostname: datahub-frontend-react
|
||||||
image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head}
|
image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head}
|
||||||
ports:
|
ports:
|
||||||
- 9002:9002
|
- ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002
|
||||||
volumes:
|
volumes:
|
||||||
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
|
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
|
||||||
datahub-gms:
|
datahub-gms:
|
||||||
@ -94,7 +93,7 @@ services:
|
|||||||
hostname: datahub-gms
|
hostname: datahub-gms
|
||||||
image: linkedin/datahub-gms:${DATAHUB_VERSION:-head}
|
image: linkedin/datahub-gms:${DATAHUB_VERSION:-head}
|
||||||
ports:
|
ports:
|
||||||
- 8080:8080
|
- ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080
|
||||||
volumes:
|
volumes:
|
||||||
- ${HOME}/.datahub/plugins/:/etc/datahub/plugins
|
- ${HOME}/.datahub/plugins/:/etc/datahub/plugins
|
||||||
elasticsearch:
|
elasticsearch:
|
||||||
@ -114,7 +113,7 @@ services:
|
|||||||
image: elasticsearch:7.9.3
|
image: elasticsearch:7.9.3
|
||||||
mem_limit: 1g
|
mem_limit: 1g
|
||||||
ports:
|
ports:
|
||||||
- 9200:9200
|
- ${DATAHUB_MAPPED_ELASTIC_PORT:-9200}:9200
|
||||||
volumes:
|
volumes:
|
||||||
- esdata:/usr/share/elasticsearch/data
|
- esdata:/usr/share/elasticsearch/data
|
||||||
elasticsearch-setup:
|
elasticsearch-setup:
|
||||||
@ -148,7 +147,7 @@ services:
|
|||||||
hostname: mysql
|
hostname: mysql
|
||||||
image: mysql:5.7
|
image: mysql:5.7
|
||||||
ports:
|
ports:
|
||||||
- 3306:3306
|
- ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306
|
||||||
volumes:
|
volumes:
|
||||||
- ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
|
- ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
|
||||||
- mysqldata:/var/lib/mysql
|
- mysqldata:/var/lib/mysql
|
||||||
@ -173,8 +172,8 @@ services:
|
|||||||
hostname: neo4j
|
hostname: neo4j
|
||||||
image: neo4j:4.0.6
|
image: neo4j:4.0.6
|
||||||
ports:
|
ports:
|
||||||
- 7474:7474
|
- ${DATAHUB_MAPPED_NEO4J_HTTP_PORT:-7474}:7474
|
||||||
- 7687:7687
|
- ${DATAHUB_MAPPED_NEO4J_BOLT_PORT:-7687}:7687
|
||||||
volumes:
|
volumes:
|
||||||
- neo4jdata:/data
|
- neo4jdata:/data
|
||||||
schema-registry:
|
schema-registry:
|
||||||
@ -188,7 +187,7 @@ services:
|
|||||||
hostname: schema-registry
|
hostname: schema-registry
|
||||||
image: confluentinc/cp-schema-registry:5.4.0
|
image: confluentinc/cp-schema-registry:5.4.0
|
||||||
ports:
|
ports:
|
||||||
- 8081:8081
|
- ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081
|
||||||
zookeeper:
|
zookeeper:
|
||||||
container_name: zookeeper
|
container_name: zookeeper
|
||||||
environment:
|
environment:
|
||||||
@ -197,7 +196,7 @@ services:
|
|||||||
hostname: zookeeper
|
hostname: zookeeper
|
||||||
image: confluentinc/cp-zookeeper:5.4.0
|
image: confluentinc/cp-zookeeper:5.4.0
|
||||||
ports:
|
ports:
|
||||||
- 2181:2181
|
- ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181
|
||||||
volumes:
|
volumes:
|
||||||
- zkdata:/var/opt/zookeeper
|
- zkdata:/var/opt/zookeeper
|
||||||
version: '2.3'
|
version: '2.3'
|
||||||
|
@ -42,10 +42,6 @@ To deploy a new instance of DataHub, perform the following steps.
|
|||||||
at [http://localhost:9002](http://localhost:9002) in your browser. You can sign in using `datahub` as both the
|
at [http://localhost:9002](http://localhost:9002) in your browser. You can sign in using `datahub` as both the
|
||||||
username and password.
|
username and password.
|
||||||
|
|
||||||
If you would like to modify/configure the DataHub installation in some way, please download the [docker-compose.yaml](https://raw.githubusercontent.com/datahub-project/datahub/master/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml) used by the cli tool, modify it as necessary and deploy DataHub by passing the downloaded docker-compose file:
|
|
||||||
```
|
|
||||||
datahub docker quickstart --quickstart-compose-file <path to compose file>
|
|
||||||
```
|
|
||||||
|
|
||||||
5. To ingest the sample metadata, run the following CLI command from your terminal
|
5. To ingest the sample metadata, run the following CLI command from your terminal
|
||||||
|
|
||||||
@ -62,48 +58,11 @@ using the `--token <token>` parameter in the command.
|
|||||||
|
|
||||||
That's it! Now feel free to play around with DataHub!
|
That's it! Now feel free to play around with DataHub!
|
||||||
|
|
||||||
## Next Steps
|
## Troubleshooting Issues
|
||||||
|
|
||||||
### Ingest Metadata
|
<details><summary>
|
||||||
|
Command not found: datahub
|
||||||
To start pushing your company's metadata into DataHub, take a look at the [Metadata Ingestion Framework](../metadata-ingestion/README.md).
|
</summary>
|
||||||
|
|
||||||
### Invite Users
|
|
||||||
|
|
||||||
To add users to your deployment to share with your team check out our [Adding Users to DataHub](authentication/guides/add-users.md)
|
|
||||||
|
|
||||||
### Enable Authentication
|
|
||||||
|
|
||||||
To enable SSO, check out [Configuring OIDC Authentication](authentication/guides/sso/configure-oidc-react.md) or [Configuring JaaS Authentication](authentication/guides/jaas.md).
|
|
||||||
|
|
||||||
To enable backend Authentication, check out [authentication in DataHub's backend](authentication/introducing-metadata-service-authentication.md#Configuring Metadata Service Authentication).
|
|
||||||
|
|
||||||
### Move to Production
|
|
||||||
|
|
||||||
We recommend deploying DataHub to production using Kubernetes. We provide helpful [Helm Charts](https://artifacthub.io/packages/helm/datahub/datahub) to help you quickly get up and running. Check out [Deploying DataHub to Kubernetes](./deploy/kubernetes.md) for a step-by-step walkthrough.
|
|
||||||
|
|
||||||
## Resetting DataHub
|
|
||||||
|
|
||||||
To cleanse DataHub of all of it's state (e.g. before ingesting your own), you can use the CLI `nuke` command.
|
|
||||||
|
|
||||||
```
|
|
||||||
datahub docker nuke
|
|
||||||
```
|
|
||||||
|
|
||||||
## Updating DataHub locally
|
|
||||||
|
|
||||||
If you have been testing DataHub locally, a new version of DataHub got released and you want to try the new version then you can use below commands.
|
|
||||||
|
|
||||||
```
|
|
||||||
datahub docker nuke --keep-data
|
|
||||||
datahub docker quickstart
|
|
||||||
```
|
|
||||||
|
|
||||||
This will keep the data that you have ingested so far in DataHub and start a new quickstart with the latest version of DataHub.
|
|
||||||
|
|
||||||
## Troubleshooting
|
|
||||||
|
|
||||||
### Command not found: datahub
|
|
||||||
|
|
||||||
If running the datahub cli produces "command not found" errors inside your terminal, your system may be defaulting to an
|
If running the datahub cli produces "command not found" errors inside your terminal, your system may be defaulting to an
|
||||||
older version of Python. Try prefixing your `datahub` commands with `python3 -m`:
|
older version of Python. Try prefixing your `datahub` commands with `python3 -m`:
|
||||||
@ -119,9 +78,31 @@ if [ -d "$HOME/.local/bin" ] ; then
|
|||||||
PATH="$HOME/.local/bin:$PATH"
|
PATH="$HOME/.local/bin:$PATH"
|
||||||
fi
|
fi
|
||||||
```
|
```
|
||||||
|
</details>
|
||||||
|
|
||||||
### Miscellaneous Docker issues
|
<details>
|
||||||
|
<summary>
|
||||||
|
Port Conflicts
|
||||||
|
</summary>
|
||||||
|
|
||||||
|
By default the quickstart deploy will require the following ports to be free on your local machine:
|
||||||
|
- 3306 for MySQL
|
||||||
|
- 9200 for Elasticsearch
|
||||||
|
- 9092 for the Kafka broker
|
||||||
|
- 8081 for Schema Registry
|
||||||
|
- 2181 for ZooKeeper
|
||||||
|
- 9002 for the DataHub Web Application (datahub-frontend)
|
||||||
|
- 8080 for the DataHub Metadata Service (datahub-gms)
|
||||||
|
|
||||||
|
In case the default ports conflict with software you are already running on your machine, you can override these ports by passing additional flags to the `datahub docker quickstart` command.
|
||||||
|
For example, to override the MySQL port with 53306 (instead of the default 3306), run `datahub docker quickstart --mysql-port 53306`. Use `datahub docker quickstart --help` to see all the supported options.
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>
|
||||||
|
Miscellaneous Docker issues
|
||||||
|
</summary>
|
||||||
There can be misc issues with Docker, like conflicting containers and dangling volumes, that can often be resolved by
|
There can be misc issues with Docker, like conflicting containers and dangling volumes, that can often be resolved by
|
||||||
pruning your Docker state with the following command. Note that this command removes all unused containers, networks,
|
pruning your Docker state with the following command. Note that this command removes all unused containers, networks,
|
||||||
images (both dangling and unreferenced), and optionally, volumes.
|
images (both dangling and unreferenced), and optionally, volumes.
|
||||||
@ -129,3 +110,66 @@ images (both dangling and unreferenced), and optionally, volumes.
|
|||||||
```
|
```
|
||||||
docker system prune
|
docker system prune
|
||||||
```
|
```
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>
|
||||||
|
Still stuck?
|
||||||
|
</summary>
|
||||||
|
Hop over to our [Slack community](https://slack.datahubproject.io) and ask for help in the [#troubleshoot](https://datahubspace.slack.com/archives/C029A3M079U) channel!
|
||||||
|
</details>
|
||||||
|
|
||||||
|
## Next Steps
|
||||||
|
|
||||||
|
### Ingest Metadata
|
||||||
|
|
||||||
|
To start pushing your company's metadata into DataHub, take a look at the [UI-based Ingestion Guide](./ui-ingestion.md), or to run ingestion using the cli, look at the [Metadata Ingestion Guide](../metadata-ingestion/README.md).
|
||||||
|
|
||||||
|
### Invite Users
|
||||||
|
|
||||||
|
To add users to your deployment to share with your team check out our [Adding Users to DataHub](authentication/guides/add-users.md)
|
||||||
|
|
||||||
|
### Enable Authentication
|
||||||
|
|
||||||
|
To enable SSO, check out [Configuring OIDC Authentication](authentication/guides/sso/configure-oidc-react.md) or [Configuring JaaS Authentication](authentication/guides/jaas.md).
|
||||||
|
|
||||||
|
To enable backend Authentication, check out [authentication in DataHub's backend](authentication/introducing-metadata-service-authentication.md#configuring-metadata-service-authentication).
|
||||||
|
|
||||||
|
### Move to Production
|
||||||
|
|
||||||
|
We recommend deploying DataHub to production using Kubernetes. We provide helpful [Helm Charts](https://artifacthub.io/packages/helm/datahub/datahub) to help you quickly get up and running. Check out [Deploying DataHub to Kubernetes](./deploy/kubernetes.md) for a step-by-step walkthrough.
|
||||||
|
|
||||||
|
## Other Common Operations
|
||||||
|
|
||||||
|
### Stopping DataHub
|
||||||
|
|
||||||
|
To stop DataHub's quickstart, you can issue the following command.
|
||||||
|
|
||||||
|
```
|
||||||
|
datahub docker quickstart --stop
|
||||||
|
```
|
||||||
|
|
||||||
|
### Resetting DataHub
|
||||||
|
|
||||||
|
To cleanse DataHub of all of its state (e.g. before ingesting your own), you can use the CLI `nuke` command.
|
||||||
|
|
||||||
|
```
|
||||||
|
datahub docker nuke
|
||||||
|
```
|
||||||
|
|
||||||
|
### Upgrading your local DataHub
|
||||||
|
|
||||||
|
If you have been testing DataHub locally and a new version has been released that you want to try, just issue the quickstart command again. It will pull down newer images and restart your instance without losing any data.
|
||||||
|
|
||||||
|
```
|
||||||
|
datahub docker quickstart
|
||||||
|
```
|
||||||
|
|
||||||
|
### Customization
|
||||||
|
|
||||||
|
If you would like to customize the DataHub installation further, please download the [docker-compose.yaml](https://raw.githubusercontent.com/datahub-project/datahub/master/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml) used by the cli tool, modify it as necessary and deploy DataHub by passing the downloaded docker-compose file:
|
||||||
|
```
|
||||||
|
datahub docker quickstart --quickstart-compose-file <path to compose file>
|
||||||
|
```
|
||||||
|
|
||||||
|
@ -68,7 +68,7 @@ kafka_common = {
|
|||||||
# At the same time, we use Kafka's AvroSerializer, which internally relies on
|
# At the same time, we use Kafka's AvroSerializer, which internally relies on
|
||||||
# fastavro for serialization. We do not use confluent_kafka[avro], since it
|
# fastavro for serialization. We do not use confluent_kafka[avro], since it
|
||||||
# is incompatible with its own dep on avro-python3.
|
# is incompatible with its own dep on avro-python3.
|
||||||
"confluent_kafka>=1.5.0,<1.9.0",
|
"confluent_kafka>=1.5.0",
|
||||||
"fastavro>=1.2.0",
|
"fastavro>=1.2.0",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -11,6 +11,7 @@ import time
|
|||||||
from typing import List, NoReturn, Optional
|
from typing import List, NoReturn, Optional
|
||||||
|
|
||||||
import click
|
import click
|
||||||
|
import pydantic
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from datahub.cli.docker_check import (
|
from datahub.cli.docker_check import (
|
||||||
@ -131,6 +132,80 @@ def should_use_neo4j_for_graph_service(graph_service_override: Optional[str]) ->
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _set_environment_variables(
|
||||||
|
version: Optional[str],
|
||||||
|
mysql_port: Optional[pydantic.PositiveInt],
|
||||||
|
zk_port: Optional[pydantic.PositiveInt],
|
||||||
|
kafka_broker_port: Optional[pydantic.PositiveInt],
|
||||||
|
schema_registry_port: Optional[pydantic.PositiveInt],
|
||||||
|
elastic_port: Optional[pydantic.PositiveInt],
|
||||||
|
) -> None:
|
||||||
|
if version is not None:
|
||||||
|
os.environ["DATAHUB_VERSION"] = version
|
||||||
|
if mysql_port is not None:
|
||||||
|
os.environ["DATAHUB_MAPPED_MYSQL_PORT"] = str(mysql_port)
|
||||||
|
|
||||||
|
if zk_port is not None:
|
||||||
|
os.environ["DATAHUB_MAPPED_ZK_PORT"] = str(zk_port)
|
||||||
|
|
||||||
|
if kafka_broker_port is not None:
|
||||||
|
os.environ["DATAHUB_MAPPED_KAFKA_BROKER_PORT"] = str(kafka_broker_port)
|
||||||
|
|
||||||
|
if schema_registry_port is not None:
|
||||||
|
os.environ["DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT"] = str(schema_registry_port)
|
||||||
|
|
||||||
|
if elastic_port is not None:
|
||||||
|
os.environ["DATAHUB_MAPPED_ELASTIC_PORT"] = str(elastic_port)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_default_quickstart_compose_file() -> Optional[str]:
|
||||||
|
home = os.environ["HOME"]
|
||||||
|
if home:
|
||||||
|
try:
|
||||||
|
os.makedirs(f"{home}/.datahub/quickstart", exist_ok=True)
|
||||||
|
return f"{home}/.datahub/quickstart/docker-compose.yml"
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(
|
||||||
|
f"Failed to identify a default quickstart compose file due to {e}"
|
||||||
|
)
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _attempt_stop(quickstart_compose_file: List[pathlib.Path]) -> None:
    """Stop the quickstart containers by running ``docker-compose ... stop``.

    Args:
        quickstart_compose_file: Compose files to pass to docker-compose via
            ``-f``. When empty, the path returned by
            ``_get_default_quickstart_compose_file()`` is used instead.

    The outcome is reported on the terminal. A failing or missing
    ``docker-compose`` is reported as an error rather than raised.
    """
    compose_files_for_stopping = list(quickstart_compose_file)
    if not compose_files_for_stopping:
        # Resolve the default only when it is needed: the lookup creates a
        # directory as a side effect and is pointless when the caller supplied
        # explicit compose files.
        default_quickstart_compose_file = _get_default_quickstart_compose_file()
        if default_quickstart_compose_file:
            compose_files_for_stopping = [
                pathlib.Path(default_quickstart_compose_file)
            ]

    if not compose_files_for_stopping:
        # Previously this case returned silently, leaving the user with no
        # indication that nothing had been stopped.
        click.secho(
            "Could not determine which compose file to use for stopping; "
            "pass one with --quickstart-compose-file.",
            fg="red",
        )
        return

    # docker-compose stop
    base_command: List[str] = [
        "docker-compose",
        *itertools.chain.from_iterable(
            ("-f", f"{path}") for path in compose_files_for_stopping
        ),
        "-p",
        "datahub",
    ]
    try:
        logger.debug(f"Executing {base_command} stop")
        subprocess.run(
            [*base_command, "stop"],
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers the docker-compose executable being missing or not
        # runnable, which subprocess.run raises before any exit status exists.
        click.secho(
            "Error while stopping.",
            fg="red",
        )
    else:
        click.secho("Stopped datahub successfully.", fg="green")
|
||||||
|
|
||||||
|
|
||||||
@docker.command()
|
@docker.command()
|
||||||
@click.option(
|
@click.option(
|
||||||
"--version",
|
"--version",
|
||||||
@ -166,6 +241,48 @@ def should_use_neo4j_for_graph_service(graph_service_override: Optional[str]) ->
|
|||||||
default=None,
|
default=None,
|
||||||
help="If set, forces docker-compose to use that graph service implementation",
|
help="If set, forces docker-compose to use that graph service implementation",
|
||||||
)
|
)
|
||||||
|
@click.option(
|
||||||
|
"--mysql-port",
|
||||||
|
type=pydantic.PositiveInt,
|
||||||
|
is_flag=False,
|
||||||
|
default=None,
|
||||||
|
help="If there is an existing mysql instance running on port 3306, set this to a free port to avoid port conflicts on startup",
|
||||||
|
)
|
||||||
|
@click.option(
|
||||||
|
"--zk-port",
|
||||||
|
type=pydantic.PositiveInt,
|
||||||
|
is_flag=False,
|
||||||
|
default=None,
|
||||||
|
help="If there is an existing zookeeper instance running on port 2181, set this to a free port to avoid port conflicts on startup",
|
||||||
|
)
|
||||||
|
@click.option(
|
||||||
|
"--kafka-broker-port",
|
||||||
|
type=pydantic.PositiveInt,
|
||||||
|
is_flag=False,
|
||||||
|
default=None,
|
||||||
|
help="If there is an existing Kafka broker running on port 9092, set this to a free port to avoid port conflicts on startup",
|
||||||
|
)
|
||||||
|
@click.option(
|
||||||
|
"--schema-registry-port",
|
||||||
|
type=pydantic.PositiveInt,
|
||||||
|
is_flag=False,
|
||||||
|
default=None,
|
||||||
|
help="If there is an existing process running on port 8081, set this to a free port to avoid port conflicts with Kafka schema registry on startup",
|
||||||
|
)
|
||||||
|
@click.option(
|
||||||
|
"--elastic-port",
|
||||||
|
type=pydantic.PositiveInt,
|
||||||
|
is_flag=False,
|
||||||
|
default=None,
|
||||||
|
help="If there is an existing Elasticsearch instance running on port 9200, set this to a free port to avoid port conflicts on startup",
|
||||||
|
)
|
||||||
|
@click.option(
|
||||||
|
"--stop",
|
||||||
|
type=bool,
|
||||||
|
is_flag=True,
|
||||||
|
default=False,
|
||||||
|
help="Use this flag to stop the running containers",
|
||||||
|
)
|
||||||
@upgrade.check_upgrade
|
@upgrade.check_upgrade
|
||||||
@telemetry.with_telemetry
|
@telemetry.with_telemetry
|
||||||
def quickstart(
|
def quickstart(
|
||||||
@ -174,6 +291,12 @@ def quickstart(
|
|||||||
quickstart_compose_file: List[pathlib.Path],
|
quickstart_compose_file: List[pathlib.Path],
|
||||||
dump_logs_on_failure: bool,
|
dump_logs_on_failure: bool,
|
||||||
graph_service_impl: Optional[str],
|
graph_service_impl: Optional[str],
|
||||||
|
mysql_port: Optional[pydantic.PositiveInt],
|
||||||
|
zk_port: Optional[pydantic.PositiveInt],
|
||||||
|
kafka_broker_port: Optional[pydantic.PositiveInt],
|
||||||
|
schema_registry_port: Optional[pydantic.PositiveInt],
|
||||||
|
elastic_port: Optional[pydantic.PositiveInt],
|
||||||
|
stop: bool,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Start an instance of DataHub locally using docker-compose.
|
"""Start an instance of DataHub locally using docker-compose.
|
||||||
|
|
||||||
@ -185,7 +308,7 @@ def quickstart(
|
|||||||
|
|
||||||
running_on_m1 = is_m1()
|
running_on_m1 = is_m1()
|
||||||
if running_on_m1:
|
if running_on_m1:
|
||||||
click.echo("Detected M1 machine")
|
click.secho("Detected M1 machine", fg="yellow")
|
||||||
|
|
||||||
# Run pre-flight checks.
|
# Run pre-flight checks.
|
||||||
issues = check_local_docker_containers(preflight_only=True)
|
issues = check_local_docker_containers(preflight_only=True)
|
||||||
@ -195,7 +318,13 @@ def quickstart(
|
|||||||
quickstart_compose_file = list(
|
quickstart_compose_file = list(
|
||||||
quickstart_compose_file
|
quickstart_compose_file
|
||||||
) # convert to list from tuple
|
) # convert to list from tuple
|
||||||
if not quickstart_compose_file:
|
|
||||||
|
default_quickstart_compose_file = _get_default_quickstart_compose_file()
|
||||||
|
if stop:
|
||||||
|
_attempt_stop(quickstart_compose_file)
|
||||||
|
return
|
||||||
|
elif not quickstart_compose_file:
|
||||||
|
# download appropriate quickstart file
|
||||||
should_use_neo4j = should_use_neo4j_for_graph_service(graph_service_impl)
|
should_use_neo4j = should_use_neo4j_for_graph_service(graph_service_impl)
|
||||||
if should_use_neo4j and running_on_m1:
|
if should_use_neo4j and running_on_m1:
|
||||||
click.secho(
|
click.secho(
|
||||||
@ -210,7 +339,11 @@ def quickstart(
|
|||||||
else GITHUB_M1_QUICKSTART_COMPOSE_URL
|
else GITHUB_M1_QUICKSTART_COMPOSE_URL
|
||||||
)
|
)
|
||||||
|
|
||||||
with tempfile.NamedTemporaryFile(suffix=".yml", delete=False) as tmp_file:
|
with open(
|
||||||
|
default_quickstart_compose_file, "wb"
|
||||||
|
) if default_quickstart_compose_file else tempfile.NamedTemporaryFile(
|
||||||
|
suffix=".yml", delete=False
|
||||||
|
) as tmp_file:
|
||||||
path = pathlib.Path(tmp_file.name)
|
path = pathlib.Path(tmp_file.name)
|
||||||
quickstart_compose_file.append(path)
|
quickstart_compose_file.append(path)
|
||||||
click.echo(f"Fetching docker-compose file {github_file} from GitHub")
|
click.echo(f"Fetching docker-compose file {github_file} from GitHub")
|
||||||
@ -221,8 +354,14 @@ def quickstart(
|
|||||||
logger.debug(f"Copied to {path}")
|
logger.debug(f"Copied to {path}")
|
||||||
|
|
||||||
# set version
|
# set version
|
||||||
if version is not None:
|
_set_environment_variables(
|
||||||
os.environ["DATAHUB_VERSION"] = version
|
version=version,
|
||||||
|
mysql_port=mysql_port,
|
||||||
|
zk_port=zk_port,
|
||||||
|
kafka_broker_port=kafka_broker_port,
|
||||||
|
schema_registry_port=schema_registry_port,
|
||||||
|
elastic_port=elastic_port,
|
||||||
|
)
|
||||||
|
|
||||||
base_command: List[str] = [
|
base_command: List[str] = [
|
||||||
"docker-compose",
|
"docker-compose",
|
||||||
|
@ -26,7 +26,7 @@ from datahub.utilities.server_config_util import get_gms_config
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Configure some loggers.
|
# Configure some loggers.
|
||||||
logging.getLogger("urllib3").setLevel(logging.WARNING)
|
logging.getLogger("urllib3").setLevel(logging.ERROR)
|
||||||
logging.getLogger("snowflake").setLevel(level=logging.WARNING)
|
logging.getLogger("snowflake").setLevel(level=logging.WARNING)
|
||||||
# logging.getLogger("botocore").setLevel(logging.INFO)
|
# logging.getLogger("botocore").setLevel(logging.INFO)
|
||||||
# logging.getLogger("google").setLevel(logging.INFO)
|
# logging.getLogger("google").setLevel(logging.INFO)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user