# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#

# Basic Airflow cluster configuration for CeleryExecutor with Redis and PostgreSQL.
#
# WARNING: This configuration is for local development. Do not use it in a production deployment.
#
# This configuration supports basic customization via environment variables or an .env file.
# The following variables are supported:
#
# AIRFLOW_IMAGE_NAME           - Docker image name used to run Airflow.
#                                Default: acryldata/airflow-datahub:latest
# AIRFLOW_UID                  - User ID in Airflow containers.
#                                Default: 50000
# AIRFLOW_GID                  - Group ID in Airflow containers.
#                                Default: 50000
#
# These variables are mostly useful when running Airflow standalone to test or try it out:
#
# _AIRFLOW_WWW_USER_USERNAME   - Username for the administrator account (if requested).
#                                Default: airflow
# _AIRFLOW_WWW_USER_PASSWORD   - Password for the administrator account (if requested).
#                                Default: airflow
# _PIP_ADDITIONAL_REQUIREMENTS - Additional pip requirements to install when starting all containers.
#                                Default: ''
#
# Feel free to modify this file to suit your needs.
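#
# For example, an illustrative .env file placed next to this compose file
# (variable names are the ones documented above; the values are just examples):
#
#   AIRFLOW_IMAGE_NAME=acryldata/airflow-datahub:latest
#   AIRFLOW_UID=50000
#   AIRFLOW_GID=50000
#   _AIRFLOW_WWW_USER_USERNAME=airflow
#   _AIRFLOW_WWW_USER_PASSWORD=airflow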
---
x-airflow-common:
  &airflow-common
  image: ${AIRFLOW_IMAGE_NAME:-acryldata/airflow-datahub:latest}
  environment:
    &airflow-common-env
    AIRFLOW__CORE__EXECUTOR: CeleryExecutor
    AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow
    AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@postgres/airflow
    AIRFLOW__CELERY__BROKER_URL: redis://:@redis:6379/0
    AIRFLOW__CORE__FERNET_KEY: ''
    AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true'
    AIRFLOW__CORE__LOAD_EXAMPLES: 'true'
    AIRFLOW__API__AUTH_BACKEND: 'airflow.api.auth.backend.basic_auth'
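    # Emit task lineage to DataHub. The Airflow connection named by
    # "datahub_conn_id" below ("datahub_rest_default") must exist in Airflow,
    # e.g. created with `airflow connections add`.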
    AIRFLOW__LINEAGE__BACKEND: 'datahub_provider.lineage.datahub.DatahubLineageBackend'
    AIRFLOW__LINEAGE__DATAHUB_KWARGS: '{ "datahub_conn_id": "datahub_rest_default", "capture_ownership_info": true, "capture_tags_info": true, "graceful_exceptions": false }'
    _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-}
  volumes:
    - ./dags:/opt/airflow/dags
    - ./logs:/opt/airflow/logs
    - ./plugins:/opt/airflow/plugins
  user: "${AIRFLOW_UID:-50000}:${AIRFLOW_GID:-50000}"
  depends_on:
    redis:
      condition: service_healthy
    postgres:
      condition: service_healthy
  networks:
    - datahub_network

services:
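  # PostgreSQL, used as the Airflow metadata database and the Celery result backend.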
  postgres:
    image: postgres:13
    hostname: postgres
    environment:
      POSTGRES_USER: airflow
      POSTGRES_PASSWORD: airflow
      POSTGRES_DB: airflow
    volumes:
      - postgres-db-volume:/var/lib/postgresql/data
    networks:
      - datahub_network
    healthcheck:
      test: ["CMD", "pg_isready", "-U", "airflow"]
      interval: 5s
      retries: 5
    restart: always

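  # Redis, used as the Celery message broker.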
  redis:
    image: redis:latest
    hostname: redis
    ports:
      - 6379:6379
    networks:
      - datahub_network
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 30s
      retries: 50
    restart: always

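  # Airflow web UI, listening on port 8080 inside the container and published on host port 58080.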
  airflow-webserver:
    <<: *airflow-common
    command: webserver
    ports:
      - 58080:8080
    healthcheck:
      # The healthcheck runs inside the container, where the webserver listens
      # on 8080; 58080 is only the host-side port mapping.
      test: ["CMD", "curl", "--fail", "http://localhost:8080/health"]
      interval: 10s
      timeout: 10s
      retries: 5
    restart: always

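  # Airflow scheduler: triggers DAG runs and hands tasks to the Celery queue.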
  airflow-scheduler:
    <<: *airflow-common
    command: scheduler
    healthcheck:
      test: ["CMD-SHELL", 'airflow jobs check --job-type SchedulerJob --hostname "$${HOSTNAME}"']
      interval: 10s
      timeout: 10s
      retries: 5
    restart: always

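  # Celery worker that executes the tasks queued by the scheduler.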
  airflow-worker:
    <<: *airflow-common
    command: celery worker
    healthcheck:
      test:
        - "CMD-SHELL"
        - 'celery --app airflow.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}"'
      interval: 10s
      timeout: 10s
      retries: 5
    restart: always

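  # One-shot initialization: runs the database migrations and creates the admin user.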
  airflow-init:
    <<: *airflow-common
    command: version
    environment:
      <<: *airflow-common-env
      _AIRFLOW_DB_UPGRADE: 'true'
      _AIRFLOW_WWW_USER_CREATE: 'true'
      _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow}
      _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow}

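  # Flower, the Celery monitoring UI, published on host port 5555.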
  flower:
    <<: *airflow-common
    command: celery flower
    ports:
      - 5555:5555
    healthcheck:
      test: ["CMD", "curl", "--fail", "http://localhost:5555/"]
      interval: 10s
      timeout: 10s
      retries: 5
    restart: always

volumes:
  postgres-db-volume:

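# All services attach to an existing Docker network shared with the DataHub
# containers. Because it is marked external, compose will not create it; if
# needed, create it once with `docker network create datahub_network`.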
networks:
  datahub_network:
    external: true
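#
# A typical local bring-up, sketched (assumes the datahub_network network
# already exists; see the note above):
#
#   docker-compose up airflow-init   # one-time: migrate the database and create the admin user
#   docker-compose up -d             # start the whole cluster in the background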