# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#

# Basic Airflow cluster configuration for CeleryExecutor with Redis and PostgreSQL.
#
# WARNING: This configuration is for local development. Do not use it in a production deployment.
#
# This configuration supports basic customization through environment variables or an .env file.
# The following variables are supported:
#
# AIRFLOW_IMAGE_NAME           - Docker image name used to run Airflow.
#                                Default: acryldata/airflow-datahub:latest
# AIRFLOW_UID                  - User ID in Airflow containers.
#                                Default: 50000
# AIRFLOW_GID                  - Group ID in Airflow containers.
#                                Default: 50000
#
# The following variables are mostly useful for standalone testing or trying Airflow out:
#
# _AIRFLOW_WWW_USER_USERNAME   - Username for the administrator account (if requested).
#                                Default: airflow
# _AIRFLOW_WWW_USER_PASSWORD   - Password for the administrator account (if requested).
#                                Default: airflow
# _PIP_ADDITIONAL_REQUIREMENTS - Additional PIP requirements to add when starting all containers.
#                                Default: ''
#
# Feel free to modify this file to suit your needs.
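#
# For example, a .env file placed next to this compose file might look like
# the following (values are illustrative, not prescribed by this file; on
# Linux, AIRFLOW_UID is usually set to the host user's ID so the mounted
# ./dags, ./logs, and ./plugins directories remain writable):
#
#   AIRFLOW_IMAGE_NAME=acryldata/airflow-datahub:latest
#   AIRFLOW_UID=50000
#   AIRFLOW_GID=50000
#   _AIRFLOW_WWW_USER_USERNAME=airflow
#   _AIRFLOW_WWW_USER_PASSWORD=airflow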
---
version: '3'
x-airflow-common:
  &airflow-common
  image: ${AIRFLOW_IMAGE_NAME:-acryldata/airflow-datahub:latest}
  environment:
    &airflow-common-env
    AIRFLOW__CORE__EXECUTOR: CeleryExecutor
    AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow
    AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@postgres/airflow
    AIRFLOW__CELERY__BROKER_URL: redis://:@redis:6379/0
    AIRFLOW__CORE__FERNET_KEY: ''
    AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true'
    AIRFLOW__CORE__LOAD_EXAMPLES: 'true'
    AIRFLOW__API__AUTH_BACKEND: 'airflow.api.auth.backend.basic_auth'
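    # Send task lineage to DataHub via the lineage backend from the
    # datahub_provider package. The backend reads the Airflow connection named
    # in datahub_conn_id below; the connection itself is not defined in this
    # file. One way to create it (the GMS host shown is an assumption; adjust
    # it to your deployment) is:
    #   airflow connections add 'datahub_rest_default' \
    #     --conn-type 'datahub_rest' \
    #     --conn-host 'http://datahub-gms:8080'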
    AIRFLOW__LINEAGE__BACKEND: 'datahub_provider.lineage.datahub.DatahubLineageBackend'
    AIRFLOW__LINEAGE__DATAHUB_KWARGS: '{ "datahub_conn_id": "datahub_rest_default", "capture_ownership_info": true, "capture_tags_info": true, "graceful_exceptions": false }'
    _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-}
  volumes:
    - ./dags:/opt/airflow/dags
    - ./logs:/opt/airflow/logs
    - ./plugins:/opt/airflow/plugins
  user: "${AIRFLOW_UID:-50000}:${AIRFLOW_GID:-50000}"
  depends_on:
    redis:
      condition: service_healthy
    postgres:
      condition: service_healthy
  networks:
    - datahub_network

services:
  postgres:
    image: postgres:13
    hostname: postgres
    environment:
      POSTGRES_USER: airflow
      POSTGRES_PASSWORD: airflow
      POSTGRES_DB: airflow
    volumes:
      - postgres-db-volume:/var/lib/postgresql/data
    networks:
      - datahub_network
    healthcheck:
      test: ["CMD", "pg_isready", "-U", "airflow"]
      interval: 5s
      retries: 5
    restart: always

  redis:
    image: redis:latest
    hostname: redis
    ports:
      - 6379:6379
    networks:
      - datahub_network
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 30s
      retries: 50
    restart: always

  airflow-webserver:
    <<: *airflow-common
    command: webserver
    ports:
      - 58080:8080
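    # The healthcheck runs inside the container, where the webserver listens
    # on container port 8080; 58080 is only the host-side mapping, so from the
    # host the UI is reached at http://localhost:58080.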
    healthcheck:
      test: ["CMD", "curl", "--fail", "http://localhost:8080/health"]
      interval: 10s
      timeout: 10s
      retries: 5
    restart: always

  airflow-scheduler:
    <<: *airflow-common
    command: scheduler
    healthcheck:
      test: ["CMD-SHELL", 'airflow jobs check --job-type SchedulerJob --hostname "$${HOSTNAME}"']
      interval: 10s
      timeout: 10s
      retries: 5
    restart: always

  airflow-worker:
    <<: *airflow-common
    command: celery worker
    healthcheck:
      test:
        - "CMD-SHELL"
        - 'celery --app airflow.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}"'
      interval: 10s
      timeout: 10s
      retries: 5
    restart: always

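  # One-shot initialization service: with _AIRFLOW_DB_UPGRADE and
  # _AIRFLOW_WWW_USER_CREATE set, the official image entrypoint upgrades the
  # Airflow metadata database and creates the admin account before the
  # container exits. Typically run once before bringing up the full stack,
  # e.g.: docker-compose up airflow-init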
  airflow-init:
    <<: *airflow-common
    command: version
    environment:
      <<: *airflow-common-env
      _AIRFLOW_DB_UPGRADE: 'true'
      _AIRFLOW_WWW_USER_CREATE: 'true'
      _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow}
      _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow}

  flower:
    <<: *airflow-common
    command: celery flower
    ports:
      - 5555:5555
    healthcheck:
      test: ["CMD", "curl", "--fail", "http://localhost:5555/"]
      interval: 10s
      timeout: 10s
      retries: 5
    restart: always

volumes:
  postgres-db-volume:

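# All services attach to an external datahub_network so the Airflow containers
# can reach the DataHub services; `external: true` means Compose will not
# create the network itself. The DataHub quickstart normally creates it;
# otherwise create it manually with: docker network create datahub_network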
networks:
  datahub_network:
    external: true