OpenMetadata/docker/run_local_docker.sh
#!/bin/bash
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
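# Builds OpenMetadata from source (unless skipped), brings up the local
# development Docker Compose stack (MySQL or PostgreSQL), waits for
# Elasticsearch, Airflow and the OpenMetadata server, then seeds and
# validates the sample data DAGs.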
RED='\033[0;31m'
NC='\033[0m' # reset terminal color after colored output
cd "$(dirname "${BASH_SOURCE[0]}")" || exit
helpFunction()
{
  echo ""
  echo "Usage: $0 -m mode -d database"
  echo -e "\t-m Running mode: [ui, no-ui]. Default [ui]\n"
  echo -e "\t-d Database: [mysql, postgresql]. Default [mysql]\n"
  echo -e "\t-s Skip maven build: [true, false]. Default [false]\n"
  echo -e "\t-x Open JVM debug port on 5005: [true, false]. Default [false]\n"
  echo -e "\t-h For usage help\n"
  echo -e "\t-r Clean DB volumes: [true, false]. Default [true]\n"
  exit 1 # Exit script after printing help
}
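# Example invocations (illustrative combinations of the flags above):
#   ./run_local_docker.sh                                 # MySQL, UI build, clean volumes
#   ./run_local_docker.sh -m no-ui -d postgresql -s true  # Postgres, no UI, skip maven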
while getopts "m:d:s:x:r:h" opt
do
  case "$opt" in
    m ) mode="$OPTARG" ;;
    d ) database="$OPTARG" ;;
    s ) skipMaven="$OPTARG" ;;
    x ) debugOM="$OPTARG" ;;
    r ) cleanDbVolumes="$OPTARG" ;;
    h ) helpFunction ;;
    ? ) helpFunction ;;
  esac
done
mode="${mode:=ui}"
database="${database:=mysql}"
skipMaven="${skipMaven:=false}"
debugOM="${debugOM:=false}"
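# Admin JWT for the local development server, used as the Bearer token for the
# OpenMetadata API calls further down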
authorizationToken="eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
cleanDbVolumes="${cleanDbVolumes:=true}"
echo "Running local docker using mode [$mode] database [$database] and skipping maven build [$skipMaven] with cleanDB as [$cleanDbVolumes]"
cd ../
echo "Stopping any previous Local Docker Containers"
docker compose -f docker/development/docker-compose-postgres.yml down --remove-orphans
docker compose -f docker/development/docker-compose.yml down --remove-orphans
if [[ $skipMaven == "false" ]]; then
  if [[ $mode == "no-ui" ]]; then
    echo "Maven Build - Skipping Tests and UI"
    mvn -DskipTests -DonlyBackend clean package -pl !openmetadata-ui
  else
    echo "Maven Build - Skipping Tests"
    mvn -DskipTests clean package
  fi
else
  echo "Skipping Maven Build"
fi
RESULT=$?
if [ $RESULT -ne 0 ]; then
  echo "Failed to run Maven build!"
  exit 1
fi
if [[ $debugOM == "true" ]]; then
  export OPENMETADATA_DEBUG=true
fi
if [[ $cleanDbVolumes == "true" ]]; then
  if [[ -d "$PWD/docker/development/docker-volume/" ]]; then
    rm -rf "$PWD/docker/development/docker-volume"
  fi
fi
if [[ $VIRTUAL_ENV == "" ]]; then
  echo "Please use a virtual environment and make sure to generate the Pydantic models"
else
  echo "Generating Pydantic models"
  make install_dev generate
fi
echo "Starting Local Docker Containers"
echo "Using ingestion dependency: ${INGESTION_DEPENDENCY:-all}"
if [[ $database == "postgresql" ]]; then
  docker compose -f docker/development/docker-compose-postgres.yml build --build-arg INGESTION_DEPENDENCY="${INGESTION_DEPENDENCY:-all}" && docker compose -f docker/development/docker-compose-postgres.yml up -d
elif [[ $database == "mysql" ]]; then
  docker compose -f docker/development/docker-compose.yml build --build-arg INGESTION_DEPENDENCY="${INGESTION_DEPENDENCY:-all}" && docker compose -f docker/development/docker-compose.yml up -d
else
  echo "Invalid database type: $database"
  exit 1
fi
RESULT=$?
if [ $RESULT -ne 0 ]; then
  echo "Failed to start Docker instances!"
  exit 1
fi
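# Wait until Elasticsearch answers and the OpenMetadata team index exists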
until curl -s -f "http://localhost:9200/_cat/indices/openmetadata_team_search_index"; do
echo 'Checking if Elastic Search instance is up...\n'
sleep 5
done
# Get an OAuth access token from the Airflow API (Airflow 3.x /auth/token endpoint)
get_airflow_token() {
  local token_response access_token
  token_response=$(curl -s -X POST 'http://localhost:8080/auth/token' \
    -H 'Content-Type: application/json' \
    -d '{"username": "admin", "password": "admin"}')
  access_token=$(echo "$token_response" | python3 -c "import sys, json; data=json.load(sys.stdin); print(data.get('access_token', ''))" 2>/dev/null || echo "")
  if [ -z "$access_token" ]; then
    echo "✗ Failed to get access token" >&2
    echo "  Response: ${token_response}" >&2
    return 1
  fi
  echo "$access_token"
}
# Wait for the Airflow API to be ready and obtain an initial token
echo "Waiting for Airflow API to be ready..."
until AIRFLOW_ACCESS_TOKEN=$(get_airflow_token 2>/dev/null) && [ -n "$AIRFLOW_ACCESS_TOKEN" ]; do
  echo 'Checking if Airflow API is reachable...'
  sleep 5
done
echo "✓ Airflow API is ready, token obtained"
# Check that the sample_data DAG has been parsed and is available
echo "Checking if Sample Data DAG is available..."
until curl -s -f -H "Authorization: Bearer $AIRFLOW_ACCESS_TOKEN" "http://localhost:8080/api/v2/dags/sample_data" >/dev/null 2>&1; do
  # Surface DAG import errors early instead of polling forever
  IMPORT_ERRORS=$(curl -s -H "Authorization: Bearer $AIRFLOW_ACCESS_TOKEN" "http://localhost:8080/api/v2/importErrors" 2>/dev/null)
  if [ -n "$IMPORT_ERRORS" ]; then
    if echo "$IMPORT_ERRORS" | grep -q "/airflow_sample_data.py"; then
      echo -e "${RED}Airflow found an error importing the \`sample_data\` DAG${NC}"
      echo "$IMPORT_ERRORS" | python3 -c "import sys, json; data=json.load(sys.stdin); [print(json.dumps(e, indent=2)) for e in data.get('import_errors', []) if e.get('filename', '').endswith('airflow_sample_data.py')]" 2>/dev/null || echo "$IMPORT_ERRORS"
      exit 1
    fi
  fi
  echo 'Checking if Sample Data DAG is reachable...'
  sleep 5
  # Refresh the token before retrying (tokens expire after 24h)
  AIRFLOW_ACCESS_TOKEN=$(get_airflow_token 2>/dev/null)
done
echo "✓ Sample Data DAG is available"
# Wait until the OpenMetadata server API responds
until curl -s -f --header "Authorization: Bearer $authorizationToken" "http://localhost:8585/api/v1/tables"; do
  echo 'Checking if OM Server is reachable...'
  sleep 5
done
# Unpause a DAG via the Airflow API using the OAuth Bearer token
unpause_dag() {
  local dag_id=$1
  echo "Unpausing DAG: ${dag_id}"
  # Get a fresh token if one is not already set
  if [ -z "$AIRFLOW_ACCESS_TOKEN" ]; then
    AIRFLOW_ACCESS_TOKEN=$(get_airflow_token)
    if [ -z "$AIRFLOW_ACCESS_TOKEN" ]; then
      return 1
    fi
    echo "✓ OAuth token obtained"
  fi
  response=$(curl -s -w "\n%{http_code}" --location --request PATCH "http://localhost:8080/api/v2/dags/${dag_id}" \
    --header "Authorization: Bearer $AIRFLOW_ACCESS_TOKEN" \
    --header 'Content-Type: application/json' \
    --data-raw '{"is_paused": false}')
  http_code=$(echo "$response" | tail -n1)
  body=$(echo "$response" | sed '$d')
  if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then
    echo "✓ Successfully unpaused ${dag_id}"
  else
    echo "✗ Failed to unpause ${dag_id} (HTTP ${http_code})"
    echo "  Response: ${body}"
    # The token might have expired; refresh and retry once on 401
    if [ "$http_code" = "401" ]; then
      echo "  Refreshing token and retrying..."
      AIRFLOW_ACCESS_TOKEN=$(get_airflow_token)
      if [ -n "$AIRFLOW_ACCESS_TOKEN" ]; then
        response=$(curl -s -w "\n%{http_code}" --location --request PATCH "http://localhost:8080/api/v2/dags/${dag_id}" \
          --header "Authorization: Bearer $AIRFLOW_ACCESS_TOKEN" \
          --header 'Content-Type: application/json' \
          --data-raw '{"is_paused": false}')
        http_code=$(echo "$response" | tail -n1)
        if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then
          echo "✓ Successfully unpaused ${dag_id} after retry"
        fi
      fi
    fi
  fi
}
unpause_dag "sample_data"
unpause_dag "extended_sample_data"
# Trigger sample_data DAG to run
echo "Triggering sample_data DAG..."
LOGICAL_DATE=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
response=$(curl -s -w "\n%{http_code}" -X POST "http://localhost:8080/api/v2/dags/sample_data/dagRuns" \
  --header "Authorization: Bearer $AIRFLOW_ACCESS_TOKEN" \
  --header 'Content-Type: application/json' \
  --data-raw "{\"logical_date\": \"$LOGICAL_DATE\"}")
http_code=$(echo "$response" | tail -n1)
if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then
  echo "✓ Successfully triggered sample_data DAG"
else
  echo "⚠ Could not trigger sample_data DAG (HTTP ${http_code})"
  echo "  Response: $(echo "$response" | sed '$d')"
  echo "  Note: the DAG may run automatically on schedule"
fi
echo 'Validating sample data DAG...'
sleep 5
# Install the ingestion Python package used by the validation script below
make install
# validate_compose.py checks the sample data DAG flow; run it with a timeout
# so the script cannot hang indefinitely
echo "Running DAG validation (this may take a few minutes)..."
timeout 300 python docker/validate_compose.py || {
  exit_code=$?
  if [ $exit_code -eq 124 ]; then
    echo "⚠ Warning: DAG validation timed out after 5 minutes"
    echo "  The DAG may still be running. Check the Airflow UI at http://localhost:8080"
  else
    echo "⚠ Warning: DAG validation failed with exit code $exit_code"
  fi
  echo "  Continuing with remaining setup..."
}
sleep 5
unpause_dag "sample_usage"
sleep 5
unpause_dag "index_metadata"
sleep 2
unpause_dag "sample_lineage"
echo "✔running reindexing"
# Trigger the Elasticsearch reindexing application via the OpenMetadata API
curl --location --request POST 'http://localhost:8585/api/v1/apps/trigger/SearchIndexingApplication' \
  --header "Authorization: Bearer $authorizationToken"
sleep 60 # Give the Elasticsearch reindexing triggered above time to finish
tput setaf 2
echo "✔ OpenMetadata is up and running"
tput sgr0 # reset terminal colors