mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-08-05 07:38:26 +00:00

* Add cleaner metrics * Revamp Metrics; Add Request Latencies, breakdown database, search latencies * Remove MicrometerBundleSingleton.java * Fix tests
439 lines
18 KiB
Bash
Executable File
439 lines
18 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# Copyright 2021 Collate
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
# Script to analyze request latency metrics for a specific endpoint from OpenMetadata Prometheus metrics
|
|
|
|
# Default values
|
|
HOST="localhost"
|
|
PORT="8586"
|
|
ENDPOINT=""
|
|
SHOW_RAW=false
|
|
PROTOCOL="http"
|
|
LIST_MODE=false
|
|
SORT_BY="count"
|
|
TOP_N=0
|
|
|
|
# Colors for output
|
|
GREEN='\033[0;32m'
|
|
BLUE='\033[0;34m'
|
|
YELLOW='\033[1;33m'
|
|
RED='\033[0;31m'
|
|
NC='\033[0m' # No Color
|
|
|
|
# Function to show usage
|
|
usage() {
|
|
echo "Usage: $0 [OPTIONS]"
|
|
echo ""
|
|
echo "Modes:"
|
|
echo " Analyze mode: $0 -e <endpoint> [options]"
|
|
echo " List mode: $0 -l [options]"
|
|
echo ""
|
|
echo "Common Options:"
|
|
echo " -h <host> Prometheus host (default: localhost)"
|
|
echo " -p <port> Prometheus port (default: 8586)"
|
|
echo " -s Use HTTPS instead of HTTP"
|
|
echo ""
|
|
echo "Analyze Mode Options:"
|
|
echo " -e <endpoint> The endpoint to analyze (e.g., 'v1/tables/123-456-789')"
|
|
echo " -r Show raw metrics data"
|
|
echo ""
|
|
echo "List Mode Options:"
|
|
echo " -l List all endpoints with metrics"
|
|
echo " -t <top_n> Show only top N endpoints (default: all)"
|
|
echo " -o <sort_by> Sort by: count, time, or name (default: count)"
|
|
echo ""
|
|
echo "Examples:"
|
|
echo " # Analyze specific endpoint"
|
|
echo " $0 -e 'v1/tables/0a4cd328-50c3-44c8-a7d2-ba71ba53bbe3'"
|
|
echo " $0 -e 'v1/users' -h 'myserver.com' -p 9090 -r"
|
|
echo ""
|
|
echo " # List all endpoints"
|
|
echo " $0 -l"
|
|
echo " $0 -l -t 10 # Top 10 by request count"
|
|
echo " $0 -l -o time -t 5 # Top 5 by total time"
|
|
exit 1
|
|
}
|
|
|
|
# Parse command line arguments
|
|
while getopts "e:h:p:srlt:o:" opt; do
|
|
case $opt in
|
|
e)
|
|
ENDPOINT="$OPTARG"
|
|
;;
|
|
h)
|
|
HOST="$OPTARG"
|
|
;;
|
|
p)
|
|
PORT="$OPTARG"
|
|
;;
|
|
s)
|
|
PROTOCOL="https"
|
|
;;
|
|
r)
|
|
SHOW_RAW=true
|
|
;;
|
|
l)
|
|
LIST_MODE=true
|
|
;;
|
|
t)
|
|
TOP_N="$OPTARG"
|
|
;;
|
|
o)
|
|
SORT_BY="$OPTARG"
|
|
if [[ ! "$SORT_BY" =~ ^(count|time|name)$ ]]; then
|
|
echo -e "${RED}Error: Invalid sort option. Must be: count, time, or name${NC}"
|
|
usage
|
|
fi
|
|
;;
|
|
*)
|
|
usage
|
|
;;
|
|
esac
|
|
done
|
|
|
|
# Validate mode selection
|
|
if [ "$LIST_MODE" = true ] && [ -n "$ENDPOINT" ]; then
|
|
echo -e "${RED}Error: Cannot use both -l and -e options together${NC}"
|
|
usage
|
|
fi
|
|
|
|
if [ "$LIST_MODE" = false ] && [ -z "$ENDPOINT" ]; then
|
|
echo -e "${RED}Error: Either -e <endpoint> or -l must be specified${NC}"
|
|
usage
|
|
fi
|
|
|
|
# Construct metrics URL
|
|
METRICS_URL="${PROTOCOL}://${HOST}:${PORT}/prometheus"
|
|
|
|
# Fetch metrics
|
|
echo -e "${YELLOW}Fetching metrics...${NC}"
|
|
METRICS=$(curl -s "$METRICS_URL")
|
|
|
|
if [ -z "$METRICS" ]; then
|
|
echo -e "${RED}Error: Failed to fetch metrics from $METRICS_URL${NC}"
|
|
exit 1
|
|
fi
|
|
|
|
# Branch based on mode
|
|
if [ "$LIST_MODE" = true ]; then
|
|
# List mode
|
|
echo -e "${BLUE}═══════════════════════════════════════════════════════════════════════${NC}"
|
|
echo -e "${BLUE}OpenMetadata Endpoints with Metrics${NC}"
|
|
echo -e "${BLUE}═══════════════════════════════════════════════════════════════════════${NC}"
|
|
echo -e "${GREEN}Host:${NC} $HOST"
|
|
echo -e "${GREEN}Port:${NC} $PORT"
|
|
echo -e "${GREEN}Protocol:${NC} $PROTOCOL"
|
|
echo -e "${GREEN}Metrics URL:${NC} $METRICS_URL"
|
|
echo -e "${BLUE}═══════════════════════════════════════════════════════════════════════${NC}"
|
|
echo ""
|
|
|
|
echo -e "${YELLOW}Processing endpoint data...${NC}"
|
|
echo ""
|
|
|
|
# Create temporary file for processing
|
|
TEMP_FILE=$(mktemp)
|
|
|
|
# Extract endpoint data
|
|
echo "$METRICS" | grep -E "^request_latency_total_seconds_count" | while read -r line; do
|
|
endpoint=$(echo "$line" | sed -n 's/.*endpoint="\([^"]*\)".*/\1/p')
|
|
count=$(echo "$line" | awk '{print $NF}')
|
|
|
|
# Get total time for this endpoint
|
|
total_time=$(echo "$METRICS" | grep -E "^request_latency_total_seconds_sum.*endpoint=\"${endpoint}\"" | head -1 | awk '{print $NF}')
|
|
|
|
# Get percentages
|
|
db_pct=$(echo "$METRICS" | grep -E "^request_percentage_database.*endpoint=\"${endpoint}\"" | head -1 | awk '{print $NF}')
|
|
search_pct=$(echo "$METRICS" | grep -E "^request_percentage_search.*endpoint=\"${endpoint}\"" | head -1 | awk '{print $NF}')
|
|
internal_pct=$(echo "$METRICS" | grep -E "^request_percentage_internal.*endpoint=\"${endpoint}\"" | head -1 | awk '{print $NF}')
|
|
|
|
# Calculate average time in ms
|
|
if [ -n "$total_time" ] && [ -n "$count" ] && [ "$count" != "0" ]; then
|
|
avg_time=$(echo "scale=3; $total_time * 1000 / $count" | bc)
|
|
else
|
|
avg_time="0"
|
|
fi
|
|
|
|
# Format percentages
|
|
db_pct=${db_pct:-0}
|
|
search_pct=${search_pct:-0}
|
|
internal_pct=${internal_pct:-0}
|
|
|
|
# Output in tab-separated format
|
|
echo -e "$endpoint\t$count\t$total_time\t$avg_time\t$db_pct\t$search_pct\t$internal_pct"
|
|
done > "$TEMP_FILE"
|
|
|
|
# Sort based on option
|
|
case "$SORT_BY" in
|
|
count)
|
|
SORTED=$(sort -t$'\t' -k2 -nr "$TEMP_FILE")
|
|
;;
|
|
time)
|
|
SORTED=$(sort -t$'\t' -k3 -nr "$TEMP_FILE")
|
|
;;
|
|
name)
|
|
SORTED=$(sort -t$'\t' -k1 "$TEMP_FILE")
|
|
;;
|
|
esac
|
|
|
|
# Apply top N filter if specified
|
|
if [ "$TOP_N" -gt 0 ]; then
|
|
SORTED=$(echo "$SORTED" | head -n "$TOP_N")
|
|
fi
|
|
|
|
# Display header
|
|
printf "${GREEN}%-60s %10s %15s %12s %10s %10s %10s${NC}\n" \
|
|
"Endpoint" "Requests" "Total Time(s)" "Avg(ms)" "DB%" "Search%" "Internal%"
|
|
echo -e "${BLUE}$(printf '%.0s─' {1..130})${NC}"
|
|
|
|
# Display data
|
|
echo "$SORTED" | while IFS=$'\t' read -r endpoint count total_time avg_time db_pct search_pct internal_pct; do
|
|
# Truncate endpoint if too long
|
|
if [ ${#endpoint} -gt 57 ]; then
|
|
display_endpoint="${endpoint:0:54}..."
|
|
else
|
|
display_endpoint="$endpoint"
|
|
fi
|
|
|
|
# Format numbers
|
|
total_time_fmt=$(printf "%.3f" "$total_time" 2>/dev/null || echo "0.000")
|
|
avg_time_fmt=$(printf "%.1f" "$avg_time" 2>/dev/null || echo "0.0")
|
|
db_pct_fmt=$(printf "%.1f" "$db_pct" 2>/dev/null || echo "0.0")
|
|
search_pct_fmt=$(printf "%.1f" "$search_pct" 2>/dev/null || echo "0.0")
|
|
internal_pct_fmt=$(printf "%.1f" "$internal_pct" 2>/dev/null || echo "0.0")
|
|
|
|
printf "%-60s %10s %15s %12s %10s %10s %10s\n" \
|
|
"$display_endpoint" "$count" "$total_time_fmt" "$avg_time_fmt" \
|
|
"$db_pct_fmt" "$search_pct_fmt" "$internal_pct_fmt"
|
|
done
|
|
|
|
# Summary
|
|
echo -e "${BLUE}$(printf '%.0s─' {1..130})${NC}"
|
|
TOTAL_ENDPOINTS=$(echo "$SORTED" | wc -l | tr -d ' ')
|
|
TOTAL_REQUESTS=$(echo "$SORTED" | awk -F$'\t' '{sum+=$2} END {print sum}')
|
|
echo -e "${GREEN}Total Endpoints:${NC} $TOTAL_ENDPOINTS"
|
|
echo -e "${GREEN}Total Requests:${NC} $TOTAL_REQUESTS"
|
|
|
|
# Cleanup
|
|
rm -f "$TEMP_FILE"
|
|
|
|
echo ""
|
|
echo -e "${BLUE}═══════════════════════════════════════════════════════════════════════${NC}"
|
|
exit 0
|
|
fi
|
|
|
|
# Analyze mode
|
|
echo -e "${BLUE}═══════════════════════════════════════════════════════════════════════${NC}"
|
|
echo -e "${BLUE}OpenMetadata Endpoint Metrics Analysis${NC}"
|
|
echo -e "${BLUE}═══════════════════════════════════════════════════════════════════════${NC}"
|
|
echo -e "${GREEN}Endpoint:${NC} $ENDPOINT"
|
|
echo -e "${GREEN}Host:${NC} $HOST"
|
|
echo -e "${GREEN}Port:${NC} $PORT"
|
|
echo -e "${GREEN}Protocol:${NC} $PROTOCOL"
|
|
echo -e "${GREEN}Metrics URL:${NC} $METRICS_URL"
|
|
echo -e "${BLUE}═══════════════════════════════════════════════════════════════════════${NC}"
|
|
echo ""
|
|
|
|
# Escape special characters in endpoint for regex
|
|
ENDPOINT_ESCAPED=$(echo "$ENDPOINT" | sed 's/[[\.*^$()+?{|]/\\&/g')
|
|
|
|
# Function to extract metric value
|
|
get_metric_value() {
|
|
local metric_name=$1
|
|
local metric_type=$2
|
|
|
|
if [ "$metric_type" = "counter" ]; then
|
|
# For counters, get the total count
|
|
echo "$METRICS" | grep -E "^${metric_name}.*endpoint=\"${ENDPOINT_ESCAPED}\"[^}]*} " | grep -v "_total" | grep -v "_created" | head -1 | awk '{print $NF}'
|
|
elif [ "$metric_type" = "histogram_count" ]; then
|
|
# For histograms, get the count
|
|
echo "$METRICS" | grep -E "^${metric_name}_count.*endpoint=\"${ENDPOINT_ESCAPED}\"" | head -1 | awk '{print $NF}'
|
|
elif [ "$metric_type" = "histogram_sum" ]; then
|
|
# For histograms, get the sum (total time)
|
|
echo "$METRICS" | grep -E "^${metric_name}_sum.*endpoint=\"${ENDPOINT_ESCAPED}\"" | head -1 | awk '{print $NF}'
|
|
elif [ "$metric_type" = "gauge" ]; then
|
|
# For gauges, get the value
|
|
echo "$METRICS" | grep -E "^${metric_name}.*endpoint=\"${ENDPOINT_ESCAPED}\"" | grep -v "#" | head -1 | awk '{print $NF}'
|
|
fi
|
|
}
|
|
|
|
# Function to calculate mean from histogram
|
|
calculate_mean() {
|
|
local sum=$1
|
|
local count=$2
|
|
if [ -n "$sum" ] && [ -n "$count" ] && [ "$count" != "0" ]; then
|
|
echo "scale=6; $sum / $count" | bc
|
|
else
|
|
echo "0"
|
|
fi
|
|
}
|
|
|
|
# Function to format time
|
|
format_time() {
|
|
local seconds=$1
|
|
if [ -z "$seconds" ] || [ "$seconds" = "0" ]; then
|
|
echo "N/A"
|
|
else
|
|
local ms=$(echo "scale=3; $seconds * 1000" | bc)
|
|
echo "${ms}ms"
|
|
fi
|
|
}
|
|
|
|
# Function to get percentile from histogram
|
|
get_percentile() {
|
|
local metric_name=$1
|
|
local percentile=$2
|
|
|
|
# Get all bucket values for this metric
|
|
local buckets=$(echo "$METRICS" | grep -E "^${metric_name}_bucket.*endpoint=\"${ENDPOINT_ESCAPED}\"" | grep -v "#")
|
|
|
|
if [ -z "$buckets" ]; then
|
|
echo "N/A"
|
|
return
|
|
fi
|
|
|
|
# Get total count
|
|
local total_count=$(echo "$buckets" | grep 'le="+Inf"' | awk '{print $NF}')
|
|
if [ -z "$total_count" ] || [ "$total_count" = "0" ]; then
|
|
echo "N/A"
|
|
return
|
|
fi
|
|
|
|
# Calculate target count for percentile
|
|
local target_count=$(echo "scale=0; $total_count * $percentile / 100" | bc)
|
|
|
|
# Find the bucket that contains the percentile
|
|
local prev_le="0"
|
|
echo "$buckets" | sort -t'=' -k3 -g | while IFS= read -r line; do
|
|
local le=$(echo "$line" | sed -n 's/.*le="\([^"]*\)".*/\1/p')
|
|
local count=$(echo "$line" | awk '{print $NF}')
|
|
|
|
if [ "$le" != "+Inf" ] && [ "$(echo "$count >= $target_count" | bc)" -eq 1 ]; then
|
|
format_time "$le"
|
|
break
|
|
fi
|
|
prev_le="$le"
|
|
done
|
|
}
|
|
|
|
# Collect metrics
|
|
echo -e "${YELLOW}Analyzing metrics for endpoint: ${NC}${ENDPOINT}"
|
|
echo ""
|
|
|
|
# Total request metrics
|
|
TOTAL_COUNT=$(get_metric_value "request_latency_total_seconds" "histogram_count")
|
|
TOTAL_SUM=$(get_metric_value "request_latency_total_seconds" "histogram_sum")
|
|
TOTAL_MEAN=$(calculate_mean "$TOTAL_SUM" "$TOTAL_COUNT")
|
|
|
|
# Database metrics
|
|
DB_COUNT=$(get_metric_value "request_latency_database_seconds" "histogram_count")
|
|
DB_SUM=$(get_metric_value "request_latency_database_seconds" "histogram_sum")
|
|
DB_MEAN=$(calculate_mean "$DB_SUM" "$DB_COUNT")
|
|
DB_OPERATIONS=$(get_metric_value "request_operations_database" "counter")
|
|
DB_PERCENTAGE=$(get_metric_value "request_percentage_database" "gauge")
|
|
|
|
# Search metrics
|
|
SEARCH_COUNT=$(get_metric_value "request_latency_search_seconds" "histogram_count")
|
|
SEARCH_SUM=$(get_metric_value "request_latency_search_seconds" "histogram_sum")
|
|
SEARCH_MEAN=$(calculate_mean "$SEARCH_SUM" "$SEARCH_COUNT")
|
|
SEARCH_OPERATIONS=$(get_metric_value "request_operations_search" "counter")
|
|
SEARCH_PERCENTAGE=$(get_metric_value "request_percentage_search" "gauge")
|
|
|
|
# Internal processing metrics
|
|
INTERNAL_COUNT=$(get_metric_value "request_latency_internal_seconds" "histogram_count")
|
|
INTERNAL_SUM=$(get_metric_value "request_latency_internal_seconds" "histogram_sum")
|
|
INTERNAL_MEAN=$(calculate_mean "$INTERNAL_SUM" "$INTERNAL_COUNT")
|
|
INTERNAL_PERCENTAGE=$(get_metric_value "request_percentage_internal" "gauge")
|
|
|
|
# Calculate average time per request (not per operation)
|
|
if [ -n "$TOTAL_COUNT" ] && [ "$TOTAL_COUNT" != "0" ]; then
|
|
DB_TIME_PER_REQUEST=$(echo "scale=6; $DB_SUM / $TOTAL_COUNT" | bc)
|
|
SEARCH_TIME_PER_REQUEST=$(echo "scale=6; $SEARCH_SUM / $TOTAL_COUNT" | bc)
|
|
INTERNAL_TIME_PER_REQUEST=$(echo "scale=6; $INTERNAL_SUM / $TOTAL_COUNT" | bc)
|
|
else
|
|
DB_TIME_PER_REQUEST="0"
|
|
SEARCH_TIME_PER_REQUEST="0"
|
|
INTERNAL_TIME_PER_REQUEST="0"
|
|
fi
|
|
|
|
# Check if we have any data
|
|
if [ -z "$TOTAL_COUNT" ] || [ "$TOTAL_COUNT" = "0" ]; then
|
|
echo -e "${RED}No metrics found for endpoint: $ENDPOINT${NC}"
|
|
echo ""
|
|
echo "Available endpoints with metrics:"
|
|
echo "$METRICS" | grep -E "request_latency_total.*endpoint=" | sed -n 's/.*endpoint="\([^"]*\)".*/\1/p' | sort | uniq | head -20
|
|
exit 1
|
|
fi
|
|
|
|
# Display summary
|
|
echo -e "${GREEN}📊 REQUEST SUMMARY${NC}"
|
|
echo -e "├─ Total Requests: ${BLUE}${TOTAL_COUNT}${NC}"
|
|
echo -e "├─ Total Time: ${BLUE}$(format_time "$TOTAL_SUM")${NC}"
|
|
echo -e "└─ Average Time: ${BLUE}$(format_time "$TOTAL_MEAN")${NC}"
|
|
echo ""
|
|
|
|
# Display component breakdown
|
|
echo -e "${GREEN}⏱️ LATENCY BREAKDOWN${NC}"
|
|
echo -e "├─ ${YELLOW}Total Request Time:${NC}"
|
|
echo -e "│ ├─ Mean: ${BLUE}$(format_time "$TOTAL_MEAN")${NC}"
|
|
echo -e "│ ├─ P50: ${BLUE}$(get_percentile "request_latency_total_seconds" 50)${NC}"
|
|
echo -e "│ ├─ P95: ${BLUE}$(get_percentile "request_latency_total_seconds" 95)${NC}"
|
|
echo -e "│ └─ P99: ${BLUE}$(get_percentile "request_latency_total_seconds" 99)${NC}"
|
|
echo -e "│"
|
|
echo -e "├─ ${YELLOW}Database Operations:${NC}"
|
|
echo -e "│ ├─ Total Operations: ${BLUE}${DB_COUNT:-0}${NC} (across ${BLUE}${TOTAL_COUNT}${NC} requests)"
|
|
echo -e "│ ├─ Avg Operations/Request: ${BLUE}$(echo "scale=1; ${DB_COUNT:-0} / ${TOTAL_COUNT}" | bc)${NC}"
|
|
echo -e "│ ├─ Avg Time/Operation: ${BLUE}$(format_time "$DB_MEAN")${NC}"
|
|
echo -e "│ ├─ Avg Time/Request: ${BLUE}$(format_time "$DB_TIME_PER_REQUEST")${NC}"
|
|
echo -e "│ └─ Percentage of Request: ${BLUE}${DB_PERCENTAGE:-0}%${NC}"
|
|
echo -e "│"
|
|
echo -e "├─ ${YELLOW}Search Operations:${NC}"
|
|
echo -e "│ ├─ Total Operations: ${BLUE}${SEARCH_COUNT:-0}${NC} (across ${BLUE}${TOTAL_COUNT}${NC} requests)"
|
|
echo -e "│ ├─ Avg Operations/Request: ${BLUE}$(echo "scale=1; ${SEARCH_COUNT:-0} / ${TOTAL_COUNT}" | bc)${NC}"
|
|
echo -e "│ ├─ Avg Time/Operation: ${BLUE}$(format_time "$SEARCH_MEAN")${NC}"
|
|
echo -e "│ ├─ Avg Time/Request: ${BLUE}$(format_time "$SEARCH_TIME_PER_REQUEST")${NC}"
|
|
echo -e "│ └─ Percentage of Request: ${BLUE}${SEARCH_PERCENTAGE:-0}%${NC}"
|
|
echo -e "│"
|
|
echo -e "└─ ${YELLOW}Internal Processing:${NC}"
|
|
echo -e " ├─ Avg Time/Request: ${BLUE}$(format_time "$INTERNAL_TIME_PER_REQUEST")${NC}"
|
|
echo -e " └─ Percentage of Request: ${BLUE}${INTERNAL_PERCENTAGE:-0}%${NC}"
|
|
echo ""
|
|
|
|
# Calculate and display percentage summary
|
|
if [ -n "$DB_PERCENTAGE" ] || [ -n "$SEARCH_PERCENTAGE" ] || [ -n "$INTERNAL_PERCENTAGE" ]; then
|
|
echo -e "${GREEN}📈 TIME DISTRIBUTION${NC}"
|
|
|
|
# Create a simple bar chart
|
|
DB_PCT=${DB_PERCENTAGE:-0}
|
|
SEARCH_PCT=${SEARCH_PERCENTAGE:-0}
|
|
INTERNAL_PCT=${INTERNAL_PERCENTAGE:-0}
|
|
|
|
# Function to create bar
|
|
create_bar() {
|
|
local pct=$1
|
|
local width=$(echo "scale=0; $pct / 2" | bc)
|
|
printf '█%.0s' $(seq 1 $width)
|
|
}
|
|
|
|
echo -e "Database: $(create_bar $DB_PCT) ${DB_PCT}%"
|
|
echo -e "Search: $(create_bar $SEARCH_PCT) ${SEARCH_PCT}%"
|
|
echo -e "Internal: $(create_bar $INTERNAL_PCT) ${INTERNAL_PCT}%"
|
|
echo ""
|
|
fi
|
|
|
|
# Show raw metrics if requested
|
|
if [ "$SHOW_RAW" = true ]; then
|
|
echo -e "${GREEN}📋 RAW METRICS${NC}"
|
|
echo "$METRICS" | grep -E "(request_latency|request_percentage|request_operations).*endpoint=\"${ENDPOINT_ESCAPED}\"" | sort
|
|
echo ""
|
|
fi
|
|
|
|
echo -e "${BLUE}═══════════════════════════════════════════════════════════════════════${NC}" |