Fix Metrics Buckets and Url Normalization (#22536)

* Fix Metrics Buckets and Url Normalization

* Add more URIs to normalization

* Maintain Normalized Endpoints

* Fix more tags

* Fix Tests

---------

Co-authored-by: Sriharsha Chintalapani <harshach@users.noreply.github.com>
This commit is contained in:
Mohit Yadav 2025-07-24 02:24:00 +05:30 committed by GitHub
parent 727ae107f8
commit 94d297d454
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 306 additions and 58 deletions

View File

@ -0,0 +1,247 @@
package org.openmetadata.service.monitoring;
import java.time.Duration;
import lombok.experimental.UtilityClass;
/**
 * Shared helpers for metric registration: common SLA histogram buckets and URI
 * normalization used to keep the "endpoint"/"uri" metric tags low-cardinality.
 */
@UtilityClass
public class MetricUtils {
// Standard SLA buckets for all latency metrics (10 buckets total)
// Shared by HTTP, JDBI, search and pipeline timers so dashboards line up.
public static final Duration[] LATENCY_SLA_BUCKETS = {
Duration.ofMillis(10), // 10ms
Duration.ofMillis(25), // 25ms
Duration.ofMillis(50), // 50ms
Duration.ofMillis(100), // 100ms
Duration.ofMillis(250), // 250ms
Duration.ofMillis(500), // 500ms
Duration.ofSeconds(1), // 1s
Duration.ofMillis(2500), // 2.5s
Duration.ofSeconds(5), // 5s
Duration.ofSeconds(30) // 30s
};
/**
 * Collapses variable path segments (UUIDs, numeric IDs, encoded names, timestamps, ...)
 * in {@code uri} into fixed placeholders such as {@code {id}} and {@code {name}} so each
 * logical endpoint maps to a single metric tag value.
 *
 * <p>The replacements below are applied strictly in order and each pattern operates on the
 * OUTPUT of the previous one, so the ordering is load-bearing — do not reorder casually.
 *
 * <p>NOTE(review): because the generic UUID/numeric rules run first, an entity path like
 * {@code /tables/<uuid>} first becomes {@code /tables/{id}} and is then matched by the later
 * entity rule, ending up as {@code /tables/{name}} — confirm that placeholder is intended.
 * NOTE(review): {@code /system/settings/[^/]+} runs before the more specific
 * {@code /system/settings/reset/[^/]+}, so the reset rule can never match as written; the
 * generic {@code /permissions/[^/]+/[^/]+} rule similarly precedes the more specific
 * permissions patterns. NOTE(review): every call recompiles each regex via
 * {@code String.replaceAll} — consider pre-compiled {@code static final Pattern}s if this
 * sits on the request hot path.
 *
 * @param uri raw request URI; {@code null} or empty yields {@code "/unknown"}
 * @return normalized URI, never {@code null} or empty ({@code "/"} for an empty result)
 */
public static String normalizeUri(String uri) {
// Normalize URIs to avoid high cardinality
if (uri == null || uri.isEmpty()) {
return "/unknown";
}
// Remove query parameters to reduce cardinality
String normalizedUri = uri.split("\\?")[0];
// Replace various ID patterns with placeholders
normalizedUri =
normalizedUri
// UUID patterns (e.g., /api/v1/tables/12345678-1234-1234-1234-123456789abc)
// NOTE(review): lowercase hex only — uppercase UUIDs fall through to later rules.
.replaceAll("/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", "/{id}")
// Numeric IDs (e.g., /api/v1/tables/123456)
.replaceAll("/\\d+", "/{id}")
// Entity names that contain special characters or spaces (encoded)
.replaceAll("/[^/]*%[0-9a-fA-F]{2}[^/]*", "/{name}")
// Long alphanumeric strings that might be encoded names
// NOTE(review): also matches legitimate long static segments (>= 20 chars) — verify.
.replaceAll("/[a-zA-Z0-9_.-]{20,}", "/{name}")
// Handle common OpenMetadata API patterns - split into multiple patterns to reduce
// complexity
.replaceAll(
"/(tables|databases|services|pipelines|topics|dashboards|charts|containers)/[^/]+/[^/]+",
"/$1/{name}/{subresource}")
.replaceAll(
"/(glossaryTerms|tags|policies|roles|users|teams|dataModels|searchIndexes)/[^/]+/[^/]+",
"/$1/{name}/{subresource}")
.replaceAll(
"/(testSuites|testCases|webhooks|bots|automations|applications|connections)/[^/]+/[^/]+",
"/$1/{name}/{subresource}")
.replaceAll(
"/(secrets|storedProcedures|databaseSchemas|mlModels|reports|metrics)/[^/]+/[^/]+",
"/$1/{name}/{subresource}")
.replaceAll(
"/(queries|suggestions|lineage|events|feeds|conversations|activities)/[^/]+/[^/]+",
"/$1/{name}/{subresource}")
.replaceAll(
"/(tasks|kpis|domains|dataProducts|governanceWorkflows)/[^/]+/[^/]+",
"/$1/{name}/{subresource}")
// Single-segment entity variants must come AFTER the two-segment ones above,
// otherwise "/entity/{name}" would consume the first segment of deeper paths.
.replaceAll(
"/(tables|databases|services|pipelines|topics|dashboards|charts|containers)/[^/]+",
"/$1/{name}")
.replaceAll(
"/(glossaryTerms|tags|policies|roles|users|teams|dataModels|searchIndexes)/[^/]+",
"/$1/{name}")
.replaceAll(
"/(testSuites|testCases|webhooks|bots|automations|applications|connections)/[^/]+",
"/$1/{name}")
.replaceAll(
"/(secrets|storedProcedures|databaseSchemas|mlModels|reports|metrics)/[^/]+",
"/$1/{name}")
.replaceAll(
"/(queries|suggestions|lineage|events|feeds|conversations|activities)/[^/]+",
"/$1/{name}")
.replaceAll(
"/(tasks|kpis|domains|dataProducts|governanceWorkflows)/[^/]+", "/$1/{name}")
// Analytics deep paths with timestamps and multiple segments
.replaceAll(
"/analytics/dataInsights/[^/]+/[^/]+", "/analytics/dataInsights/{type}/{id}")
.replaceAll(
"/analytics/web/events/[^/]+/[^/]+/collect",
"/analytics/web/events/{name}/{timestamp}/collect")
// Data quality multi-level paths
.replaceAll("/dataQuality/testCases/[^/]+/[^/]+", "/dataQuality/testCases/{type}/{id}")
.replaceAll(
"/dataQuality/testSuites/[^/]+/[^/]+", "/dataQuality/testSuites/{id}/{subresource}")
// Complex lineage patterns with multiple entities
// NOTE(review): the 4-segment rule below also matches the 6-segment ".../name/..."
// shapes first, so the two following lineage rules may never fire — confirm.
.replaceAll(
"/lineage/[^/]+/[^/]+/[^/]+/[^/]+",
"/lineage/{fromEntity}/{fromId}/{toEntity}/{toId}")
.replaceAll(
"/lineage/[^/]+/name/[^/]+/[^/]+/name/[^/]+",
"/lineage/{fromEntity}/name/{fromFQN}/{toEntity}/name/{toFQN}")
.replaceAll(
"/lineage/[^/]+/[^/]+/type/[^/]+",
"/lineage/{entityType}/{entityId}/type/{lineageSource}")
// Event subscriptions complex paths
.replaceAll(
"/events/subscriptions/[^/]+/[^/]+/[^/]+",
"/events/subscriptions/{id}/{resource}/{subresource}")
.replaceAll(
"/events/subscriptions/name/[^/]+/[^/]+",
"/events/subscriptions/name/{name}/{resource}")
// Service nested paths
.replaceAll("/services/[^/]+/[^/]+/[^/]+", "/services/{serviceType}/{id}/{subresource}")
.replaceAll(
"/services/testConnectionDefinitions/[^/]+",
"/services/testConnectionDefinitions/{connectionType}")
// Governance workflow paths
.replaceAll(
"/governance/[^/]+/[^/]+/[^/]+",
"/governance/{workflowType}/{definitionName}/{instanceId}")
// Drive/file management paths
.replaceAll("/drives/[^/]+/[^/]+/[^/]+", "/drives/{type}/{id}/{subresource}")
// Universal entity sub-resources (versions, followers, results, etc.)
.replaceAll("/([^/]+)/([^/]+)/versions/[^/]+", "/$1/$2/versions/{version}")
.replaceAll("/([^/]+)/([^/]+)/followers/[^/]+", "/$1/$2/followers/{userId}")
.replaceAll("/([^/]+)/([^/]+)/results/[^/]+", "/$1/$2/results/{result}")
.replaceAll(
"/([^/]+)/([^/]+)/results/before/[^/]+", "/$1/$2/results/before/{timestamp}")
.replaceAll(
"/([^/]+)/name/([^/]+)/(export|import|exportAsync|importAsync)", "/$1/name/$2/$3")
// SCIM paths
.replaceAll("/scim/(Users|Groups)/[^/]+", "/scim/$1/{id}")
// Permission resource patterns
// NOTE(review): this generic two-segment rule precedes the more specific
// permissions rules below, which therefore may never match — verify ordering.
.replaceAll("/permissions/[^/]+/[^/]+", "/permissions/{resource}/{id}")
.replaceAll("/permissions/[^/]+/name/[^/]+", "/permissions/{resource}/name/{name}")
.replaceAll("/permissions/view/[^/]+", "/permissions/view/{entityType}")
.replaceAll("/permissions/debug/user/[^/]+", "/permissions/debug/user/{username}")
// NOTE(review): identity replacement below is a no-op — likely a placeholder.
.replaceAll("/permissions/debug/evaluate", "/permissions/debug/evaluate")
.replaceAll("/permissions/[^/]+", "/permissions/{resource}")
// EventSubscription complex patterns (HIGH PRIORITY - prevents cardinality explosion)
.replaceAll(
"/events/subscriptions/name/[^/]+/status/[^/]+",
"/events/subscriptions/name/{name}/status/{destinationId}")
.replaceAll(
"/events/subscriptions/[^/]+/status/[^/]+",
"/events/subscriptions/{id}/status/{destinationId}")
.replaceAll(
"/events/subscriptions/[^/]+/resources",
"/events/subscriptions/{alertType}/resources")
.replaceAll(
"/events/subscriptions/id/[^/]+/listEvents",
"/events/subscriptions/id/{id}/listEvents")
.replaceAll(
"/events/subscriptions/id/[^/]+/eventsRecord",
"/events/subscriptions/id/{subscriptionId}/eventsRecord")
.replaceAll(
"/events/subscriptions/name/[^/]+/eventsRecord",
"/events/subscriptions/name/{subscriptionName}/eventsRecord")
.replaceAll(
"/events/subscriptions/id/[^/]+/diagnosticInfo",
"/events/subscriptions/id/{subscriptionId}/diagnosticInfo")
.replaceAll(
"/events/subscriptions/name/[^/]+/diagnosticInfo",
"/events/subscriptions/name/{subscriptionName}/diagnosticInfo")
.replaceAll(
"/events/subscriptions/id/[^/]+/failedEvents",
"/events/subscriptions/id/{id}/failedEvents")
.replaceAll(
"/events/subscriptions/name/[^/]+/failedEvents",
"/events/subscriptions/name/{eventSubscriptionName}/failedEvents")
.replaceAll(
"/events/subscriptions/id/[^/]+/listSuccessfullySentChangeEvents",
"/events/subscriptions/id/{id}/listSuccessfullySentChangeEvents")
.replaceAll(
"/events/subscriptions/name/[^/]+/listSuccessfullySentChangeEvents",
"/events/subscriptions/name/{eventSubscriptionName}/listSuccessfullySentChangeEvents")
.replaceAll(
"/events/subscriptions/id/[^/]+/destinations",
"/events/subscriptions/id/{eventSubscriptionId}/destinations")
.replaceAll(
"/events/subscriptions/name/[^/]+/destinations",
"/events/subscriptions/name/{eventSubscriptionName}/destinations")
.replaceAll(
"/events/subscriptions/name/[^/]+/syncOffset",
"/events/subscriptions/name/{eventSubscriptionName}/syncOffset")
// App management patterns
.replaceAll("/apps/name/[^/]+/status", "/apps/name/{name}/status")
.replaceAll("/apps/name/[^/]+/extension", "/apps/name/{name}/extension")
.replaceAll("/apps/name/[^/]+/logs", "/apps/name/{name}/logs")
.replaceAll("/apps/name/[^/]+/runs/latest", "/apps/name/{name}/runs/latest")
.replaceAll("/apps/schedule/[^/]+", "/apps/schedule/{name}")
.replaceAll("/apps/configure/[^/]+", "/apps/configure/{name}")
.replaceAll("/apps/trigger/[^/]+", "/apps/trigger/{name}")
.replaceAll("/apps/stop/[^/]+", "/apps/stop/{name}")
.replaceAll("/apps/deploy/[^/]+", "/apps/deploy/{name}")
// IngestionPipeline operational patterns
.replaceAll(
"/services/ingestionPipelines/deploy/[^/]+",
"/services/ingestionPipelines/deploy/{id}")
.replaceAll(
"/services/ingestionPipelines/trigger/[^/]+",
"/services/ingestionPipelines/trigger/{id}")
.replaceAll(
"/services/ingestionPipelines/toggleIngestion/[^/]+",
"/services/ingestionPipelines/toggleIngestion/{id}")
.replaceAll(
"/services/ingestionPipelines/kill/[^/]+", "/services/ingestionPipelines/kill/{id}")
.replaceAll(
"/services/ingestionPipelines/logs/[^/]+/last",
"/services/ingestionPipelines/logs/{id}/last")
.replaceAll(
"/services/ingestionPipelines/[^/]+/pipelineStatus/[^/]+",
"/services/ingestionPipelines/{fqn}/pipelineStatus/{id}")
.replaceAll(
"/services/ingestionPipelines/[^/]+/pipelineStatus",
"/services/ingestionPipelines/{fqn}/pipelineStatus")
// Search resource patterns
.replaceAll("/search/get/[^/]+/doc/[^/]+", "/search/get/{index}/doc/{id}")
// User authentication & security patterns
.replaceAll("/users/generateToken/[^/]+", "/users/generateToken/{id}")
.replaceAll("/users/token/[^/]+", "/users/token/{id}")
.replaceAll("/users/auth-mechanism/[^/]+", "/users/auth-mechanism/{id}")
// Feed & discussion patterns
.replaceAll("/feed/tasks/[^/]+/resolve", "/feed/tasks/{id}/resolve")
.replaceAll("/feed/tasks/[^/]+/close", "/feed/tasks/{id}/close")
.replaceAll("/feed/tasks/[^/]+", "/feed/tasks/{id}")
.replaceAll("/feed/[^/]+/posts/[^/]+", "/feed/{threadId}/posts/{postId}")
.replaceAll("/feed/[^/]+/posts", "/feed/{id}/posts")
.replaceAll("/feed/[^/]+", "/feed/{threadId}")
// System & configuration patterns
// NOTE(review): the generic settings rule precedes the reset-specific one, so
// "/system/settings/reset/{name}" can never be produced as written — verify order.
.replaceAll("/system/settings/[^/]+", "/system/settings/{name}")
.replaceAll("/system/settings/reset/[^/]+", "/system/settings/reset/{name}")
// DocStore patterns
.replaceAll(
"/docStore/validateTemplate/[^/]+", "/docStore/validateTemplate/{templateName}")
// Handle remaining timestamp patterns
.replaceAll("/[0-9]{10,13}", "/{timestamp}");
// Ensure we don't have empty path segments
normalizedUri = normalizedUri.replaceAll("/+", "/");
// Limit to reasonable URI length to prevent edge cases
if (normalizedUri.length() > 100) {
// For very long URIs, just use the first few path segments
// (a leading "/" makes segments[0] empty, so 5 segments == 4 path components)
String[] segments = normalizedUri.split("/");
if (segments.length > 5) {
normalizedUri = String.join("/", java.util.Arrays.copyOfRange(segments, 0, 5)) + "/...";
}
}
return normalizedUri.isEmpty() ? "/" : normalizedUri;
}
}

View File

@ -138,8 +138,7 @@ public class MicrometerBundle implements ConfiguredBundle<OpenMetadataApplicatio
// HTTP request histogram
io.micrometer.core.instrument.Timer.builder("http_server_requests_sec")
.description("HTTP methods duration")
.publishPercentileHistogram()
.serviceLevelObjectives(
.sla(
java.time.Duration.ofMillis(10),
java.time.Duration.ofMillis(100),
java.time.Duration.ofSeconds(1),
@ -153,8 +152,7 @@ public class MicrometerBundle implements ConfiguredBundle<OpenMetadataApplicatio
// JDBI request histogram
io.micrometer.core.instrument.Timer.builder("jdbi_requests_seconds")
.description("jdbi requests duration distribution")
.publishPercentileHistogram()
.serviceLevelObjectives(
.sla(
java.time.Duration.ofMillis(10),
java.time.Duration.ofMillis(100),
java.time.Duration.ofSeconds(1),

View File

@ -1,5 +1,8 @@
package org.openmetadata.service.monitoring;
import static org.openmetadata.service.monitoring.MetricUtils.LATENCY_SLA_BUCKETS;
import static org.openmetadata.service.monitoring.MetricUtils.normalizeUri;
import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.DistributionSummary;
import io.micrometer.core.instrument.MeterRegistry;
@ -51,12 +54,7 @@ public class OpenMetadataMetrics {
this.httpRequestTimer =
Timer.builder("http.server.requests")
.description("HTTP server request duration")
.publishPercentileHistogram()
.serviceLevelObjectives(
Duration.ofMillis(50),
Duration.ofMillis(100),
Duration.ofMillis(500),
Duration.ofSeconds(1))
.sla(LATENCY_SLA_BUCKETS)
.register(meterRegistry);
this.httpRequestCounter =
@ -68,16 +66,14 @@ public class OpenMetadataMetrics {
DistributionSummary.builder("http.server.response.size")
.description("HTTP response size in bytes")
.baseUnit("bytes")
.publishPercentileHistogram()
.serviceLevelObjectives(1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216)
.register(meterRegistry);
// Initialize Database metrics
this.jdbiQueryTimer =
Timer.builder("db.query.duration")
.description("Database query duration")
.publishPercentileHistogram()
.serviceLevelObjectives(
Duration.ofMillis(10), Duration.ofMillis(50), Duration.ofMillis(100))
.sla(LATENCY_SLA_BUCKETS)
.register(meterRegistry);
this.jdbiConnectionCounter =
@ -123,7 +119,7 @@ public class OpenMetadataMetrics {
this.pipelineExecutionTimer =
Timer.builder("pipeline.execution.duration")
.description("Pipeline execution duration")
.publishPercentileHistogram()
.sla(LATENCY_SLA_BUCKETS)
.register(meterRegistry);
// Initialize Authentication metrics
@ -265,14 +261,6 @@ public class OpenMetadataMetrics {
.register(meterRegistry);
}
// Utility methods
// Collapse high-cardinality URI segments into placeholders before tagging metrics.
// NOTE(review): throws NullPointerException when uri is null — callers must guard;
// superseded by the shared MetricUtils.normalizeUri which covers far more patterns.
private String normalizeUri(String uri) {
// Normalize URIs to avoid high cardinality
// Replace IDs with placeholders
// UUID path segments (lowercase hex only) become "/{id}"
return uri.replaceAll("/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", "/{id}")
// purely numeric path segments become "/{id}"
.replaceAll("/[0-9]+", "/{id}");
}
private String getStatusClass(int status) {
if (status >= 100 && status < 200) return "1xx";
if (status >= 200 && status < 300) return "2xx";

View File

@ -1,5 +1,8 @@
package org.openmetadata.service.monitoring;
import static org.openmetadata.service.monitoring.MetricUtils.LATENCY_SLA_BUCKETS;
import static org.openmetadata.service.monitoring.MetricUtils.normalizeUri;
import io.micrometer.core.instrument.Gauge;
import io.micrometer.core.instrument.Metrics;
import io.micrometer.core.instrument.Timer;
@ -48,14 +51,14 @@ public class RequestLatencyContext {
public static void startRequest(String endpoint) {
RequestContext context = new RequestContext(endpoint);
requestContext.set(context);
String normalizedEndpoint = normalizeUri(endpoint);
requestTimers.computeIfAbsent(
endpoint,
normalizedEndpoint,
k ->
Timer.builder("request.latency.total")
.tag(ENDPOINT, endpoint)
.tag(ENDPOINT, normalizedEndpoint)
.description("Total request latency")
.publishPercentileHistogram()
.sla(LATENCY_SLA_BUCKETS)
.register(Metrics.globalRegistry));
context.requestTimerSample = Timer.start(Metrics.globalRegistry);
context.internalTimerStartNanos = System.nanoTime();
@ -128,10 +131,11 @@ public class RequestLatencyContext {
RequestContext context = requestContext.get();
if (context == null) return;
String normalizedEndpoint = normalizeUri(context.endpoint);
try {
// Stop request timer
if (context.requestTimerSample != null) {
Timer requestTimer = requestTimers.get(context.endpoint);
Timer requestTimer = requestTimers.get(normalizedEndpoint);
if (requestTimer != null) {
context.totalTime = context.requestTimerSample.stop(requestTimer);
}
@ -145,47 +149,47 @@ public class RequestLatencyContext {
// This gives us the total DB time for THIS request
Timer dbTimer =
databaseTimers.computeIfAbsent(
context.endpoint,
normalizedEndpoint,
k ->
Timer.builder("request.latency.database")
.tag(ENDPOINT, context.endpoint)
.tag(ENDPOINT, normalizedEndpoint)
.description("Total database latency per request")
.publishPercentileHistogram()
.sla(LATENCY_SLA_BUCKETS)
.register(Metrics.globalRegistry));
dbTimer.record(context.dbTime, java.util.concurrent.TimeUnit.NANOSECONDS);
// Record total search time for THIS request
Timer searchTimer =
searchTimers.computeIfAbsent(
context.endpoint,
normalizedEndpoint,
k ->
Timer.builder("request.latency.search")
.tag(ENDPOINT, context.endpoint)
.tag(ENDPOINT, normalizedEndpoint)
.description("Total search latency per request")
.publishPercentileHistogram()
.sla(LATENCY_SLA_BUCKETS)
.register(Metrics.globalRegistry));
searchTimer.record(context.searchTime, java.util.concurrent.TimeUnit.NANOSECONDS);
// Record internal processing time for THIS request
Timer internalTimer =
internalTimers.computeIfAbsent(
context.endpoint,
normalizedEndpoint,
k ->
Timer.builder("request.latency.internal")
.tag(ENDPOINT, context.endpoint)
.tag(ENDPOINT, normalizedEndpoint)
.description("Internal processing latency per request")
.publishPercentileHistogram()
.sla(LATENCY_SLA_BUCKETS)
.register(Metrics.globalRegistry));
internalTimer.record(context.internalTime, java.util.concurrent.TimeUnit.NANOSECONDS);
// Record operation counts as distribution summaries to get avg/max/percentiles
if (context.dbOperationCount > 0) {
Metrics.summary("request.operations.database", ENDPOINT, context.endpoint)
Metrics.summary("request.operations.database", ENDPOINT, normalizedEndpoint)
.record(context.dbOperationCount);
}
if (context.searchOperationCount > 0) {
Metrics.summary("request.operations.search", ENDPOINT, context.endpoint)
Metrics.summary("request.operations.search", ENDPOINT, normalizedEndpoint)
.record(context.searchOperationCount);
}
@ -198,23 +202,23 @@ public class RequestLatencyContext {
// Get or create percentage holder for this endpoint
PercentageHolder holder =
percentageHolders.computeIfAbsent(
context.endpoint,
normalizedEndpoint,
k -> {
PercentageHolder newHolder = new PercentageHolder();
// Register gauges that read from the atomic references
Gauge.builder("request.percentage.database", newHolder.databasePercent::get)
.tag(ENDPOINT, context.endpoint)
.tag(ENDPOINT, normalizedEndpoint)
.description("Percentage of request time spent in database operations")
.register(Metrics.globalRegistry);
Gauge.builder("request.percentage.search", newHolder.searchPercent::get)
.tag(ENDPOINT, context.endpoint)
.tag(ENDPOINT, normalizedEndpoint)
.description("Percentage of request time spent in search operations")
.register(Metrics.globalRegistry);
Gauge.builder("request.percentage.internal", newHolder.internalPercent::get)
.tag(ENDPOINT, context.endpoint)
.tag(ENDPOINT, normalizedEndpoint)
.description("Percentage of request time spent in internal processing")
.register(Metrics.globalRegistry);

View File

@ -39,10 +39,11 @@ class RequestLatencyContextTest {
simulateWork(20);
String normalizedEndpoint = MetricUtils.normalizeUri(endpoint);
RequestLatencyContext.endRequest();
Timer totalTimer = Metrics.timer("request.latency.total", "endpoint", endpoint);
Timer dbTimer = Metrics.timer("request.latency.database", "endpoint", endpoint);
Timer internalTimer = Metrics.timer("request.latency.internal", "endpoint", endpoint);
Timer totalTimer = Metrics.timer("request.latency.total", "endpoint", normalizedEndpoint);
Timer dbTimer = Metrics.timer("request.latency.database", "endpoint", normalizedEndpoint);
Timer internalTimer = Metrics.timer("request.latency.internal", "endpoint", normalizedEndpoint);
assertEquals(1, totalTimer.count(), "Should have recorded 1 request");
assertEquals(1, dbTimer.count(), "Should have recorded 1 request with database operations");

View File

@ -34,15 +34,16 @@ class RequestLatencyTrackingSimpleTest {
simulateWork(30);
RequestLatencyContext.endRequest();
Timer totalTimer = Metrics.timer("request.latency.total", "endpoint", endpoint);
String normalizedEndpoint = MetricUtils.normalizeUri(endpoint);
Timer totalTimer = Metrics.timer("request.latency.total", "endpoint", normalizedEndpoint);
assertNotNull(totalTimer);
assertEquals(1, totalTimer.count(), "Should have recorded 1 request");
Timer dbTimer = Metrics.timer("request.latency.database", "endpoint", endpoint);
Timer dbTimer = Metrics.timer("request.latency.database", "endpoint", normalizedEndpoint);
assertNotNull(dbTimer);
assertEquals(1, dbTimer.count(), "Should have recorded 1 database operation");
Timer internalTimer = Metrics.timer("request.latency.internal", "endpoint", endpoint);
Timer internalTimer = Metrics.timer("request.latency.internal", "endpoint", normalizedEndpoint);
assertNotNull(internalTimer);
assertEquals(1, internalTimer.count(), "Should have recorded internal processing");
@ -54,9 +55,14 @@ class RequestLatencyTrackingSimpleTest {
LOG.info("Database time: {} ms", dbMs);
LOG.info("Internal time: {} ms", internalMs);
assertTrue(totalMs >= 150 && totalMs <= 210, "Total time should be ~180ms, got: " + totalMs);
assertTrue(dbMs >= 80 && dbMs <= 120, "Database time should be ~100ms, got: " + dbMs);
// Timing expectations: 500ms + 100ms + 30ms = 630ms total
// DB time: 100ms during database operation
// Internal time: 500ms (before DB) + 30ms (after DB) = 530ms
// Allow generous bounds for system timing variations
assertTrue(totalMs >= 500 && totalMs <= 1000, "Total time should be ~630ms, got: " + totalMs);
assertTrue(dbMs >= 80 && dbMs <= 150, "Database time should be ~100ms, got: " + dbMs);
assertTrue(
internalMs >= 60 && internalMs <= 100, "Internal time should be ~80ms, got: " + internalMs);
internalMs >= 400 && internalMs <= 700,
"Internal time should be ~530ms, got: " + internalMs);
}
}

View File

@ -123,10 +123,11 @@ class RequestLatencyTrackingTest extends OpenMetadataApplicationTest {
// Check for specific endpoint metrics - the actual metrics use the endpoint path as-is
LOG.info("Looking for metrics with endpoint: {}", getEndpoint);
String normalizedUri = MetricUtils.normalizeUri("v1/tables/" + createdTable.getId());
// Parse and verify latency metrics
assertLatencyMetricsExist(prometheusMetrics, "request_latency_total", getEndpoint);
assertLatencyMetricsExist(prometheusMetrics, "request_latency_database", getEndpoint);
assertLatencyMetricsExist(prometheusMetrics, "request_latency_internal", getEndpoint);
assertLatencyMetricsExist(prometheusMetrics, "request_latency_total", normalizedUri);
assertLatencyMetricsExist(prometheusMetrics, "request_latency_database", normalizedUri);
assertLatencyMetricsExist(prometheusMetrics, "request_latency_internal", normalizedUri);
}
@Test
@ -205,7 +206,7 @@ class RequestLatencyTrackingTest extends OpenMetadataApplicationTest {
WebTarget complexTarget =
getResource("tables/" + createdTable.getId())
.queryParam("fields", "owners,tags,followers,columns,domain,dataProducts,extension")
.queryParam("fields", "owners,tags,followers,columns,domains,dataProducts,extension")
.queryParam("include", "all");
TestUtils.get(complexTarget, Table.class, ADMIN_AUTH_HEADERS);
@ -218,7 +219,8 @@ class RequestLatencyTrackingTest extends OpenMetadataApplicationTest {
String endpoint = "v1/tables/" + createdTable.getId();
// Verify database operation count
assertLatencyMetricsExist(prometheusMetrics, "request_latency_database", endpoint);
assertLatencyMetricsExist(
prometheusMetrics, "request_latency_database", MetricUtils.normalizeUri(endpoint));
// Check that we have multiple database operations recorded
assertTrue(
@ -247,7 +249,9 @@ class RequestLatencyTrackingTest extends OpenMetadataApplicationTest {
private void assertLatencyMetricsExist(
String prometheusOutput, String metricName, String endpoint) {
// Look for metrics that contain the metric name with the endpoint label
String pattern = metricName + "_seconds.*endpoint=\"" + endpoint.replace("/", "\\/") + "\"";
// Escape regex special characters in the endpoint string
String escapedEndpoint = java.util.regex.Pattern.quote(endpoint);
String pattern = metricName + "_seconds.*endpoint=\"" + escapedEndpoint + "\"";
assertTrue(
prometheusOutput.matches("(?s).*" + pattern + ".*"),
String.format(