From 94d297d454cb45d4438c386203d62f748cae3b05 Mon Sep 17 00:00:00 2001 From: Mohit Yadav <105265192+mohityadav766@users.noreply.github.com> Date: Thu, 24 Jul 2025 02:24:00 +0530 Subject: [PATCH] Fix Metrics Buckets and Url Normalization (#22536) * Fix Metrics Buckets and Url Normalization * Add more uri to normalization * Maintain Normalized Endpoints * Fix more tags * Fix Tests --------- Co-authored-by: Sriharsha Chintalapani --- .../service/monitoring/MetricUtils.java | 247 ++++++++++++++++++ .../service/monitoring/MicrometerBundle.java | 6 +- .../monitoring/OpenMetadataMetrics.java | 26 +- .../monitoring/RequestLatencyContext.java | 44 ++-- .../monitoring/RequestLatencyContextTest.java | 7 +- .../RequestLatencyTrackingSimpleTest.java | 18 +- .../RequestLatencyTrackingTest.java | 16 +- 7 files changed, 306 insertions(+), 58 deletions(-) create mode 100644 openmetadata-service/src/main/java/org/openmetadata/service/monitoring/MetricUtils.java diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/monitoring/MetricUtils.java b/openmetadata-service/src/main/java/org/openmetadata/service/monitoring/MetricUtils.java new file mode 100644 index 00000000000..f69aa3add83 --- /dev/null +++ b/openmetadata-service/src/main/java/org/openmetadata/service/monitoring/MetricUtils.java @@ -0,0 +1,247 @@ +package org.openmetadata.service.monitoring; + +import java.time.Duration; +import lombok.experimental.UtilityClass; + +@UtilityClass +public class MetricUtils { + // Standard SLA buckets for all latency metrics (10 buckets total) + public static final Duration[] LATENCY_SLA_BUCKETS = { + Duration.ofMillis(10), // 10ms + Duration.ofMillis(25), // 25ms + Duration.ofMillis(50), // 50ms + Duration.ofMillis(100), // 100ms + Duration.ofMillis(250), // 250ms + Duration.ofMillis(500), // 500ms + Duration.ofSeconds(1), // 1s + Duration.ofMillis(2500), // 2.5s + Duration.ofSeconds(5), // 5s + Duration.ofSeconds(30) // 30s + }; + + public static String normalizeUri(String uri) { + // Normalize URIs to avoid high cardinality + if (uri == null || uri.isEmpty()) { + return "/unknown"; + } + + // Remove query parameters to reduce cardinality + String normalizedUri = uri.split("\\?")[0]; + + // Replace various ID patterns with placeholders + normalizedUri = + normalizedUri + // UUID patterns (e.g., /api/v1/tables/12345678-1234-1234-1234-123456789abc) + .replaceAll("/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", "/{id}") + // Numeric IDs (e.g., /api/v1/tables/123456) + .replaceAll("/\\d+", "/{id}") + // Entity names that contain special characters or spaces (encoded) + .replaceAll("/[^/]*%[0-9a-fA-F]{2}[^/]*", "/{name}") + // Long alphanumeric strings that might be encoded names + .replaceAll("/[a-zA-Z0-9_.-]{20,}", "/{name}") + // Handle common OpenMetadata API patterns - split into multiple patterns to reduce + // complexity + .replaceAll( + "/(tables|databases|services|pipelines|topics|dashboards|charts|containers)/[^/]+/[^/]+", + "/$1/{name}/{subresource}") + .replaceAll( + "/(glossaryTerms|tags|policies|roles|users|teams|dataModels|searchIndexes)/[^/]+/[^/]+", + "/$1/{name}/{subresource}") + .replaceAll( + "/(testSuites|testCases|webhooks|bots|automations|applications|connections)/[^/]+/[^/]+", + "/$1/{name}/{subresource}") + .replaceAll( + "/(secrets|storedProcedures|databaseSchemas|mlModels|reports|metrics)/[^/]+/[^/]+", + "/$1/{name}/{subresource}") + .replaceAll( + "/(queries|suggestions|lineage|events|feeds|conversations|activities)/[^/]+/[^/]+", + "/$1/{name}/{subresource}") 
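+                // The rules above and immediately below handle entity paths with a trailing
+                // sub-resource, e.g. "/v1/tables/sales.orders/columns" ->
+                // "/v1/tables/{name}/{subresource}"; the matching single-segment rules that
+                // follow cover plain "/{entity}/{name}" lookups. (Illustrative mapping,
+                // assuming a short name with no digits or percent-encoded characters, so the
+                // generic ID/name rules earlier in the chain leave the segments untouched.)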
+ .replaceAll( + "/(tasks|kpis|domains|dataProducts|governanceWorkflows)/[^/]+/[^/]+", + "/$1/{name}/{subresource}") + .replaceAll( + "/(tables|databases|services|pipelines|topics|dashboards|charts|containers)/[^/]+", + "/$1/{name}") + .replaceAll( + "/(glossaryTerms|tags|policies|roles|users|teams|dataModels|searchIndexes)/[^/]+", + "/$1/{name}") + .replaceAll( + "/(testSuites|testCases|webhooks|bots|automations|applications|connections)/[^/]+", + "/$1/{name}") + .replaceAll( + "/(secrets|storedProcedures|databaseSchemas|mlModels|reports|metrics)/[^/]+", + "/$1/{name}") + .replaceAll( + "/(queries|suggestions|lineage|events|feeds|conversations|activities)/[^/]+", + "/$1/{name}") + .replaceAll( + "/(tasks|kpis|domains|dataProducts|governanceWorkflows)/[^/]+", "/$1/{name}") + // Analytics deep paths with timestamps and multiple segments + .replaceAll( + "/analytics/dataInsights/[^/]+/[^/]+", "/analytics/dataInsights/{type}/{id}") + .replaceAll( + "/analytics/web/events/[^/]+/[^/]+/collect", + "/analytics/web/events/{name}/{timestamp}/collect") + // Data quality multi-level paths + .replaceAll("/dataQuality/testCases/[^/]+/[^/]+", "/dataQuality/testCases/{type}/{id}") + .replaceAll( + "/dataQuality/testSuites/[^/]+/[^/]+", "/dataQuality/testSuites/{id}/{subresource}") + // Complex lineage patterns with multiple entities + .replaceAll( + "/lineage/[^/]+/[^/]+/[^/]+/[^/]+", + "/lineage/{fromEntity}/{fromId}/{toEntity}/{toId}") + .replaceAll( + "/lineage/[^/]+/name/[^/]+/[^/]+/name/[^/]+", + "/lineage/{fromEntity}/name/{fromFQN}/{toEntity}/name/{toFQN}") + .replaceAll( + "/lineage/[^/]+/[^/]+/type/[^/]+", + "/lineage/{entityType}/{entityId}/type/{lineageSource}") + // Event subscriptions complex paths + .replaceAll( + "/events/subscriptions/[^/]+/[^/]+/[^/]+", + "/events/subscriptions/{id}/{resource}/{subresource}") + .replaceAll( + "/events/subscriptions/name/[^/]+/[^/]+", + "/events/subscriptions/name/{name}/{resource}") + // Service nested paths + .replaceAll("/services/[^/]+/[^/]+/[^/]+", "/services/{serviceType}/{id}/{subresource}") + .replaceAll( + "/services/testConnectionDefinitions/[^/]+", + "/services/testConnectionDefinitions/{connectionType}") + // Governance workflow paths + .replaceAll( + "/governance/[^/]+/[^/]+/[^/]+", + "/governance/{workflowType}/{definitionName}/{instanceId}") + // Drive/file management paths + .replaceAll("/drives/[^/]+/[^/]+/[^/]+", "/drives/{type}/{id}/{subresource}") + // Universal entity sub-resources (versions, followers, results, etc.) 
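+                // These generic rules use back-references ("$1/$2"), so the first two path
+                // segments are kept exactly as the earlier entity-specific rules left them
+                // (often already "{name}"/"{subresource}" placeholders); only the trailing
+                // version/follower/result segment is replaced here.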
+ .replaceAll("/([^/]+)/([^/]+)/versions/[^/]+", "/$1/$2/versions/{version}") + .replaceAll("/([^/]+)/([^/]+)/followers/[^/]+", "/$1/$2/followers/{userId}") + .replaceAll("/([^/]+)/([^/]+)/results/[^/]+", "/$1/$2/results/{result}") + .replaceAll( + "/([^/]+)/([^/]+)/results/before/[^/]+", "/$1/$2/results/before/{timestamp}") + .replaceAll( + "/([^/]+)/name/([^/]+)/(export|import|exportAsync|importAsync)", "/$1/name/$2/$3") + // SCIM paths + .replaceAll("/scim/(Users|Groups)/[^/]+", "/scim/$1/{id}") + // Permission resource patterns + .replaceAll("/permissions/[^/]+/[^/]+", "/permissions/{resource}/{id}") + .replaceAll("/permissions/[^/]+/name/[^/]+", "/permissions/{resource}/name/{name}") + .replaceAll("/permissions/view/[^/]+", "/permissions/view/{entityType}") + .replaceAll("/permissions/debug/user/[^/]+", "/permissions/debug/user/{username}") + .replaceAll("/permissions/debug/evaluate", "/permissions/debug/evaluate") + .replaceAll("/permissions/[^/]+", "/permissions/{resource}") + // EventSubscription complex patterns (HIGH PRIORITY - prevents cardinality explosion) + .replaceAll( + "/events/subscriptions/name/[^/]+/status/[^/]+", + "/events/subscriptions/name/{name}/status/{destinationId}") + .replaceAll( + "/events/subscriptions/[^/]+/status/[^/]+", + "/events/subscriptions/{id}/status/{destinationId}") + .replaceAll( + "/events/subscriptions/[^/]+/resources", + "/events/subscriptions/{alertType}/resources") + .replaceAll( + "/events/subscriptions/id/[^/]+/listEvents", + "/events/subscriptions/id/{id}/listEvents") + .replaceAll( + "/events/subscriptions/id/[^/]+/eventsRecord", + "/events/subscriptions/id/{subscriptionId}/eventsRecord") + .replaceAll( + "/events/subscriptions/name/[^/]+/eventsRecord", + "/events/subscriptions/name/{subscriptionName}/eventsRecord") + .replaceAll( + "/events/subscriptions/id/[^/]+/diagnosticInfo", + "/events/subscriptions/id/{subscriptionId}/diagnosticInfo") + .replaceAll( + "/events/subscriptions/name/[^/]+/diagnosticInfo", + "/events/subscriptions/name/{subscriptionName}/diagnosticInfo") + .replaceAll( + "/events/subscriptions/id/[^/]+/failedEvents", + "/events/subscriptions/id/{id}/failedEvents") + .replaceAll( + "/events/subscriptions/name/[^/]+/failedEvents", + "/events/subscriptions/name/{eventSubscriptionName}/failedEvents") + .replaceAll( + "/events/subscriptions/id/[^/]+/listSuccessfullySentChangeEvents", + "/events/subscriptions/id/{id}/listSuccessfullySentChangeEvents") + .replaceAll( + "/events/subscriptions/name/[^/]+/listSuccessfullySentChangeEvents", + "/events/subscriptions/name/{eventSubscriptionName}/listSuccessfullySentChangeEvents") + .replaceAll( + "/events/subscriptions/id/[^/]+/destinations", + "/events/subscriptions/id/{eventSubscriptionId}/destinations") + .replaceAll( + "/events/subscriptions/name/[^/]+/destinations", + "/events/subscriptions/name/{eventSubscriptionName}/destinations") + .replaceAll( + "/events/subscriptions/name/[^/]+/syncOffset", + "/events/subscriptions/name/{eventSubscriptionName}/syncOffset") + // App management patterns + .replaceAll("/apps/name/[^/]+/status", "/apps/name/{name}/status") + .replaceAll("/apps/name/[^/]+/extension", "/apps/name/{name}/extension") + .replaceAll("/apps/name/[^/]+/logs", "/apps/name/{name}/logs") + .replaceAll("/apps/name/[^/]+/runs/latest", "/apps/name/{name}/runs/latest") + .replaceAll("/apps/schedule/[^/]+", "/apps/schedule/{name}") + .replaceAll("/apps/configure/[^/]+", "/apps/configure/{name}") + .replaceAll("/apps/trigger/[^/]+", "/apps/trigger/{name}") + 
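+                // App lifecycle endpoints keep the verb and drop the app name, e.g.
+                // "/v1/apps/trigger/MyApp" -> "/v1/apps/trigger/{name}" (illustrative; a
+                // long app name such as "DataInsightsApplication" is already reduced to
+                // "{name}" by the 20+-character rule near the top of the chain, which
+                // yields the same result).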
.replaceAll("/apps/stop/[^/]+", "/apps/stop/{name}") + .replaceAll("/apps/deploy/[^/]+", "/apps/deploy/{name}") + // IngestionPipeline operational patterns + .replaceAll( + "/services/ingestionPipelines/deploy/[^/]+", + "/services/ingestionPipelines/deploy/{id}") + .replaceAll( + "/services/ingestionPipelines/trigger/[^/]+", + "/services/ingestionPipelines/trigger/{id}") + .replaceAll( + "/services/ingestionPipelines/toggleIngestion/[^/]+", + "/services/ingestionPipelines/toggleIngestion/{id}") + .replaceAll( + "/services/ingestionPipelines/kill/[^/]+", "/services/ingestionPipelines/kill/{id}") + .replaceAll( + "/services/ingestionPipelines/logs/[^/]+/last", + "/services/ingestionPipelines/logs/{id}/last") + .replaceAll( + "/services/ingestionPipelines/[^/]+/pipelineStatus/[^/]+", + "/services/ingestionPipelines/{fqn}/pipelineStatus/{id}") + .replaceAll( + "/services/ingestionPipelines/[^/]+/pipelineStatus", + "/services/ingestionPipelines/{fqn}/pipelineStatus") + // Search resource patterns + .replaceAll("/search/get/[^/]+/doc/[^/]+", "/search/get/{index}/doc/{id}") + // User authentication & security patterns + .replaceAll("/users/generateToken/[^/]+", "/users/generateToken/{id}") + .replaceAll("/users/token/[^/]+", "/users/token/{id}") + .replaceAll("/users/auth-mechanism/[^/]+", "/users/auth-mechanism/{id}") + // Feed & discussion patterns + .replaceAll("/feed/tasks/[^/]+/resolve", "/feed/tasks/{id}/resolve") + .replaceAll("/feed/tasks/[^/]+/close", "/feed/tasks/{id}/close") + .replaceAll("/feed/tasks/[^/]+", "/feed/tasks/{id}") + .replaceAll("/feed/[^/]+/posts/[^/]+", "/feed/{threadId}/posts/{postId}") + .replaceAll("/feed/[^/]+/posts", "/feed/{id}/posts") + .replaceAll("/feed/[^/]+", "/feed/{threadId}") + // System & configuration patterns + .replaceAll("/system/settings/[^/]+", "/system/settings/{name}") + .replaceAll("/system/settings/reset/[^/]+", "/system/settings/reset/{name}") + // DocStore patterns + .replaceAll( + "/docStore/validateTemplate/[^/]+", "/docStore/validateTemplate/{templateName}") + // Handle remaining timestamp patterns + .replaceAll("/[0-9]{10,13}", "/{timestamp}"); + + // Ensure we don't have empty path segments + normalizedUri = normalizedUri.replaceAll("/+", "/"); + + // Limit to reasonable URI length to prevent edge cases + if (normalizedUri.length() > 100) { + // For very long URIs, just use the first few path segments + String[] segments = normalizedUri.split("/"); + if (segments.length > 5) { + normalizedUri = String.join("/", java.util.Arrays.copyOfRange(segments, 0, 5)) + "/..."; + } + } + + return normalizedUri.isEmpty() ? 
"/" : normalizedUri; + } +} diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/monitoring/MicrometerBundle.java b/openmetadata-service/src/main/java/org/openmetadata/service/monitoring/MicrometerBundle.java index 91ec2b51201..6cb90cdfc9e 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/monitoring/MicrometerBundle.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/monitoring/MicrometerBundle.java @@ -138,8 +138,7 @@ public class MicrometerBundle implements ConfiguredBundle= 100 && status < 200) return "1xx"; if (status >= 200 && status < 300) return "2xx"; diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/monitoring/RequestLatencyContext.java b/openmetadata-service/src/main/java/org/openmetadata/service/monitoring/RequestLatencyContext.java index ce06f12d883..80fdd961584 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/monitoring/RequestLatencyContext.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/monitoring/RequestLatencyContext.java @@ -1,5 +1,8 @@ package org.openmetadata.service.monitoring; +import static org.openmetadata.service.monitoring.MetricUtils.LATENCY_SLA_BUCKETS; +import static org.openmetadata.service.monitoring.MetricUtils.normalizeUri; + import io.micrometer.core.instrument.Gauge; import io.micrometer.core.instrument.Metrics; import io.micrometer.core.instrument.Timer; @@ -48,14 +51,14 @@ public class RequestLatencyContext { public static void startRequest(String endpoint) { RequestContext context = new RequestContext(endpoint); requestContext.set(context); - + String normalizedEndpoint = normalizeUri(endpoint); requestTimers.computeIfAbsent( - endpoint, + normalizedEndpoint, k -> Timer.builder("request.latency.total") - .tag(ENDPOINT, endpoint) + .tag(ENDPOINT, normalizedEndpoint) .description("Total request latency") - .publishPercentileHistogram() + .sla(LATENCY_SLA_BUCKETS) .register(Metrics.globalRegistry)); context.requestTimerSample = Timer.start(Metrics.globalRegistry); context.internalTimerStartNanos = System.nanoTime(); @@ -128,10 +131,11 @@ public class RequestLatencyContext { RequestContext context = requestContext.get(); if (context == null) return; + String normalizedEndpoint = normalizeUri(context.endpoint); try { // Stop request timer if (context.requestTimerSample != null) { - Timer requestTimer = requestTimers.get(context.endpoint); + Timer requestTimer = requestTimers.get(normalizedEndpoint); if (requestTimer != null) { context.totalTime = context.requestTimerSample.stop(requestTimer); } @@ -145,47 +149,47 @@ public class RequestLatencyContext { // This gives us the total DB time for THIS request Timer dbTimer = databaseTimers.computeIfAbsent( - context.endpoint, + normalizedEndpoint, k -> Timer.builder("request.latency.database") - .tag(ENDPOINT, context.endpoint) + .tag(ENDPOINT, normalizedEndpoint) .description("Total database latency per request") - .publishPercentileHistogram() + .sla(LATENCY_SLA_BUCKETS) .register(Metrics.globalRegistry)); dbTimer.record(context.dbTime, java.util.concurrent.TimeUnit.NANOSECONDS); // Record total search time for THIS request Timer searchTimer = searchTimers.computeIfAbsent( - context.endpoint, + normalizedEndpoint, k -> Timer.builder("request.latency.search") - .tag(ENDPOINT, context.endpoint) + .tag(ENDPOINT, normalizedEndpoint) .description("Total search latency per request") - .publishPercentileHistogram() + .sla(LATENCY_SLA_BUCKETS) .register(Metrics.globalRegistry)); 
searchTimer.record(context.searchTime, java.util.concurrent.TimeUnit.NANOSECONDS); // Record internal processing time for THIS request Timer internalTimer = internalTimers.computeIfAbsent( - context.endpoint, + normalizedEndpoint, k -> Timer.builder("request.latency.internal") - .tag(ENDPOINT, context.endpoint) + .tag(ENDPOINT, normalizedEndpoint) .description("Internal processing latency per request") - .publishPercentileHistogram() + .sla(LATENCY_SLA_BUCKETS) .register(Metrics.globalRegistry)); internalTimer.record(context.internalTime, java.util.concurrent.TimeUnit.NANOSECONDS); // Record operation counts as distribution summaries to get avg/max/percentiles if (context.dbOperationCount > 0) { - Metrics.summary("request.operations.database", ENDPOINT, context.endpoint) + Metrics.summary("request.operations.database", ENDPOINT, normalizedEndpoint) .record(context.dbOperationCount); } if (context.searchOperationCount > 0) { - Metrics.summary("request.operations.search", ENDPOINT, context.endpoint) + Metrics.summary("request.operations.search", ENDPOINT, normalizedEndpoint) .record(context.searchOperationCount); } @@ -198,23 +202,23 @@ public class RequestLatencyContext { // Get or create percentage holder for this endpoint PercentageHolder holder = percentageHolders.computeIfAbsent( - context.endpoint, + normalizedEndpoint, k -> { PercentageHolder newHolder = new PercentageHolder(); // Register gauges that read from the atomic references Gauge.builder("request.percentage.database", newHolder.databasePercent::get) - .tag(ENDPOINT, context.endpoint) + .tag(ENDPOINT, normalizedEndpoint) .description("Percentage of request time spent in database operations") .register(Metrics.globalRegistry); Gauge.builder("request.percentage.search", newHolder.searchPercent::get) - .tag(ENDPOINT, context.endpoint) + .tag(ENDPOINT, normalizedEndpoint) .description("Percentage of request time spent in search operations") .register(Metrics.globalRegistry); Gauge.builder("request.percentage.internal", newHolder.internalPercent::get) - .tag(ENDPOINT, context.endpoint) + .tag(ENDPOINT, normalizedEndpoint) .description("Percentage of request time spent in internal processing") .register(Metrics.globalRegistry); diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/monitoring/RequestLatencyContextTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/monitoring/RequestLatencyContextTest.java index 2c556f4f98c..08d314ddb77 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/monitoring/RequestLatencyContextTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/monitoring/RequestLatencyContextTest.java @@ -39,10 +39,11 @@ class RequestLatencyContextTest { simulateWork(20); + String normalizedEndpoint = MetricUtils.normalizeUri(endpoint); RequestLatencyContext.endRequest(); - Timer totalTimer = Metrics.timer("request.latency.total", "endpoint", endpoint); - Timer dbTimer = Metrics.timer("request.latency.database", "endpoint", endpoint); - Timer internalTimer = Metrics.timer("request.latency.internal", "endpoint", endpoint); + Timer totalTimer = Metrics.timer("request.latency.total", "endpoint", normalizedEndpoint); + Timer dbTimer = Metrics.timer("request.latency.database", "endpoint", normalizedEndpoint); + Timer internalTimer = Metrics.timer("request.latency.internal", "endpoint", normalizedEndpoint); assertEquals(1, totalTimer.count(), "Should have recorded 1 request"); assertEquals(1, dbTimer.count(), "Should have recorded 1 request with 
database operations"); diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/monitoring/RequestLatencyTrackingSimpleTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/monitoring/RequestLatencyTrackingSimpleTest.java index d6333e60d14..782add21fec 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/monitoring/RequestLatencyTrackingSimpleTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/monitoring/RequestLatencyTrackingSimpleTest.java @@ -34,15 +34,16 @@ class RequestLatencyTrackingSimpleTest { simulateWork(30); RequestLatencyContext.endRequest(); - Timer totalTimer = Metrics.timer("request.latency.total", "endpoint", endpoint); + String normalizedEndpoint = MetricUtils.normalizeUri(endpoint); + Timer totalTimer = Metrics.timer("request.latency.total", "endpoint", normalizedEndpoint); assertNotNull(totalTimer); assertEquals(1, totalTimer.count(), "Should have recorded 1 request"); - Timer dbTimer = Metrics.timer("request.latency.database", "endpoint", endpoint); + Timer dbTimer = Metrics.timer("request.latency.database", "endpoint", normalizedEndpoint); assertNotNull(dbTimer); assertEquals(1, dbTimer.count(), "Should have recorded 1 database operation"); - Timer internalTimer = Metrics.timer("request.latency.internal", "endpoint", endpoint); + Timer internalTimer = Metrics.timer("request.latency.internal", "endpoint", normalizedEndpoint); assertNotNull(internalTimer); assertEquals(1, internalTimer.count(), "Should have recorded internal processing"); @@ -54,9 +55,14 @@ class RequestLatencyTrackingSimpleTest { LOG.info("Database time: {} ms", dbMs); LOG.info("Internal time: {} ms", internalMs); - assertTrue(totalMs >= 150 && totalMs <= 210, "Total time should be ~180ms, got: " + totalMs); - assertTrue(dbMs >= 80 && dbMs <= 120, "Database time should be ~100ms, got: " + dbMs); + // Timing expectations: 500ms + 100ms + 30ms = 630ms total + // DB time: 100ms during database operation + // Internal time: 500ms (before DB) + 30ms (after DB) = 530ms + // Allow generous bounds for system timing variations + assertTrue(totalMs >= 500 && totalMs <= 1000, "Total time should be ~630ms, got: " + totalMs); + assertTrue(dbMs >= 80 && dbMs <= 150, "Database time should be ~100ms, got: " + dbMs); assertTrue( - internalMs >= 60 && internalMs <= 100, "Internal time should be ~80ms, got: " + internalMs); + internalMs >= 400 && internalMs <= 700, + "Internal time should be ~530ms, got: " + internalMs); } } diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/monitoring/RequestLatencyTrackingTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/monitoring/RequestLatencyTrackingTest.java index 604c54cfb20..aba162a1707 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/monitoring/RequestLatencyTrackingTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/monitoring/RequestLatencyTrackingTest.java @@ -123,10 +123,11 @@ class RequestLatencyTrackingTest extends OpenMetadataApplicationTest { // Check for specific endpoint metrics - the actual metrics use the endpoint path as-is LOG.info("Looking for metrics with endpoint: {}", getEndpoint); + String normalizedUri = MetricUtils.normalizeUri("v1/tables/" + createdTable.getId()); // Parse and verify latency metrics - assertLatencyMetricsExist(prometheusMetrics, "request_latency_total", getEndpoint); - assertLatencyMetricsExist(prometheusMetrics, "request_latency_database", getEndpoint); - 
assertLatencyMetricsExist(prometheusMetrics, "request_latency_internal", getEndpoint); + assertLatencyMetricsExist(prometheusMetrics, "request_latency_total", normalizedUri); + assertLatencyMetricsExist(prometheusMetrics, "request_latency_database", normalizedUri); + assertLatencyMetricsExist(prometheusMetrics, "request_latency_internal", normalizedUri); } @Test @@ -205,7 +206,7 @@ class RequestLatencyTrackingTest extends OpenMetadataApplicationTest { WebTarget complexTarget = getResource("tables/" + createdTable.getId()) - .queryParam("fields", "owners,tags,followers,columns,domain,dataProducts,extension") + .queryParam("fields", "owners,tags,followers,columns,domains,dataProducts,extension") .queryParam("include", "all"); TestUtils.get(complexTarget, Table.class, ADMIN_AUTH_HEADERS); @@ -218,7 +219,8 @@ class RequestLatencyTrackingTest extends OpenMetadataApplicationTest { String endpoint = "v1/tables/" + createdTable.getId(); // Verify database operation count - assertLatencyMetricsExist(prometheusMetrics, "request_latency_database", endpoint); + assertLatencyMetricsExist( + prometheusMetrics, "request_latency_database", MetricUtils.normalizeUri(endpoint)); // Check that we have multiple database operations recorded assertTrue( @@ -247,7 +249,9 @@ class RequestLatencyTrackingTest extends OpenMetadataApplicationTest { private void assertLatencyMetricsExist( String prometheusOutput, String metricName, String endpoint) { // Look for metrics that contain the metric name with the endpoint label - String pattern = metricName + "_seconds.*endpoint=\"" + endpoint.replace("/", "\\/") + "\""; + // Escape regex special characters in the endpoint string + String escapedEndpoint = java.util.regex.Pattern.quote(endpoint); + String pattern = metricName + "_seconds.*endpoint=\"" + escapedEndpoint + "\""; assertTrue( prometheusOutput.matches("(?s).*" + pattern + ".*"), String.format(