diff --git a/openmetadata-docs/content/v1.1.0-snapshot/releases/roadmap/index.md b/openmetadata-docs/content/v1.1.0-snapshot/releases/roadmap/index.md index f7afe54d1a6..a86d8554e52 100644 --- a/openmetadata-docs/content/v1.1.0-snapshot/releases/roadmap/index.md +++ b/openmetadata-docs/content/v1.1.0-snapshot/releases/roadmap/index.md @@ -22,7 +22,7 @@ You can check the latest release [here](/releases/all-releases). - We will be adding support for NoSQL DB entities and Services with APIs - Support for Long Entity Names such as S3 paths - Import/Export support at all entities -- Tag Propgation using Import/Export +- Tag Propagation using Import/Export - Thumbs up & down to capture popularity of the Entities {% /tile %} diff --git a/openmetadata-service/pom.xml b/openmetadata-service/pom.xml index a435b556f38..fb61132ef84 100644 --- a/openmetadata-service/pom.xml +++ b/openmetadata-service/pom.xml @@ -227,6 +227,10 @@ org.flywaydb flyway-mysql + + io.github.resilience4j + resilience4j-retry + diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/events/scheduled/PipelineServiceStatusJob.java b/openmetadata-service/src/main/java/org/openmetadata/service/events/scheduled/PipelineServiceStatusJob.java index 855f7db572f..dc727f02890 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/events/scheduled/PipelineServiceStatusJob.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/events/scheduled/PipelineServiceStatusJob.java @@ -4,9 +4,13 @@ import static org.openmetadata.service.events.scheduled.PipelineServiceStatusJob import static org.openmetadata.service.events.scheduled.PipelineServiceStatusJobHandler.JOB_CONTEXT_METER_REGISTRY; import static org.openmetadata.service.events.scheduled.PipelineServiceStatusJobHandler.JOB_CONTEXT_PIPELINE_SERVICE_CLIENT; +import io.github.resilience4j.retry.Retry; +import io.github.resilience4j.retry.RetryConfig; import io.micrometer.core.instrument.Counter; import io.micrometer.prometheus.PrometheusMeterRegistry; +import java.time.Duration; import java.util.Map; +import java.util.function.Supplier; import javax.ws.rs.core.Response; import lombok.extern.slf4j.Slf4j; import org.openmetadata.sdk.PipelineServiceClient; @@ -22,6 +26,9 @@ public class PipelineServiceStatusJob implements Job { private static final String UNHEALTHY_TAG_NAME = "unhealthy"; private static final String CLUSTER_TAG_NAME = "clusterName"; + private static final Integer MAX_ATTEMPTS = 3; + private static final Integer BACKOFF_TIME_SECONDS = 5; + @Override public void execute(JobExecutionContext jobExecutionContext) { @@ -39,11 +46,35 @@ public class PipelineServiceStatusJob implements Job { } } + private String getServiceStatus(PipelineServiceClient pipelineServiceClient) { + RetryConfig retryConfig = + RetryConfig.custom() + .maxAttempts(MAX_ATTEMPTS) + .waitDuration(Duration.ofMillis(BACKOFF_TIME_SECONDS * 1_000L)) + .retryOnResult(response -> !HEALTHY_STATUS.equals(response)) + .failAfterMaxAttempts(false) + .build(); + + Retry retry = Retry.of("getServiceStatus", retryConfig); + + Supplier responseSupplier = + () -> { + try { + Response response = pipelineServiceClient.getServiceStatus(); + Map responseMap = (Map) response.getEntity(); + return responseMap.get(STATUS_KEY) == null ? "unhealthy" : responseMap.get(STATUS_KEY); + } catch (Exception e) { + throw new RuntimeException(e); + } + }; + + return retry.executeSupplier(responseSupplier); + } + private void registerStatusMetric( PipelineServiceClient pipelineServiceClient, PrometheusMeterRegistry meterRegistry, String clusterName) { - Response response = pipelineServiceClient.getServiceStatus(); - Map responseMap = (Map) response.getEntity(); - if (responseMap.get(STATUS_KEY) == null || !HEALTHY_STATUS.equals(responseMap.get(STATUS_KEY))) { + String status = getServiceStatus(pipelineServiceClient); + if (!HEALTHY_STATUS.equals(status)) { publishUnhealthyCounter(meterRegistry, clusterName); } } diff --git a/pom.xml b/pom.xml index 58fdd37f604..c4af9f1b07e 100644 --- a/pom.xml +++ b/pom.xml @@ -105,6 +105,7 @@ 2.6 1.18.26 10.1.7 + 1.7.0 @@ -458,6 +459,11 @@ ${org.junit.jupiter.version} test + + io.github.resilience4j + resilience4j-retry + ${resilience4j.version} + org.reflections reflections