feat(open assertion spec): MVP for Snowflake DMF Assertions: update models, add assertions cli with snowflake integration (#10602)
commit 81b655c82d (parent 648fd459eb)
@@ -20,6 +20,7 @@ public class Constants {
  public static final String LINEAGE_SCHEMA_FILE = "lineage.graphql";
  public static final String PROPERTIES_SCHEMA_FILE = "properties.graphql";
  public static final String FORMS_SCHEMA_FILE = "forms.graphql";
  public static final String ASSERTIONS_SCHEMA_FILE = "assertions.graphql";
  public static final String INCIDENTS_SCHEMA_FILE = "incident.graphql";
  public static final String CONNECTIONS_SCHEMA_FILE = "connection.graphql";
  public static final String BROWSE_PATH_DELIMITER = "/";
@@ -118,7 +118,12 @@ import com.linkedin.datahub.graphql.resolvers.MeResolver;
import com.linkedin.datahub.graphql.resolvers.assertion.AssertionRunEventResolver;
import com.linkedin.datahub.graphql.resolvers.assertion.DeleteAssertionResolver;
import com.linkedin.datahub.graphql.resolvers.assertion.EntityAssertionsResolver;
import com.linkedin.datahub.graphql.resolvers.auth.*;
import com.linkedin.datahub.graphql.resolvers.auth.CreateAccessTokenResolver;
import com.linkedin.datahub.graphql.resolvers.auth.DebugAccessResolver;
import com.linkedin.datahub.graphql.resolvers.auth.GetAccessTokenMetadataResolver;
import com.linkedin.datahub.graphql.resolvers.auth.GetAccessTokenResolver;
import com.linkedin.datahub.graphql.resolvers.auth.ListAccessTokensResolver;
import com.linkedin.datahub.graphql.resolvers.auth.RevokeAccessTokenResolver;
import com.linkedin.datahub.graphql.resolvers.browse.BrowsePathsResolver;
import com.linkedin.datahub.graphql.resolvers.browse.BrowseResolver;
import com.linkedin.datahub.graphql.resolvers.browse.EntityBrowsePathsResolver;
@@ -814,6 +819,7 @@ public class GmsGraphQLEngine {
            .addSchema(fileBasedSchema(PROPERTIES_SCHEMA_FILE))
            .addSchema(fileBasedSchema(FORMS_SCHEMA_FILE))
            .addSchema(fileBasedSchema(CONNECTIONS_SCHEMA_FILE))
            .addSchema(fileBasedSchema(ASSERTIONS_SCHEMA_FILE))
            .addSchema(fileBasedSchema(INCIDENTS_SCHEMA_FILE));

    for (GmsGraphQLPlugin plugin : this.graphQLPlugins) {
@@ -98,6 +98,16 @@ public class AssertionRunEventResolver
                            && AssertionResultType.SUCCESS.equals(
                                runEvent.getResult().getType()))
                    .count()));
    result.setErrored(
        Math.toIntExact(
            runEvents.stream()
                .filter(
                    runEvent ->
                        AssertionRunStatus.COMPLETE.equals(runEvent.getStatus())
                            && runEvent.getResult() != null
                            && AssertionResultType.ERROR.equals(
                                runEvent.getResult().getType()))
                .count()));
    result.setRunEvents(runEvents);
    return result;
  } catch (RemoteInvocationException e) {
@@ -2,6 +2,8 @@ package com.linkedin.datahub.graphql.types.assertion;

import static com.linkedin.metadata.Constants.GLOBAL_TAGS_ASPECT_NAME;

import com.linkedin.assertion.AssertionAction;
import com.linkedin.assertion.AssertionActions;
import com.linkedin.assertion.AssertionInfo;
import com.linkedin.common.DataPlatformInstance;
import com.linkedin.common.GlobalTags;
@@ -10,24 +12,40 @@ import com.linkedin.common.urn.Urn;
import com.linkedin.data.DataMap;
import com.linkedin.datahub.graphql.QueryContext;
import com.linkedin.datahub.graphql.generated.Assertion;
import com.linkedin.datahub.graphql.generated.AssertionActionType;
import com.linkedin.datahub.graphql.generated.AssertionSource;
import com.linkedin.datahub.graphql.generated.AssertionSourceType;
import com.linkedin.datahub.graphql.generated.AssertionStdAggregation;
import com.linkedin.datahub.graphql.generated.AssertionStdOperator;
import com.linkedin.datahub.graphql.generated.AssertionStdParameter;
import com.linkedin.datahub.graphql.generated.AssertionStdParameterType;
import com.linkedin.datahub.graphql.generated.AssertionStdParameters;
import com.linkedin.datahub.graphql.generated.AssertionType;
import com.linkedin.datahub.graphql.generated.AuditStamp;
import com.linkedin.datahub.graphql.generated.DataPlatform;
import com.linkedin.datahub.graphql.generated.DatasetAssertionInfo;
import com.linkedin.datahub.graphql.generated.DatasetAssertionScope;
import com.linkedin.datahub.graphql.generated.DateInterval;
import com.linkedin.datahub.graphql.generated.EntityType;
import com.linkedin.datahub.graphql.generated.FieldAssertionInfo;
import com.linkedin.datahub.graphql.generated.FixedIntervalSchedule;
import com.linkedin.datahub.graphql.generated.FreshnessAssertionInfo;
import com.linkedin.datahub.graphql.generated.SchemaAssertionCompatibility;
import com.linkedin.datahub.graphql.generated.SchemaAssertionField;
import com.linkedin.datahub.graphql.generated.SchemaAssertionInfo;
import com.linkedin.datahub.graphql.generated.SchemaFieldRef;
import com.linkedin.datahub.graphql.generated.SqlAssertionInfo;
import com.linkedin.datahub.graphql.generated.VolumeAssertionInfo;
import com.linkedin.datahub.graphql.types.common.mappers.DataPlatformInstanceAspectMapper;
import com.linkedin.datahub.graphql.types.common.mappers.StringMapMapper;
import com.linkedin.datahub.graphql.types.dataset.mappers.SchemaFieldMapper;
import com.linkedin.datahub.graphql.types.dataset.mappers.SchemaMetadataMapper;
import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper;
import com.linkedin.entity.EntityResponse;
import com.linkedin.entity.EnvelopedAspect;
import com.linkedin.entity.EnvelopedAspectMap;
import com.linkedin.metadata.Constants;
import com.linkedin.schema.SchemaField;
import java.util.Collections;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
@@ -48,6 +66,14 @@ public class AssertionMapper {
      result.setInfo(
          mapAssertionInfo(context, new AssertionInfo(envelopedAssertionInfo.getValue().data())));
    }

    final EnvelopedAspect envelopedAssertionActions =
        aspects.get(Constants.ASSERTION_ACTIONS_ASPECT_NAME);
    if (envelopedAssertionActions != null) {
      result.setActions(
          mapAssertionActions(new AssertionActions(envelopedAssertionActions.getValue().data())));
    }

    final EnvelopedAspect envelopedPlatformInstance =
        aspects.get(Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME);
    if (envelopedPlatformInstance != null) {
@@ -83,20 +109,93 @@ public class AssertionMapper {
    return result;
  }

  private static com.linkedin.datahub.graphql.generated.AssertionInfo mapAssertionInfo(
  public static com.linkedin.datahub.graphql.generated.AssertionInfo mapAssertionInfo(
      @Nullable QueryContext context, final AssertionInfo gmsAssertionInfo) {
    final com.linkedin.datahub.graphql.generated.AssertionInfo assertionInfo =
        new com.linkedin.datahub.graphql.generated.AssertionInfo();
    assertionInfo.setType(AssertionType.valueOf(gmsAssertionInfo.getType().name()));

    if (gmsAssertionInfo.hasLastUpdated()) {
      assertionInfo.setLastUpdated(
          new AuditStamp(
              gmsAssertionInfo.getLastUpdated().getTime(),
              gmsAssertionInfo.getLastUpdated().getActor().toString()));
    }
    if (gmsAssertionInfo.hasDatasetAssertion()) {
      DatasetAssertionInfo datasetAssertion =
          mapDatasetAssertionInfo(context, gmsAssertionInfo.getDatasetAssertion());
      assertionInfo.setDatasetAssertion(datasetAssertion);
    }
    assertionInfo.setDescription(gmsAssertionInfo.getDescription());
    // Description
    if (gmsAssertionInfo.hasDescription()) {
      assertionInfo.setDescription(gmsAssertionInfo.getDescription());
    }
    // FRESHNESS Assertions
    if (gmsAssertionInfo.hasFreshnessAssertion()) {
      FreshnessAssertionInfo freshnessAssertionInfo =
          FreshnessAssertionMapper.mapFreshnessAssertionInfo(
              context, gmsAssertionInfo.getFreshnessAssertion());
      assertionInfo.setFreshnessAssertion(freshnessAssertionInfo);
    }
    // VOLUME Assertions
    if (gmsAssertionInfo.hasVolumeAssertion()) {
      VolumeAssertionInfo volumeAssertionInfo =
          VolumeAssertionMapper.mapVolumeAssertionInfo(
              context, gmsAssertionInfo.getVolumeAssertion());
      assertionInfo.setVolumeAssertion(volumeAssertionInfo);
    }
    // SQL Assertions
    if (gmsAssertionInfo.hasSqlAssertion()) {
      SqlAssertionInfo sqlAssertionInfo =
          SqlAssertionMapper.mapSqlAssertionInfo(gmsAssertionInfo.getSqlAssertion());
      assertionInfo.setSqlAssertion(sqlAssertionInfo);
    }
    // FIELD Assertions
    if (gmsAssertionInfo.hasFieldAssertion()) {
      FieldAssertionInfo fieldAssertionInfo =
          FieldAssertionMapper.mapFieldAssertionInfo(context, gmsAssertionInfo.getFieldAssertion());
      assertionInfo.setFieldAssertion(fieldAssertionInfo);
    }
    // SCHEMA Assertions
    if (gmsAssertionInfo.hasSchemaAssertion()) {
      SchemaAssertionInfo schemaAssertionInfo =
          mapSchemaAssertionInfo(context, gmsAssertionInfo.getSchemaAssertion());
      assertionInfo.setSchemaAssertion(schemaAssertionInfo);
    }
    // Source Type
    if (gmsAssertionInfo.hasSource()) {
      assertionInfo.setSource(mapSource(gmsAssertionInfo.getSource()));
    }
    return assertionInfo;
  }

  private static com.linkedin.datahub.graphql.generated.AssertionActions mapAssertionActions(
      final AssertionActions gmsAssertionActions) {
    final com.linkedin.datahub.graphql.generated.AssertionActions result =
        new com.linkedin.datahub.graphql.generated.AssertionActions();
    if (gmsAssertionActions.hasOnFailure()) {
      result.setOnFailure(
          gmsAssertionActions.getOnFailure().stream()
              .map(AssertionMapper::mapAssertionAction)
              .collect(Collectors.toList()));
    }
    if (gmsAssertionActions.hasOnSuccess()) {
      result.setOnSuccess(
          gmsAssertionActions.getOnSuccess().stream()
              .map(AssertionMapper::mapAssertionAction)
              .collect(Collectors.toList()));
    }
    return result;
  }

  private static com.linkedin.datahub.graphql.generated.AssertionAction mapAssertionAction(
      final AssertionAction gmsAssertionAction) {
    final com.linkedin.datahub.graphql.generated.AssertionAction result =
        new com.linkedin.datahub.graphql.generated.AssertionAction();
    result.setType(AssertionActionType.valueOf(gmsAssertionAction.getType().toString()));
    return result;
  }

  private static DatasetAssertionInfo mapDatasetAssertionInfo(
      @Nullable QueryContext context,
      final com.linkedin.assertion.DatasetAssertionInfo gmsDatasetAssertion) {
@@ -152,7 +251,7 @@ public class AssertionMapper {
    return new SchemaFieldRef(schemaFieldUrn.toString(), schemaFieldUrn.getEntityKey().get(1));
  }

  private static AssertionStdParameters mapParameters(
  protected static AssertionStdParameters mapParameters(
      final com.linkedin.assertion.AssertionStdParameters params) {
    final AssertionStdParameters result = new AssertionStdParameters();
    if (params.hasValue()) {
@@ -175,5 +274,61 @@ public class AssertionMapper {
    return result;
  }

  private AssertionMapper() {}
  protected static FixedIntervalSchedule mapFixedIntervalSchedule(
      com.linkedin.assertion.FixedIntervalSchedule gmsFixedIntervalSchedule) {
    FixedIntervalSchedule fixedIntervalSchedule = new FixedIntervalSchedule();
    fixedIntervalSchedule.setUnit(DateInterval.valueOf(gmsFixedIntervalSchedule.getUnit().name()));
    fixedIntervalSchedule.setMultiple(gmsFixedIntervalSchedule.getMultiple());
    return fixedIntervalSchedule;
  }

  private static AssertionSource mapSource(final com.linkedin.assertion.AssertionSource gmsSource) {
    AssertionSource result = new AssertionSource();
    result.setType(AssertionSourceType.valueOf(gmsSource.getType().toString()));
    if (gmsSource.hasCreated()) {
      result.setCreated(
          new AuditStamp(
              gmsSource.getCreated().getTime(), gmsSource.getCreated().getActor().toString()));
    }
    return result;
  }

  protected static com.linkedin.datahub.graphql.generated.SchemaFieldSpec mapSchemaFieldSpec(
      final com.linkedin.schema.SchemaFieldSpec gmsField) {
    final com.linkedin.datahub.graphql.generated.SchemaFieldSpec result =
        new com.linkedin.datahub.graphql.generated.SchemaFieldSpec();
    result.setPath(gmsField.getPath());
    result.setType(gmsField.getType());
    result.setNativeType(gmsField.getNativeType());
    return result;
  }

  private static SchemaAssertionInfo mapSchemaAssertionInfo(
      @Nullable final QueryContext context,
      final com.linkedin.assertion.SchemaAssertionInfo gmsSchemaAssertionInfo) {
    SchemaAssertionInfo result = new SchemaAssertionInfo();
    result.setCompatibility(
        SchemaAssertionCompatibility.valueOf(gmsSchemaAssertionInfo.getCompatibility().name()));
    result.setEntityUrn(gmsSchemaAssertionInfo.getEntity().toString());
    result.setSchema(
        SchemaMetadataMapper.INSTANCE.apply(
            context, gmsSchemaAssertionInfo.getSchema(), gmsSchemaAssertionInfo.getEntity(), 0L));
    result.setFields(
        gmsSchemaAssertionInfo.getSchema().getFields().stream()
            .map(AssertionMapper::mapSchemaField)
            .collect(Collectors.toList()));
    return result;
  }

  private static SchemaAssertionField mapSchemaField(final SchemaField gmsField) {
    SchemaAssertionField result = new SchemaAssertionField();
    result.setPath(gmsField.getFieldPath());
    result.setType(new SchemaFieldMapper().mapSchemaFieldDataType(gmsField.getType()));
    if (gmsField.hasNativeDataType()) {
      result.setNativeType(gmsField.getNativeDataType());
    }
    return result;
  }

  protected AssertionMapper() {}
}
@@ -28,8 +28,8 @@ public class AssertionType
          Constants.ASSERTION_KEY_ASPECT_NAME,
          Constants.ASSERTION_INFO_ASPECT_NAME,
          Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME,
          Constants.GLOBAL_TAGS_ASPECT_NAME);
          Constants.GLOBAL_TAGS_ASPECT_NAME,
          Constants.ASSERTION_ACTIONS_ASPECT_NAME);
  private final EntityClient _entityClient;

  public AssertionType(final EntityClient entityClient) {
@@ -0,0 +1,92 @@
package com.linkedin.datahub.graphql.types.assertion;

import com.linkedin.assertion.FieldAssertionInfo;
import com.linkedin.datahub.graphql.QueryContext;
import com.linkedin.datahub.graphql.generated.AssertionStdOperator;
import com.linkedin.datahub.graphql.generated.FieldAssertionType;
import com.linkedin.datahub.graphql.generated.FieldMetricType;
import com.linkedin.datahub.graphql.generated.FieldTransformType;
import com.linkedin.datahub.graphql.generated.FieldValuesFailThresholdType;
import com.linkedin.datahub.graphql.types.dataset.mappers.DatasetFilterMapper;
import javax.annotation.Nullable;

public class FieldAssertionMapper extends AssertionMapper {

  public static com.linkedin.datahub.graphql.generated.FieldAssertionInfo mapFieldAssertionInfo(
      @Nullable final QueryContext context, final FieldAssertionInfo gmsFieldAssertionInfo) {
    final com.linkedin.datahub.graphql.generated.FieldAssertionInfo result =
        new com.linkedin.datahub.graphql.generated.FieldAssertionInfo();
    result.setEntityUrn(gmsFieldAssertionInfo.getEntity().toString());
    result.setType(FieldAssertionType.valueOf(gmsFieldAssertionInfo.getType().name()));
    if (gmsFieldAssertionInfo.hasFilter()) {
      result.setFilter(DatasetFilterMapper.map(context, gmsFieldAssertionInfo.getFilter()));
    }
    if (gmsFieldAssertionInfo.hasFieldValuesAssertion()) {
      result.setFieldValuesAssertion(
          mapFieldValuesAssertion(gmsFieldAssertionInfo.getFieldValuesAssertion()));
    }
    if (gmsFieldAssertionInfo.hasFieldMetricAssertion()) {
      result.setFieldMetricAssertion(
          mapFieldMetricAssertion(gmsFieldAssertionInfo.getFieldMetricAssertion()));
    }
    return result;
  }

  private static com.linkedin.datahub.graphql.generated.FieldValuesAssertion
      mapFieldValuesAssertion(
          final com.linkedin.assertion.FieldValuesAssertion gmsFieldValuesAssertion) {
    final com.linkedin.datahub.graphql.generated.FieldValuesAssertion result =
        new com.linkedin.datahub.graphql.generated.FieldValuesAssertion();
    result.setField(mapSchemaFieldSpec(gmsFieldValuesAssertion.getField()));
    result.setOperator(AssertionStdOperator.valueOf(gmsFieldValuesAssertion.getOperator().name()));
    result.setFailThreshold(
        mapFieldValuesFailThreshold(gmsFieldValuesAssertion.getFailThreshold()));
    result.setExcludeNulls(gmsFieldValuesAssertion.isExcludeNulls());

    if (gmsFieldValuesAssertion.hasTransform()) {
      result.setTransform(mapFieldTransform(gmsFieldValuesAssertion.getTransform()));
    }

    if (gmsFieldValuesAssertion.hasParameters()) {
      result.setParameters(mapParameters(gmsFieldValuesAssertion.getParameters()));
    }
    return result;
  }

  private static com.linkedin.datahub.graphql.generated.FieldMetricAssertion
      mapFieldMetricAssertion(
          final com.linkedin.assertion.FieldMetricAssertion gmsFieldMetricAssertion) {
    final com.linkedin.datahub.graphql.generated.FieldMetricAssertion result =
        new com.linkedin.datahub.graphql.generated.FieldMetricAssertion();
    result.setField(mapSchemaFieldSpec(gmsFieldMetricAssertion.getField()));
    result.setMetric(FieldMetricType.valueOf(gmsFieldMetricAssertion.getMetric().name()));
    result.setOperator(AssertionStdOperator.valueOf(gmsFieldMetricAssertion.getOperator().name()));

    if (gmsFieldMetricAssertion.hasParameters()) {
      result.setParameters(mapParameters(gmsFieldMetricAssertion.getParameters()));
    }

    return result;
  }

  private static com.linkedin.datahub.graphql.generated.FieldTransform mapFieldTransform(
      final com.linkedin.assertion.FieldTransform gmsFieldTransform) {
    final com.linkedin.datahub.graphql.generated.FieldTransform result =
        new com.linkedin.datahub.graphql.generated.FieldTransform();
    result.setType(FieldTransformType.valueOf(gmsFieldTransform.getType().name()));
    return result;
  }

  private static com.linkedin.datahub.graphql.generated.FieldValuesFailThreshold
      mapFieldValuesFailThreshold(
          final com.linkedin.assertion.FieldValuesFailThreshold gmsFieldValuesFailThreshold) {
    final com.linkedin.datahub.graphql.generated.FieldValuesFailThreshold result =
        new com.linkedin.datahub.graphql.generated.FieldValuesFailThreshold();
    result.setType(
        FieldValuesFailThresholdType.valueOf(gmsFieldValuesFailThreshold.getType().name()));
    result.setValue(gmsFieldValuesFailThreshold.getValue());
    return result;
  }

  private FieldAssertionMapper() {}
}
@@ -0,0 +1,59 @@
package com.linkedin.datahub.graphql.types.assertion;

import com.linkedin.data.template.GetMode;
import com.linkedin.datahub.graphql.QueryContext;
import com.linkedin.datahub.graphql.generated.FreshnessAssertionInfo;
import com.linkedin.datahub.graphql.generated.FreshnessAssertionSchedule;
import com.linkedin.datahub.graphql.generated.FreshnessAssertionScheduleType;
import com.linkedin.datahub.graphql.generated.FreshnessAssertionType;
import com.linkedin.datahub.graphql.generated.FreshnessCronSchedule;
import com.linkedin.datahub.graphql.types.dataset.mappers.DatasetFilterMapper;
import javax.annotation.Nullable;

public class FreshnessAssertionMapper extends AssertionMapper {

  public static FreshnessAssertionInfo mapFreshnessAssertionInfo(
      @Nullable final QueryContext context,
      final com.linkedin.assertion.FreshnessAssertionInfo gmsFreshnessAssertionInfo) {
    FreshnessAssertionInfo freshnessAssertionInfo = new FreshnessAssertionInfo();
    freshnessAssertionInfo.setEntityUrn(gmsFreshnessAssertionInfo.getEntity().toString());
    freshnessAssertionInfo.setType(
        FreshnessAssertionType.valueOf(gmsFreshnessAssertionInfo.getType().name()));
    if (gmsFreshnessAssertionInfo.hasSchedule()) {
      freshnessAssertionInfo.setSchedule(
          mapFreshnessAssertionSchedule(gmsFreshnessAssertionInfo.getSchedule()));
    }
    if (gmsFreshnessAssertionInfo.hasFilter()) {
      freshnessAssertionInfo.setFilter(
          DatasetFilterMapper.map(context, gmsFreshnessAssertionInfo.getFilter()));
    }
    return freshnessAssertionInfo;
  }

  private static FreshnessCronSchedule mapFreshnessCronSchedule(
      final com.linkedin.assertion.FreshnessCronSchedule gmsCronSchedule) {
    FreshnessCronSchedule cronSchedule = new FreshnessCronSchedule();
    cronSchedule.setCron(gmsCronSchedule.getCron());
    cronSchedule.setTimezone(gmsCronSchedule.getTimezone());
    cronSchedule.setWindowStartOffsetMs(gmsCronSchedule.getWindowStartOffsetMs(GetMode.NULL));
    return cronSchedule;
  }

  private static FreshnessAssertionSchedule mapFreshnessAssertionSchedule(
      final com.linkedin.assertion.FreshnessAssertionSchedule gmsFreshnessAssertionSchedule) {
    FreshnessAssertionSchedule freshnessAssertionSchedule = new FreshnessAssertionSchedule();
    freshnessAssertionSchedule.setType(
        FreshnessAssertionScheduleType.valueOf(gmsFreshnessAssertionSchedule.getType().name()));
    if (gmsFreshnessAssertionSchedule.hasCron()) {
      freshnessAssertionSchedule.setCron(
          mapFreshnessCronSchedule(gmsFreshnessAssertionSchedule.getCron()));
    }
    if (gmsFreshnessAssertionSchedule.hasFixedInterval()) {
      freshnessAssertionSchedule.setFixedInterval(
          mapFixedIntervalSchedule(gmsFreshnessAssertionSchedule.getFixedInterval()));
    }
    return freshnessAssertionSchedule;
  }

  private FreshnessAssertionMapper() {}
}
@@ -0,0 +1,27 @@
package com.linkedin.datahub.graphql.types.assertion;

import com.linkedin.assertion.SqlAssertionInfo;
import com.linkedin.datahub.graphql.generated.AssertionStdOperator;
import com.linkedin.datahub.graphql.generated.AssertionValueChangeType;
import com.linkedin.datahub.graphql.generated.SqlAssertionType;

public class SqlAssertionMapper extends AssertionMapper {

  public static com.linkedin.datahub.graphql.generated.SqlAssertionInfo mapSqlAssertionInfo(
      final SqlAssertionInfo gmsSqlAssertionInfo) {
    final com.linkedin.datahub.graphql.generated.SqlAssertionInfo result =
        new com.linkedin.datahub.graphql.generated.SqlAssertionInfo();
    result.setEntityUrn(gmsSqlAssertionInfo.getEntity().toString());
    result.setType(SqlAssertionType.valueOf(gmsSqlAssertionInfo.getType().name()));
    result.setStatement(gmsSqlAssertionInfo.getStatement());
    result.setOperator(AssertionStdOperator.valueOf(gmsSqlAssertionInfo.getOperator().name()));
    result.setParameters(mapParameters(gmsSqlAssertionInfo.getParameters()));
    if (gmsSqlAssertionInfo.hasChangeType()) {
      result.setChangeType(
          AssertionValueChangeType.valueOf(gmsSqlAssertionInfo.getChangeType().name()));
    }
    return result;
  }

  private SqlAssertionMapper() {}
}
@@ -0,0 +1,115 @@
package com.linkedin.datahub.graphql.types.assertion;

import com.linkedin.assertion.VolumeAssertionInfo;
import com.linkedin.datahub.graphql.QueryContext;
import com.linkedin.datahub.graphql.generated.AssertionStdOperator;
import com.linkedin.datahub.graphql.generated.AssertionValueChangeType;
import com.linkedin.datahub.graphql.generated.IncrementingSegmentFieldTransformerType;
import com.linkedin.datahub.graphql.generated.VolumeAssertionType;
import com.linkedin.datahub.graphql.types.dataset.mappers.DatasetFilterMapper;
import javax.annotation.Nullable;

public class VolumeAssertionMapper extends AssertionMapper {

  public static com.linkedin.datahub.graphql.generated.VolumeAssertionInfo mapVolumeAssertionInfo(
      @Nullable final QueryContext context, final VolumeAssertionInfo gmsVolumeAssertionInfo) {
    final com.linkedin.datahub.graphql.generated.VolumeAssertionInfo result =
        new com.linkedin.datahub.graphql.generated.VolumeAssertionInfo();
    result.setEntityUrn(gmsVolumeAssertionInfo.getEntity().toString());
    result.setType(VolumeAssertionType.valueOf(gmsVolumeAssertionInfo.getType().name()));
    if (gmsVolumeAssertionInfo.hasFilter()) {
      result.setFilter(DatasetFilterMapper.map(context, gmsVolumeAssertionInfo.getFilter()));
    }
    if (gmsVolumeAssertionInfo.hasRowCountTotal()) {
      result.setRowCountTotal(mapRowCountTotal(gmsVolumeAssertionInfo.getRowCountTotal()));
    }
    if (gmsVolumeAssertionInfo.hasRowCountChange()) {
      result.setRowCountChange(mapRowCountChange(gmsVolumeAssertionInfo.getRowCountChange()));
    }
    if (gmsVolumeAssertionInfo.hasIncrementingSegmentRowCountTotal()) {
      result.setIncrementingSegmentRowCountTotal(
          mapIncrementingSegmentRowCountTotal(
              gmsVolumeAssertionInfo.getIncrementingSegmentRowCountTotal()));
    }
    if (gmsVolumeAssertionInfo.hasIncrementingSegmentRowCountChange()) {
      result.setIncrementingSegmentRowCountChange(
          mapIncrementingSegmentRowCountChange(
              gmsVolumeAssertionInfo.getIncrementingSegmentRowCountChange()));
    }
    return result;
  }

  private static com.linkedin.datahub.graphql.generated.RowCountTotal mapRowCountTotal(
      final com.linkedin.assertion.RowCountTotal gmsRowCountTotal) {
    final com.linkedin.datahub.graphql.generated.RowCountTotal result =
        new com.linkedin.datahub.graphql.generated.RowCountTotal();
    result.setOperator(AssertionStdOperator.valueOf(gmsRowCountTotal.getOperator().name()));
    result.setParameters(mapParameters(gmsRowCountTotal.getParameters()));
    return result;
  }

  private static com.linkedin.datahub.graphql.generated.RowCountChange mapRowCountChange(
      final com.linkedin.assertion.RowCountChange gmsRowCountChange) {
    final com.linkedin.datahub.graphql.generated.RowCountChange result =
        new com.linkedin.datahub.graphql.generated.RowCountChange();
    result.setOperator(AssertionStdOperator.valueOf(gmsRowCountChange.getOperator().name()));
    result.setParameters(mapParameters(gmsRowCountChange.getParameters()));
    result.setType(AssertionValueChangeType.valueOf(gmsRowCountChange.getType().name()));
    return result;
  }

  private static com.linkedin.datahub.graphql.generated.IncrementingSegmentRowCountTotal
      mapIncrementingSegmentRowCountTotal(
          final com.linkedin.assertion.IncrementingSegmentRowCountTotal
              gmsIncrementingSegmentRowCountTotal) {
    final com.linkedin.datahub.graphql.generated.IncrementingSegmentRowCountTotal result =
        new com.linkedin.datahub.graphql.generated.IncrementingSegmentRowCountTotal();
    result.setOperator(
        AssertionStdOperator.valueOf(gmsIncrementingSegmentRowCountTotal.getOperator().name()));
    result.setParameters(mapParameters(gmsIncrementingSegmentRowCountTotal.getParameters()));
    result.setSegment(mapIncrementingSegmentSpec(gmsIncrementingSegmentRowCountTotal.getSegment()));
    return result;
  }

  private static com.linkedin.datahub.graphql.generated.IncrementingSegmentRowCountChange
      mapIncrementingSegmentRowCountChange(
          final com.linkedin.assertion.IncrementingSegmentRowCountChange
              gmsIncrementingSegmentRowCountChange) {
    final com.linkedin.datahub.graphql.generated.IncrementingSegmentRowCountChange result =
        new com.linkedin.datahub.graphql.generated.IncrementingSegmentRowCountChange();
    result.setOperator(
        AssertionStdOperator.valueOf(gmsIncrementingSegmentRowCountChange.getOperator().name()));
    result.setParameters(mapParameters(gmsIncrementingSegmentRowCountChange.getParameters()));
    result.setSegment(
        mapIncrementingSegmentSpec(gmsIncrementingSegmentRowCountChange.getSegment()));
    result.setType(
        AssertionValueChangeType.valueOf(gmsIncrementingSegmentRowCountChange.getType().name()));
    return result;
  }

  private static com.linkedin.datahub.graphql.generated.IncrementingSegmentSpec
      mapIncrementingSegmentSpec(final com.linkedin.assertion.IncrementingSegmentSpec gmsSegment) {
    final com.linkedin.datahub.graphql.generated.IncrementingSegmentSpec result =
        new com.linkedin.datahub.graphql.generated.IncrementingSegmentSpec();
    result.setField(mapSchemaFieldSpec(gmsSegment.getField()));
    if (gmsSegment.hasTransformer()) {
      result.setTransformer(mapIncrementingSegmentFieldTransformer(gmsSegment.getTransformer()));
    }
    return result;
  }

  private static com.linkedin.datahub.graphql.generated.IncrementingSegmentFieldTransformer
      mapIncrementingSegmentFieldTransformer(
          final com.linkedin.assertion.IncrementingSegmentFieldTransformer gmsTransformer) {
    final com.linkedin.datahub.graphql.generated.IncrementingSegmentFieldTransformer result =
        new com.linkedin.datahub.graphql.generated.IncrementingSegmentFieldTransformer();
    result.setType(
        IncrementingSegmentFieldTransformerType.valueOf(gmsTransformer.getType().name()));
    if (gmsTransformer.hasNativeType()) {
      result.setNativeType(gmsTransformer.getNativeType());
    }
    return result;
  }

  private VolumeAssertionMapper() {}
}
@@ -51,7 +51,7 @@ public class SchemaFieldMapper {
    return result;
  }

  private SchemaFieldDataType mapSchemaFieldDataType(
  public SchemaFieldDataType mapSchemaFieldDataType(
      @Nonnull final com.linkedin.schema.SchemaFieldDataType dataTypeUnion) {
    final com.linkedin.schema.SchemaFieldDataType.Type type = dataTypeUnion.getType();
    if (type.isBytesType()) {
896  datahub-graphql-core/src/main/resources/assertions.graphql  (new file)
@@ -0,0 +1,896 @@
"""
Defines a schema field, each with a specified path and type.
"""
type SchemaAssertionField {
  """
  The standard V1 path of the field within the schema.
  """
  path: String!

  """
  The std type of the field
  """
  type: SchemaFieldDataType!

  """
  Optional: The specific native or standard type of the field.
  """
  nativeType: String
}

"""
Defines the required compatibility level for the schema assertion to pass.
"""
enum SchemaAssertionCompatibility {
  """
  The schema must be exactly the same as the expected schema.
  """
  EXACT_MATCH

  """
  The schema must be a superset of the expected schema.
  """
  SUPERSET

  """
  The schema must be a subset of the expected schema.
  """
  SUBSET
}

"""
The source of an assertion
"""
enum AssertionSourceType {
  """
  The assertion was defined natively on DataHub by a user.
  """
  NATIVE
  """
  The assertion was defined and is managed externally to DataHub.
  """
  EXTERNAL
  """
  The assertion was inferred, e.g. from offline AI / ML models.
  """
  INFERRED
}

"""
The type of a Freshness assertion
"""
enum FreshnessAssertionType {
  """
  An assertion defined against a Dataset Change Operation - insert, update, delete, etc.
  """
  DATASET_CHANGE
  """
  An assertion defined against a Data Job run
  """
  DATA_JOB_RUN
}

extend type AssertionInfo {
  """
  Information about a Freshness Assertion
  """
  freshnessAssertion: FreshnessAssertionInfo

  """
  Information about a Volume Assertion
  """
  volumeAssertion: VolumeAssertionInfo

  """
  Information about a SQL Assertion
  """
  sqlAssertion: SqlAssertionInfo

  """
  Information about a Field Assertion
  """
  fieldAssertion: FieldAssertionInfo

  """
  Schema assertion, e.g. defining the expected structure for an asset.
  """
  schemaAssertion: SchemaAssertionInfo

  """
  The source or origin of the Assertion definition.
  """
  source: AssertionSource

  """
  The time that the status last changed and the actor who changed it
  """
  lastUpdated: AuditStamp
}

extend type Assertion {
  """
  The actions associated with the Assertion
  """
  actions: AssertionActions
}

"""
The actions associated with an assertion
"""
type AssertionActions {
  """
  Actions to be executed on a successful assertion run.
  """
  onSuccess: [AssertionAction!]!

  """
  Actions to be executed on a failed assertion run.
  """
  onFailure: [AssertionAction!]!
}

"""
An action associated with an assertion
"""
type AssertionAction {
  """
  The type of the action
  """
  type: AssertionActionType!
}

"""
The type of the Action
"""
enum AssertionActionType {
  """
  Raise an incident.
  """
  RAISE_INCIDENT
  """
  Resolve open incidents related to the assertion.
  """
  RESOLVE_INCIDENT
}

"""
Information about a Freshness assertion.
"""
type FreshnessAssertionInfo {
  """
  The urn of the entity that the Freshness assertion is related to
  """
  entityUrn: String!

  """
  The type of the Freshness Assertion
  """
  type: FreshnessAssertionType!

  """
  Produce a FAIL Assertion Result if the asset is not updated on the cadence and within the time range described by the schedule.
  """
  schedule: FreshnessAssertionSchedule!

  """
  A filter applied when querying an external Dataset or Table
  """
  filter: DatasetFilter
}

"""
Attributes defining a single Freshness schedule.
"""
type FreshnessAssertionSchedule {
  """
  The type of schedule
  """
  type: FreshnessAssertionScheduleType!

  """
  A cron schedule. This is populated if the type is CRON.
  """
  cron: FreshnessCronSchedule

  """
  A fixed interval schedule. This is populated if the type is FIXED_INTERVAL.
  """
  fixedInterval: FixedIntervalSchedule
}

"""
The type of a Freshness assertion schedule
"""
enum FreshnessAssertionScheduleType {
  """
  A schedule based on a CRON expression representing the expected event times.
  """
  CRON

  """
  A schedule based on a recurring fixed interval, which is used to compute the expected operation window, e.g. "every 24 hours".
  """
  FIXED_INTERVAL
}

"""
A cron-formatted schedule
"""
type FreshnessCronSchedule {
  """
  A cron-formatted execution interval, as a cron string, e.g. 1 * * * *
  """
  cron: String!

  """
  Timezone in which the cron interval applies, e.g. America/Los_Angeles
  """
  timezone: String!

  """
  An optional offset in milliseconds to SUBTRACT from the timestamp generated by the cron schedule
  to generate the lower bound of the "Freshness window", i.e. the window of time in which an event must have occurred
  in order for the Freshness check to be considered passing.
  If left empty, the start of the Freshness window will be the _end_ of the previously evaluated Freshness window.
  """
  windowStartOffsetMs: Long
}
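
As an illustrative aside (not part of this diff): a minimal Java sketch of how the lower bound of the Freshness window could be derived from these fields, assuming latestCronFireTime and previousWindowEnd are hypothetical inputs resolved elsewhere.

import java.time.Instant;

public final class FreshnessWindowSketch {
  // If an offset is set, the window starts offsetMs before the latest scheduled
  // cron fire time; otherwise it starts where the previous window ended.
  static Instant windowStart(
      Instant latestCronFireTime, Long windowStartOffsetMs, Instant previousWindowEnd) {
    if (windowStartOffsetMs != null) {
      return latestCronFireTime.minusMillis(windowStartOffsetMs);
    }
    return previousWindowEnd;
  }

  // The asset is "fresh" if at least one change landed inside [windowStart, evaluationTime].
  static boolean isFresh(Instant lastChangeTime, Instant windowStart, Instant evaluationTime) {
    return !lastChangeTime.isBefore(windowStart) && !lastChangeTime.isAfter(evaluationTime);
  }
}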
"""
A fixed interval schedule.
"""
type FixedIntervalSchedule {
  """
  Interval unit such as minute/hour/day etc.
  """
  unit: DateInterval!

  """
  How many units. Defaults to 1.
  """
  multiple: Int!
}
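
A similar illustrative sketch for the fixed-interval case, assuming DateInterval has already been mapped to a java.time ChronoUnit (units coarser than DAYS would need calendar-aware arithmetic):

import java.time.Duration;
import java.time.Instant;
import java.time.temporal.ChronoUnit;

public final class FixedIntervalSketch {
  // "Every N units": the asset must have changed within the last N units, so the
  // expected operation window starts N units before the evaluation time.
  static Instant windowStart(Instant evaluationTime, ChronoUnit unit, int multiple) {
    return evaluationTime.minus(Duration.of(multiple, unit)); // exact units up to DAYS
  }
}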
"""
The source of an Assertion
"""
type AssertionSource {
  """
  The source type
  """
  type: AssertionSourceType!
  """
  The time at which the assertion was initially created and the actor who created it
  """
  created: AuditStamp
}

"""
Information about the field to use in an assertion
"""
type SchemaFieldSpec {
  """
  The field path
  """
  path: String!

  """
  The DataHub standard schema field type.
  """
  type: String!

  """
  The native field type
  """
  nativeType: String!
}

"""
An enum to represent a type of change in an assertion value, metric, or measurement.
"""
enum AssertionValueChangeType {
  """
  A change that is defined in absolute terms.
  """
  ABSOLUTE

  """
  A change that is defined in relative terms using percentage change
  from the original value.
  """
  PERCENTAGE
}
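
A sketch of how the two change types could be evaluated against, say, a row count delta (illustrative only, not the evaluator DataHub ships):

public final class ValueChangeSketch {
  // ABSOLUTE compares the raw delta; PERCENTAGE compares the delta relative to
  // the original value, mirroring AssertionValueChangeType.
  static double change(long previousRowCount, long currentRowCount, boolean percentage) {
    long delta = currentRowCount - previousRowCount;
    if (!percentage) {
      return delta;
    }
    // Undefined when the previous count is 0; a real evaluator must special-case that.
    return 100.0 * delta / previousRowCount;
  }
}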
"""
A type of volume (row count) assertion
"""
enum VolumeAssertionType {
  """
  A volume assertion that is evaluated against the total row count of a dataset.
  """
  ROW_COUNT_TOTAL

  """
  A volume assertion that is evaluated against an incremental row count of a dataset,
  or a row count change.
  """
  ROW_COUNT_CHANGE

  """
  A volume assertion that checks the latest "segment" in a table, based on an incrementing
  column, to verify whether its row count falls within a particular range.
  This can be used to monitor the row count of an incrementing date-partition column segment.
  """
  INCREMENTING_SEGMENT_ROW_COUNT_TOTAL

  """
  A volume assertion that compares the row counts in neighboring "segments" or "partitions"
  of an incrementing column. This can be used to track changes between subsequent date partitions
  in a table, for example.
  """
  INCREMENTING_SEGMENT_ROW_COUNT_CHANGE
}

"""
Attributes defining a ROW_COUNT_TOTAL volume assertion.
"""
type RowCountTotal {
  """
  The operator you'd like to apply.
  Note that only numeric operators are valid inputs:
  GREATER_THAN, GREATER_THAN_OR_EQUAL_TO, EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO,
  BETWEEN.
  """
  operator: AssertionStdOperator!

  """
  The parameters you'd like to provide as input to the operator.
  Note that only numeric parameter types are valid inputs: NUMBER.
  """
  parameters: AssertionStdParameters!
}

"""
Attributes defining a ROW_COUNT_CHANGE volume assertion.
"""
type RowCountChange {
  """
  The type of the value used to evaluate the assertion: a fixed absolute value or a relative percentage.
  """
  type: AssertionValueChangeType!

  """
  The operator you'd like to apply.
  Note that only numeric operators are valid inputs:
  GREATER_THAN, GREATER_THAN_OR_EQUAL_TO, EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO,
  BETWEEN.
  """
  operator: AssertionStdOperator!

  """
  The parameters you'd like to provide as input to the operator.
  Note that only numeric parameter types are valid inputs: NUMBER.
  """
  parameters: AssertionStdParameters!
}

"""
Attributes defining an INCREMENTING_SEGMENT_ROW_COUNT_TOTAL volume assertion.
"""
type IncrementingSegmentRowCountTotal {
  """
  A specification of how the 'segment' can be derived using a column and an optional transformer function.
  """
  segment: IncrementingSegmentSpec!

  """
  The operator you'd like to apply.
  Note that only numeric operators are valid inputs:
  GREATER_THAN, GREATER_THAN_OR_EQUAL_TO, EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO,
  BETWEEN.
  """
  operator: AssertionStdOperator!

  """
  The parameters you'd like to provide as input to the operator.
  Note that only numeric parameter types are valid inputs: NUMBER.
  """
  parameters: AssertionStdParameters!
}

"""
Attributes defining an INCREMENTING_SEGMENT_ROW_COUNT_CHANGE volume assertion.
"""
type IncrementingSegmentRowCountChange {
  """
  A specification of how the 'segment' can be derived using a column and an optional transformer function.
  """
  segment: IncrementingSegmentSpec!

  """
  The type of the value used to evaluate the assertion: a fixed absolute value or a relative percentage.
  """
  type: AssertionValueChangeType!

  """
  The operator you'd like to apply to the row count value.
  Note that only numeric operators are valid inputs:
  GREATER_THAN, GREATER_THAN_OR_EQUAL_TO, EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO,
  BETWEEN.
  """
  operator: AssertionStdOperator!

  """
  The parameters you'd like to provide as input to the operator.
  Note that only numeric parameter types are valid inputs: NUMBER.
  """
  parameters: AssertionStdParameters!
}

"""
Core attributes required to identify an incrementing segment in a table. This type is mainly useful
for tables that constantly increase with new rows being added on a particular cadence (e.g. fact or event tables).

An incrementing segment represents a logical chunk of data which is INSERTED
into a dataset on a regular interval, along with the presence of a constantly-incrementing column
value such as an event time, date partition, or last modified column.

An incrementing segment is principally identified by 2 key attributes combined:

1. A field or column that represents the incrementing value. New rows that are inserted will be identified using this column.
Note that the value of this column may not by itself represent the "bucket" or the "segment" in which the row falls.

2. [Optional] A transformer function that may be applied to the selected column value in order
to obtain the final "segment identifier" or "bucket identifier". Rows that have the same value after applying the transformation
will be grouped into the same segment, from which the final value (e.g. row count) is determined.
"""
type IncrementingSegmentSpec {
  """
  The field to use to generate segments. It must be constantly incrementing as new rows are inserted.
  """
  field: SchemaFieldSpec!

  """
  Optional transformer function to apply to the field in order to obtain the final segment or bucket identifier.
  If not provided, then no operator will be applied to the field (identity function).
  """
  transformer: IncrementingSegmentFieldTransformer
}

"""
The definition of the transformer function that should be applied to a given field / column value in a dataset
in order to determine the segment or bucket that it belongs to, which in turn is used to evaluate
volume assertions.
"""
type IncrementingSegmentFieldTransformer {
  """
  The 'standard' transformer type. Note that not all source systems will support all operators.
  """
  type: IncrementingSegmentFieldTransformerType!

  """
  The 'native' transformer type, useful as a back door if a custom transformer is required.
  This field is required if the type is NATIVE.
  """
  nativeType: String
}

"""
The 'standard' transformer type. Note that not all source systems will support all operators.
"""
enum IncrementingSegmentFieldTransformerType {
  """
  Rounds a timestamp (in milliseconds) down to the nearest minute.
  """
  TIMESTAMP_MS_TO_MINUTE

  """
  Rounds a timestamp (in milliseconds) down to the nearest hour.
  """
  TIMESTAMP_MS_TO_HOUR

  """
  Rounds a timestamp (in milliseconds) down to the start of the day.
  """
  TIMESTAMP_MS_TO_DATE

  """
  Rounds a timestamp (in milliseconds) down to the start of the month.
  """
  TIMESTAMP_MS_TO_MONTH

  """
  Rounds a timestamp (in milliseconds) down to the start of the year.
  """
  TIMESTAMP_MS_TO_YEAR

  """
  Rounds a numeric value down to the nearest integer.
  """
  FLOOR

  """
  Rounds a numeric value up to the nearest integer.
  """
  CEILING

  """
  A backdoor to provide a native operator type specific to a given source system like
  Snowflake, Redshift, BQ, etc.
  """
  NATIVE
}
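
For illustration, a sketch of how a TIMESTAMP_MS_TO_DATE transformer could bucket an epoch-millis column into daily segments whose row counts are then evaluated (assumes UTC; illustrative only):

import java.time.Instant;
import java.time.LocalDate;
import java.time.ZoneOffset;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public final class SegmentSketch {
  // Applies the transformer to each column value and groups rows by the resulting
  // segment identifier, yielding per-segment row counts.
  static Map<LocalDate, Long> segmentRowCounts(List<Long> epochMillisColumn) {
    return epochMillisColumn.stream()
        .collect(
            Collectors.groupingBy(
                ms -> Instant.ofEpochMilli(ms).atZone(ZoneOffset.UTC).toLocalDate(),
                Collectors.counting()));
  }
}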
"""
A definition of a Volume (row count) assertion.
"""
type VolumeAssertionInfo {
  """
  The entity targeted by this Volume check.
  """
  entityUrn: String!

  """
  The type of the volume assertion being monitored.
  """
  type: VolumeAssertionType!

  """
  Produce FAILURE Assertion Result if the row count of the asset does not meet specific requirements.
  Required if type is 'ROW_COUNT_TOTAL'.
  """
  rowCountTotal: RowCountTotal

  """
  Produce FAILURE Assertion Result if the row count delta of the asset does not meet specific requirements.
  Required if type is 'ROW_COUNT_CHANGE'.
  """
  rowCountChange: RowCountChange

  """
  Produce FAILURE Assertion Result if the latest incrementing segment row count total of the asset
  does not meet specific requirements. Required if type is 'INCREMENTING_SEGMENT_ROW_COUNT_TOTAL'.
  """
  incrementingSegmentRowCountTotal: IncrementingSegmentRowCountTotal

  """
  Produce FAILURE Assertion Result if the incrementing segment row count delta of the asset
  does not meet specific requirements. Required if type is 'INCREMENTING_SEGMENT_ROW_COUNT_CHANGE'.
  """
  incrementingSegmentRowCountChange: IncrementingSegmentRowCountChange

  """
  A definition of the specific filters that should be applied when performing monitoring.
  If not provided, there is no filter, and the full table is under consideration.
  """
  filter: DatasetFilter
}
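
Exactly one specification field is expected per declared type (per the "Required if type is ..." notes above). A consumer could validate that along these lines — a sketch assuming the generated class exposes getters mirroring the setters used in VolumeAssertionMapper:

import com.linkedin.datahub.graphql.generated.VolumeAssertionInfo;

public final class VolumeAssertionSketch {
  // Checks that the field required by the declared volume assertion type is present.
  static boolean hasRequiredSpec(VolumeAssertionInfo info) {
    switch (info.getType()) {
      case ROW_COUNT_TOTAL:
        return info.getRowCountTotal() != null;
      case ROW_COUNT_CHANGE:
        return info.getRowCountChange() != null;
      case INCREMENTING_SEGMENT_ROW_COUNT_TOTAL:
        return info.getIncrementingSegmentRowCountTotal() != null;
      case INCREMENTING_SEGMENT_ROW_COUNT_CHANGE:
        return info.getIncrementingSegmentRowCountChange() != null;
      default:
        return false;
    }
  }
}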
"""
The type of the SQL assertion being monitored.
"""
enum SqlAssertionType {
  """
  A SQL Metric Assertion, e.g. one based on a numeric value returned by an arbitrary SQL query.
  """
  METRIC

  """
  A SQL assertion that is evaluated against the CHANGE in a SQL metric over time.
  """
  METRIC_CHANGE
}

"""
Attributes defining a SQL Assertion
"""
type SqlAssertionInfo {
  """
  The type of the SQL assertion being monitored.
  """
  type: SqlAssertionType!

  """
  The entity targeted by this SQL check.
  """
  entityUrn: String!

  """
  The SQL statement to be executed when evaluating the assertion.
  """
  statement: String!

  """
  The type of the value used to evaluate the assertion: a fixed absolute value or a relative percentage.
  Required if the type is METRIC_CHANGE.
  """
  changeType: AssertionValueChangeType

  """
  The operator you'd like to apply to the result of the SQL query.
  """
  operator: AssertionStdOperator!

  """
  The parameters you'd like to provide as input to the operator.
  """
  parameters: AssertionStdParameters!
}
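
A minimal sketch of how a METRIC-type SQL assertion could be evaluated over JDBC; it hard-codes a LESS_THAN_OR_EQUAL_TO comparison instead of dispatching on the full operator / parameters model (illustrative only):

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

public final class SqlMetricSketch {
  // Runs the assertion's SQL statement and compares the single numeric result
  // it returns against a fixed threshold.
  static boolean evaluate(Connection connection, String statement, double maxValue)
      throws SQLException {
    try (Statement stmt = connection.createStatement();
        ResultSet rs = stmt.executeQuery(statement)) {
      return rs.next() && rs.getDouble(1) <= maxValue; // LESS_THAN_OR_EQUAL_TO
    }
  }
}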
"""
The type of a Field assertion
"""
enum FieldAssertionType {
  """
  An assertion used to validate the values contained within a field / column, given a set of rows.
  """
  FIELD_VALUES

  """
  An assertion used to validate the value of a common field / column metric (e.g. aggregation)
  such as null count and percentage, min, max, median, and more.
  """
  FIELD_METRIC
}

"""
The type of the Field Transform
"""
enum FieldTransformType {
  """
  Obtain the length of a string field / column (applicable to string types)
  """
  LENGTH
}

"""
The type of failure threshold.
"""
enum FieldValuesFailThresholdType {
  """
  The maximum number of column values (i.e. rows) that are allowed
  to fail the defined expectations before the assertion officially fails.
  """
  COUNT

  """
  The maximum percentage of rows that are allowed
  to fail the defined column expectations before the assertion officially fails.
  """
  PERCENTAGE
}
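
A sketch of how the two threshold types could gate the final verdict of a FIELD_VALUES assertion (illustrative only):

public final class FailThresholdSketch {
  // COUNT caps the absolute number of failing rows; PERCENTAGE caps failing rows
  // relative to the number of rows evaluated.
  static boolean assertionFails(
      long failingRows, long totalRows, boolean percentageType, long thresholdValue) {
    if (percentageType) {
      return totalRows > 0 && (100.0 * failingRows / totalRows) > thresholdValue;
    }
    return failingRows > thresholdValue;
  }
}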
"""
A standard metric that can be derived from the set of values
for a specific field / column of a dataset / table.
"""
enum FieldMetricType {
  """
  The number of unique values found in the column value set
  """
  UNIQUE_COUNT

  """
  The percentage of unique values to total rows for the dataset
  """
  UNIQUE_PERCENTAGE

  """
  The number of null values found in the column value set
  """
  NULL_COUNT

  """
  The percentage of null values to total rows for the dataset
  """
  NULL_PERCENTAGE

  """
  The minimum value in the column set (applies to numeric columns)
  """
  MIN

  """
  The maximum value in the column set (applies to numeric columns)
  """
  MAX

  """
  The mean value found in the column set (applies to numeric columns)
  """
  MEAN

  """
  The median value found in the column set (applies to numeric columns)
  """
  MEDIAN

  """
  The standard deviation of values found in the column set (applies to numeric columns)
  """
  STDDEV

  """
  The number of negative values found in the value set (applies to numeric columns)
  """
  NEGATIVE_COUNT

  """
  The percentage of negative values to total rows for the dataset (applies to numeric columns)
  """
  NEGATIVE_PERCENTAGE

  """
  The number of zero values found in the value set (applies to numeric columns)
  """
  ZERO_COUNT

  """
  The percentage of zero values to total rows for the dataset (applies to numeric columns)
  """
  ZERO_PERCENTAGE

  """
  The minimum length found in the column set (applies to string columns)
  """
  MIN_LENGTH

  """
  The maximum length found in the column set (applies to string columns)
  """
  MAX_LENGTH

  """
  The number of empty string values found in the value set (applies to string columns).
  Note: This is a completely different metric from NULL_COUNT!
  """
  EMPTY_COUNT

  """
  The percentage of empty string values to total rows for the dataset (applies to string columns).
  Note: This is a completely different metric from NULL_PERCENTAGE!
  """
  EMPTY_PERCENTAGE
}
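
The EMPTY_COUNT vs NULL_COUNT distinction is easy to see with a small example (illustrative):

import java.util.Arrays;
import java.util.List;

public final class FieldMetricSketch {
  // Empty strings are present-but-blank values; nulls are absent values.
  public static void main(String[] args) {
    List<String> column = Arrays.asList("a", "", null, "", "b", null);
    long nullCount = column.stream().filter(v -> v == null).count();              // 2
    long emptyCount = column.stream().filter(v -> "".equals(v)).count();          // 2
    long uniqueCount = column.stream().filter(v -> v != null).distinct().count(); // 3
    System.out.printf("nulls=%d empty=%d unique=%d%n", nullCount, emptyCount, uniqueCount);
  }
}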
"""
A definition of a Field (Column) assertion.
"""
type FieldAssertionInfo {
  """
  The type of the field assertion being monitored.
  """
  type: FieldAssertionType!

  """
  The entity targeted by this Field check.
  """
  entityUrn: String!

  """
  The definition of an assertion that validates individual values of a field / column for a set of rows.
  """
  fieldValuesAssertion: FieldValuesAssertion

  """
  The definition of an assertion that validates a common metric obtained about a field / column for a set of rows.
  """
  fieldMetricAssertion: FieldMetricAssertion

  """
  A definition of the specific filters that should be applied when performing monitoring.
  If not provided, there is no filter, and the full table is under consideration.
  """
  filter: DatasetFilter
}

"""
A definition of a Field Values assertion.
"""
type FieldValuesAssertion {
  """
  The field under evaluation.
  """
  field: SchemaFieldSpec!

  """
  An optional transform to apply to field values before evaluating the operator.
  """
  transform: FieldTransform

  """
  The predicate to evaluate against a single value of the field.
  Depending on the operator, parameters may be required.
  """
  operator: AssertionStdOperator!

  """
  Standard parameters required for the assertion.
  """
  parameters: AssertionStdParameters

  """
  Additional customization about when the assertion should be officially considered failing.
  """
  failThreshold: FieldValuesFailThreshold!

  """
  Whether null values should be excluded when evaluating the values assertion.
  """
  excludeNulls: Boolean!
}

"""
Definition of a transform applied to the values of a column / field.
"""
type FieldTransform {
  """
  The type of the field transform.
  """
  type: FieldTransformType!
}

type FieldValuesFailThreshold {
  """
  The type of failure threshold.
  """
  type: FieldValuesFailThresholdType!

  """
  The value of the threshold, representing either a count or a percentage.
  """
  value: Long!
}

"""
A definition of a Field Metric assertion.
"""
type FieldMetricAssertion {
  """
  The field under evaluation
  """
  field: SchemaFieldSpec!

  """
  The specific metric to assert against.
  """
  metric: FieldMetricType!

  """
  The predicate to evaluate against the metric for the field / column.
  """
  operator: AssertionStdOperator!

  """
  Standard parameters required for the assertion.
  """
  parameters: AssertionStdParameters
}

"""
Information about a Schema assertion
"""
type SchemaAssertionInfo {
  """
  The entity targeted by this schema assertion.
  """
  entityUrn: String!

  """
  The fields defined by the schema assertion.
  """
  fields: [SchemaAssertionField!]!

  """
  A definition of the expected structure for the asset.
  Deprecated! Use the simpler 'fields' instead.
  """
  schema: SchemaMetadata

  """
  The compatibility level required for the assertion to pass.
  """
  compatibility: SchemaAssertionCompatibility!
}
|
||||
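To make the failThreshold semantics above concrete, here is a minimal Python sketch (not part of this change) of how a Field Values assertion outcome could be decided from a count of failing rows. The COUNT threshold type and the is_failing helper are illustrative assumptions; the schema only guarantees that value holds either a count or a percentage, and the tests later in this diff confirm PERCENTAGE.

# Illustrative sketch only: decide whether a Field Values assertion fails,
# given how many rows violated the per-value predicate.
def is_failing(
    invalid_count: int, total_count: int, threshold_type: str, threshold_value: int
) -> bool:
    if threshold_type == "COUNT":  # assumed enum value, for illustration
        return invalid_count > threshold_value
    if threshold_type == "PERCENTAGE":  # confirmed by FieldAssertionMapperTest below
        return (100.0 * invalid_count / max(total_count, 1)) > threshold_value
    raise ValueError(f"Unknown threshold type: {threshold_type}")

# e.g. a PERCENTAGE threshold of 5 tolerates up to 5% invalid values:
assert not is_failing(4, 100, "PERCENTAGE", 5)
assert is_failing(6, 100, "PERCENTAGE", 5)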
@ -7508,6 +7508,11 @@ type BatchSpec {
The result type of an assertion, success or failure.
"""
enum AssertionResultType {
  """
  The assertion has not yet been fully evaluated.
  """
  INIT

  """
  The assertion succeeded.
  """
@ -7517,6 +7522,11 @@ enum AssertionResultType {
  The assertion failed.
  """
  FAILURE

  """
  The assertion errored.
  """
  ERROR
}

"""
@ -7678,6 +7688,16 @@ enum AssertionStdOperator {
  """
  NOT_IN

  """
  Value being asserted is true.
  """
  IS_TRUE

  """
  Value being asserted is false.
  """
  IS_FALSE

  """
  Other
  """
@ -7824,6 +7844,11 @@ type AssertionRunEventsResult {
  """
  succeeded: Int!

  """
  The number of errored run events
  """
  errored: Int!

  """
  The run events themselves
  """

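The new errored tally mirrors the existing succeeded and failed tallies: a run event contributes only when it completed with an ERROR result. A minimal Python sketch of that counting rule (the tuple representation is hypothetical; the real resolver works over AssertionRunEvent objects):

# Each event is (status, result_type); result_type may be None if still running.
events = [("COMPLETE", "SUCCESS"), ("COMPLETE", "ERROR"), ("RUNNING", None)]

def count(events, wanted):
    # Only COMPLETE events whose result matches the wanted type are tallied.
    return sum(1 for status, result in events if status == "COMPLETE" and result == wanted)

assert count(events, "SUCCESS") == 1
assert count(events, "ERROR") == 1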
@ -136,6 +136,36 @@ enum IncidentState {
A specific type of incident
"""
enum IncidentType {
  """
  A Freshness Assertion has failed, triggering the incident.
  Raised on assets where assertions are configured to generate incidents.
  """
  FRESHNESS

  """
  A Volume Assertion has failed, triggering the incident.
  Raised on assets where assertions are configured to generate incidents.
  """
  VOLUME

  """
  A Field Assertion has failed, triggering the incident.
  Raised on assets where assertions are configured to generate incidents.
  """
  FIELD

  """
  A SQL Assertion has failed, triggering the incident.
  Raised on assets where assertions are configured to generate incidents.
  """
  SQL

  """
  A Schema Assertion has failed, triggering the incident.
  Raised on assets where assertions are configured to generate incidents.
  """
  DATA_SCHEMA

  """
  An operational incident, e.g. failure to materialize a dataset, or failure to execute a task / pipeline.
  """
@ -174,6 +204,11 @@ enum IncidentSourceType {
  The incident was created manually, from either the API or the UI.
  """
  MANUAL

  """
  An assertion has failed, triggering the incident.
  """
  ASSERTION_FAILURE
}

"""

@ -97,6 +97,7 @@ public class AssertionRunEventResolverTest {
    assertEquals(result.getTotal(), 1);
    assertEquals(result.getFailed(), 0);
    assertEquals(result.getSucceeded(), 1);
    assertEquals(result.getErrored(), 0);

    com.linkedin.datahub.graphql.generated.AssertionRunEvent graphqlRunEvent =
        resolver.get(mockEnv).get().getRunEvents().get(0);

@ -0,0 +1,346 @@
package com.linkedin.datahub.graphql.types.assertion;

import static org.testng.Assert.assertEquals;

import com.google.common.collect.ImmutableList;
import com.linkedin.assertion.AssertionInfo;
import com.linkedin.assertion.AssertionSource;
import com.linkedin.assertion.AssertionStdAggregation;
import com.linkedin.assertion.AssertionStdOperator;
import com.linkedin.assertion.AssertionStdParameter;
import com.linkedin.assertion.AssertionStdParameterType;
import com.linkedin.assertion.AssertionStdParameters;
import com.linkedin.assertion.AssertionType;
import com.linkedin.assertion.DatasetAssertionInfo;
import com.linkedin.assertion.DatasetAssertionScope;
import com.linkedin.assertion.FreshnessAssertionInfo;
import com.linkedin.assertion.FreshnessAssertionSchedule;
import com.linkedin.assertion.FreshnessAssertionScheduleType;
import com.linkedin.assertion.FreshnessAssertionType;
import com.linkedin.assertion.FreshnessCronSchedule;
import com.linkedin.assertion.SchemaAssertionCompatibility;
import com.linkedin.assertion.SchemaAssertionInfo;
import com.linkedin.common.GlobalTags;
import com.linkedin.common.TagAssociationArray;
import com.linkedin.common.UrnArray;
import com.linkedin.common.urn.TagUrn;
import com.linkedin.common.urn.UrnUtils;
import com.linkedin.data.DataMap;
import com.linkedin.data.template.StringMap;
import com.linkedin.datahub.graphql.generated.Assertion;
import com.linkedin.datahub.graphql.generated.FixedIntervalSchedule;
import com.linkedin.entity.Aspect;
import com.linkedin.entity.EntityResponse;
import com.linkedin.entity.EnvelopedAspect;
import com.linkedin.entity.EnvelopedAspectMap;
import com.linkedin.metadata.Constants;
import com.linkedin.schema.MySqlDDL;
import com.linkedin.schema.SchemaField;
import com.linkedin.schema.SchemaFieldArray;
import com.linkedin.schema.SchemaFieldDataType;
import com.linkedin.schema.SchemaMetadata;
import com.linkedin.schema.StringType;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.testng.Assert;
import org.testng.annotations.Test;

public class AssertionMapperTest {

  @Test
  public void testMapDatasetAssertion() {
    // Case 1: Without nullable fields
    AssertionInfo input = createDatasetAssertionInfoWithoutNullableFields();
    EntityResponse datasetAssertionEntityResponse = createAssertionInfoEntityResponse(input);
    Assertion output = AssertionMapper.map(null, datasetAssertionEntityResponse);
    verifyAssertionInfo(input, output);

    // Case 2: With nullable fields
    input = createDatasetAssertionInfoWithNullableFields();
    EntityResponse datasetAssertionEntityResponseWithNullables =
        createAssertionInfoEntityResponse(input);
    output = AssertionMapper.map(null, datasetAssertionEntityResponseWithNullables);
    verifyAssertionInfo(input, output);
  }

  @Test
  public void testMapTags() throws Exception {
    HashMap<String, EnvelopedAspect> aspects = new HashMap<>();
    AssertionInfo info = createFreshnessAssertionInfoWithoutNullableFields();

    GlobalTags tags = new GlobalTags();
    tags.setTags(
        new TagAssociationArray(
            Collections.singletonList(
                new com.linkedin.common.TagAssociation()
                    .setTag(TagUrn.createFromString("urn:li:tag:test")))));

    aspects.put(Constants.ASSERTION_INFO_ASPECT_NAME, createEnvelopedAspect(info.data()));
    aspects.put(Constants.GLOBAL_TAGS_ASPECT_NAME, createEnvelopedAspect(tags.data()));
    EntityResponse response = createEntityResponse(aspects);

    Assertion assertion = AssertionMapper.map(null, response);
    assertEquals(assertion.getTags().getTags().size(), 1);
    assertEquals(
        assertion.getTags().getTags().get(0).getTag().getUrn().toString(), "urn:li:tag:test");
  }

  @Test
  public void testMapFreshnessAssertion() {
    // Case 1: Without nullable fields
    AssertionInfo inputInfo = createFreshnessAssertionInfoWithoutNullableFields();

    EntityResponse freshnessAssertionEntityResponse = createAssertionInfoEntityResponse(inputInfo);
    Assertion output = AssertionMapper.map(null, freshnessAssertionEntityResponse);
    verifyAssertionInfo(inputInfo, output);

    // Case 2: With nullable fields
    inputInfo = createFreshnessAssertionInfoWithNullableFields();
    EntityResponse freshnessAssertionEntityResponseWithNullables =
        createAssertionInfoEntityResponse(inputInfo);
    output = AssertionMapper.map(null, freshnessAssertionEntityResponseWithNullables);
    verifyAssertionInfo(inputInfo, output);
  }

  @Test
  public void testMapDataSchemaAssertion() {
    AssertionInfo input = createSchemaAssertion();
    EntityResponse schemaAssertionEntityResponse = createAssertionInfoEntityResponse(input);
    Assertion output = AssertionMapper.map(null, schemaAssertionEntityResponse);
    verifyAssertionInfo(input, output);
  }

  private void verifyAssertionInfo(AssertionInfo input, Assertion output) {
    Assert.assertNotNull(output);
    Assert.assertNotNull(output.getInfo());
    Assert.assertEquals(output.getInfo().getType().toString(), input.getType().toString());

    if (input.hasDatasetAssertion()) {
      verifyDatasetAssertion(input.getDatasetAssertion(), output.getInfo().getDatasetAssertion());
    }

    if (input.hasFreshnessAssertion()) {
      verifyFreshnessAssertion(
          input.getFreshnessAssertion(), output.getInfo().getFreshnessAssertion());
    }

    if (input.hasSchemaAssertion()) {
      verifySchemaAssertion(input.getSchemaAssertion(), output.getInfo().getSchemaAssertion());
    }

    if (input.hasSource()) {
      verifySource(input.getSource(), output.getInfo().getSource());
    }
  }

  private void verifyDatasetAssertion(
      DatasetAssertionInfo input,
      com.linkedin.datahub.graphql.generated.DatasetAssertionInfo output) {
    Assert.assertEquals(output.getOperator().toString(), input.getOperator().toString());
    Assert.assertEquals(output.getScope().toString(), input.getScope().toString());
    Assert.assertEquals(output.getDatasetUrn(), input.getDataset().toString());
    if (input.hasAggregation()) {
      Assert.assertEquals(output.getAggregation().toString(), input.getAggregation().toString());
    }
    if (input.hasNativeType()) {
      Assert.assertEquals(output.getNativeType(), input.getNativeType().toString());
    }
    if (input.hasLogic()) {
      Assert.assertEquals(output.getLogic(), input.getLogic());
    }
    if (input.hasFields()) {
      Assert.assertTrue(
          input.getFields().stream()
              .allMatch(
                  field ->
                      output.getFields().stream()
                          .anyMatch(outField -> field.toString().equals(outField.getUrn()))));
    }
  }

  private void verifyFreshnessAssertion(
      FreshnessAssertionInfo input,
      com.linkedin.datahub.graphql.generated.FreshnessAssertionInfo output) {
    Assert.assertEquals(output.getType().toString(), input.getType().toString());
    Assert.assertEquals(output.getEntityUrn(), input.getEntity().toString());
    if (input.hasSchedule()) {
      verifyFreshnessSchedule(input.getSchedule(), output.getSchedule());
    }
  }

  private void verifySchemaAssertion(
      SchemaAssertionInfo input,
      com.linkedin.datahub.graphql.generated.SchemaAssertionInfo output) {
    Assert.assertEquals(output.getEntityUrn(), input.getEntity().toString());
    Assert.assertEquals(output.getCompatibility().toString(), input.getCompatibility().toString());
    Assert.assertEquals(
        output.getSchema().getFields().size(), input.getSchema().getFields().size());
  }

  private void verifyCronSchedule(
      FreshnessCronSchedule input,
      com.linkedin.datahub.graphql.generated.FreshnessCronSchedule output) {
    Assert.assertEquals(output.getCron(), input.getCron());
    Assert.assertEquals(output.getTimezone(), input.getTimezone());
    if (input.hasWindowStartOffsetMs()) {
      Assert.assertEquals(output.getWindowStartOffsetMs(), input.getWindowStartOffsetMs());
    }
  }

  private void verifyFreshnessSchedule(
      FreshnessAssertionSchedule input,
      com.linkedin.datahub.graphql.generated.FreshnessAssertionSchedule output) {
    Assert.assertEquals(output.getType().toString(), input.getType().toString());
    if (input.hasCron()) {
      verifyCronSchedule(input.getCron(), output.getCron());
    }
    if (input.hasFixedInterval()) {
      verifyFixedIntervalSchedule(input.getFixedInterval(), output.getFixedInterval());
    }
  }

  private void verifyFixedIntervalSchedule(
      com.linkedin.assertion.FixedIntervalSchedule input, FixedIntervalSchedule output) {
    Assert.assertEquals(output.getMultiple(), (int) input.getMultiple());
    Assert.assertEquals(output.getUnit().toString(), input.getUnit().toString());
  }

  private void verifySource(
      AssertionSource input, com.linkedin.datahub.graphql.generated.AssertionSource output) {
    Assert.assertEquals(output.getType().toString(), input.getType().toString());
  }

  private EntityResponse createAssertionInfoEntityResponse(final AssertionInfo info) {
    HashMap<String, EnvelopedAspect> aspects = new HashMap<>();
    aspects.put(Constants.ASSERTION_INFO_ASPECT_NAME, createEnvelopedAspect(info.data()));

    return createEntityResponse(aspects);
  }

  private EntityResponse createEntityResponse(Map<String, EnvelopedAspect> aspects) {
    EntityResponse entityResponse = new EntityResponse();
    entityResponse.setUrn(UrnUtils.getUrn("urn:li:assertion:1"));
    entityResponse.setAspects(new EnvelopedAspectMap(new HashMap<>()));
    aspects.forEach(
        (aspectName, envelopedAspect) -> {
          entityResponse.getAspects().put(aspectName, envelopedAspect);
        });

    return entityResponse;
  }

  private EnvelopedAspect createEnvelopedAspect(DataMap dataMap) {
    EnvelopedAspect envelopedAspect = new EnvelopedAspect();
    envelopedAspect.setValue(new Aspect(dataMap));
    return envelopedAspect;
  }

  private AssertionInfo createDatasetAssertionInfoWithoutNullableFields() {
    AssertionInfo info = new AssertionInfo();
    info.setType(com.linkedin.assertion.AssertionType.DATASET);
    DatasetAssertionInfo datasetAssertionInfo = new DatasetAssertionInfo();
    datasetAssertionInfo.setDataset(UrnUtils.getUrn("urn:li:dataset:1"));
    datasetAssertionInfo.setScope(DatasetAssertionScope.DATASET_COLUMN);
    datasetAssertionInfo.setOperator(AssertionStdOperator.GREATER_THAN);
    info.setDatasetAssertion(datasetAssertionInfo);
    return info;
  }

  private AssertionInfo createDatasetAssertionInfoWithNullableFields() {
    AssertionInfo infoWithoutNullables = createDatasetAssertionInfoWithoutNullableFields();
    DatasetAssertionInfo baseInfo = infoWithoutNullables.getDatasetAssertion();
    baseInfo.setFields(
        new UrnArray(
            Arrays.asList(
                UrnUtils.getUrn(
                    "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,name,PROD),field)"))));
    baseInfo.setAggregation(AssertionStdAggregation.SUM);
    baseInfo.setParameters(createAssertionStdParameters());
    baseInfo.setNativeType("native_type");
    baseInfo.setNativeParameters(new StringMap(Collections.singletonMap("key", "value")));
    baseInfo.setLogic("sample_logic");
    infoWithoutNullables.setSource(
        new AssertionSource().setType(com.linkedin.assertion.AssertionSourceType.INFERRED));
    return infoWithoutNullables;
  }

  private AssertionInfo createFreshnessAssertionInfoWithoutNullableFields() {
    AssertionInfo info = new AssertionInfo();
    info.setType(AssertionType.FRESHNESS);
    FreshnessAssertionInfo freshnessAssertionInfo = new FreshnessAssertionInfo();
    freshnessAssertionInfo.setEntity(
        UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,name,PROD)"));
    freshnessAssertionInfo.setType(FreshnessAssertionType.DATASET_CHANGE);
    info.setFreshnessAssertion(freshnessAssertionInfo);
    return info;
  }

  private AssertionInfo createFreshnessAssertionInfoWithNullableFields() {
    AssertionInfo infoWithoutNullables = createFreshnessAssertionInfoWithoutNullableFields();
    FreshnessAssertionInfo baseInfo = infoWithoutNullables.getFreshnessAssertion();
    baseInfo.setSchedule(createFreshnessAssertionSchedule());
    infoWithoutNullables.setSource(
        new AssertionSource().setType(com.linkedin.assertion.AssertionSourceType.INFERRED));
    return infoWithoutNullables;
  }

  private AssertionInfo createSchemaAssertion() {
    AssertionInfo info = new AssertionInfo();
    info.setType(AssertionType.DATA_SCHEMA);
    SchemaAssertionInfo schemaAssertionInfo = new SchemaAssertionInfo();
    schemaAssertionInfo.setEntity(UrnUtils.getUrn("urn:li:dataset:1"));
    schemaAssertionInfo.setCompatibility(SchemaAssertionCompatibility.SUPERSET);
    schemaAssertionInfo.setSchema(
        new SchemaMetadata()
            .setCluster("Test")
            .setHash("Test")
            .setPlatformSchema(SchemaMetadata.PlatformSchema.create(new MySqlDDL()))
            .setFields(
                new SchemaFieldArray(
                    ImmutableList.of(
                        new SchemaField()
                            .setType(
                                new SchemaFieldDataType()
                                    .setType(SchemaFieldDataType.Type.create(new StringType())))
                            .setNullable(false)
                            .setNativeDataType("string")
                            .setFieldPath("test")))));
    return info;
  }

  private AssertionStdParameters createAssertionStdParameters() {
    AssertionStdParameters parameters = new AssertionStdParameters();
    parameters.setValue(createAssertionStdParameter());
    parameters.setMinValue(createAssertionStdParameter());
    parameters.setMaxValue(createAssertionStdParameter());
    return parameters;
  }

  private AssertionStdParameter createAssertionStdParameter() {
    AssertionStdParameter parameter = new AssertionStdParameter();
    parameter.setType(AssertionStdParameterType.NUMBER);
    parameter.setValue("100");
    return parameter;
  }

  private FreshnessAssertionSchedule createFreshnessAssertionSchedule() {
    FreshnessAssertionSchedule schedule = new FreshnessAssertionSchedule();
    schedule.setType(FreshnessAssertionScheduleType.CRON);
    schedule.setCron(createCronSchedule());
    return schedule;
  }

  private FreshnessCronSchedule createCronSchedule() {
    FreshnessCronSchedule cronSchedule = new FreshnessCronSchedule();
    cronSchedule.setCron("0 0 * * *");
    cronSchedule.setTimezone("UTC");
    return cronSchedule;
  }
}
@ -7,6 +7,10 @@ import com.datahub.authentication.Authentication;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.linkedin.assertion.AssertionAction;
import com.linkedin.assertion.AssertionActionArray;
import com.linkedin.assertion.AssertionActionType;
import com.linkedin.assertion.AssertionActions;
import com.linkedin.assertion.AssertionInfo;
import com.linkedin.assertion.AssertionType;
import com.linkedin.common.DataPlatformInstance;
@ -48,6 +52,17 @@ public class AssertionTypeTest {
      new DataPlatformInstance()
          .setPlatform(new DataPlatformUrn("snowflake"))
          .setInstance(null, SetMode.IGNORE_NULL);
  // Acryl SaaS Only
  private static final AssertionActions TEST_ASSERTION_ACTIONS =
      new AssertionActions()
          .setOnSuccess(
              new AssertionActionArray(
                  ImmutableList.of(
                      new AssertionAction().setType(AssertionActionType.RAISE_INCIDENT))))
          .setOnFailure(
              new AssertionActionArray(
                  ImmutableList.of(
                      new AssertionAction().setType(AssertionActionType.RESOLVE_INCIDENT))));

  private static final String TEST_ASSERTION_URN_2 = "urn:li:assertion:guid-2";

@ -69,6 +84,9 @@ public class AssertionTypeTest {
    assertion1Aspects.put(
        Constants.ASSERTION_INFO_ASPECT_NAME,
        new EnvelopedAspect().setValue(new Aspect(TEST_ASSERTION_INFO.data())));
    assertion1Aspects.put(
        Constants.ASSERTION_ACTIONS_ASPECT_NAME,
        new EnvelopedAspect().setValue(new Aspect(TEST_ASSERTION_ACTIONS.data())));
    Mockito.when(
            client.batchGetV2(
                any(),
@ -112,6 +130,12 @@ public class AssertionTypeTest {
    assertEquals(assertion.getInfo().getType().toString(), AssertionType.DATASET.toString());
    assertEquals(assertion.getInfo().getDatasetAssertion(), null);
    assertEquals(assertion.getPlatform().getUrn(), "urn:li:dataPlatform:snowflake");
    assertEquals(
        assertion.getActions().getOnSuccess().get(0).getType(),
        com.linkedin.datahub.graphql.generated.AssertionActionType.RAISE_INCIDENT);
    assertEquals(
        assertion.getActions().getOnFailure().get(0).getType(),
        com.linkedin.datahub.graphql.generated.AssertionActionType.RESOLVE_INCIDENT);

    // Assert second element is null.
    assertNull(result.get(1));

@ -0,0 +1,100 @@
package com.linkedin.datahub.graphql.types.assertion;

import com.linkedin.assertion.AssertionStdOperator;
import com.linkedin.assertion.FieldAssertionInfo;
import com.linkedin.assertion.FieldAssertionType;
import com.linkedin.assertion.FieldMetricAssertion;
import com.linkedin.assertion.FieldMetricType;
import com.linkedin.assertion.FieldTransform;
import com.linkedin.assertion.FieldTransformType;
import com.linkedin.assertion.FieldValuesAssertion;
import com.linkedin.assertion.FieldValuesFailThreshold;
import com.linkedin.assertion.FieldValuesFailThresholdType;
import com.linkedin.common.urn.Urn;
import com.linkedin.dataset.DatasetFilter;
import com.linkedin.dataset.DatasetFilterType;
import com.linkedin.schema.SchemaFieldSpec;
import org.testng.Assert;
import org.testng.annotations.Test;

public class FieldAssertionMapperTest {
  @Test
  public void testMapFieldValuesAssertionInfo() throws Exception {
    FieldAssertionInfo fieldAssertionInfo =
        new FieldAssertionInfo()
            .setEntity(new Urn("urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)"))
            .setFilter(
                new DatasetFilter().setType(DatasetFilterType.SQL).setSql("WHERE value > 5;"))
            .setType(FieldAssertionType.FIELD_VALUES)
            .setFieldValuesAssertion(
                new FieldValuesAssertion()
                    .setExcludeNulls(true)
                    .setFailThreshold(
                        new FieldValuesFailThreshold()
                            .setType(FieldValuesFailThresholdType.PERCENTAGE)
                            .setValue(5L))
                    .setField(
                        new SchemaFieldSpec()
                            .setPath("path")
                            .setType("STRING")
                            .setNativeType("VARCHAR"))
                    .setOperator(AssertionStdOperator.IS_TRUE)
                    .setTransform(new FieldTransform().setType(FieldTransformType.LENGTH)));

    com.linkedin.datahub.graphql.generated.FieldAssertionInfo result =
        FieldAssertionMapper.mapFieldAssertionInfo(null, fieldAssertionInfo);
    Assert.assertEquals(result.getEntityUrn(), "urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)");
    Assert.assertEquals(
        result.getType(), com.linkedin.datahub.graphql.generated.FieldAssertionType.FIELD_VALUES);
    Assert.assertEquals(
        result.getFilter().getType(), com.linkedin.datahub.graphql.generated.DatasetFilterType.SQL);
    Assert.assertEquals(result.getFilter().getSql(), "WHERE value > 5;");
    Assert.assertEquals(result.getFieldValuesAssertion().getField().getPath(), "path");
    Assert.assertEquals(result.getFieldValuesAssertion().getField().getType(), "STRING");
    Assert.assertEquals(result.getFieldValuesAssertion().getField().getNativeType(), "VARCHAR");
    Assert.assertEquals(
        result.getFieldValuesAssertion().getOperator(),
        com.linkedin.datahub.graphql.generated.AssertionStdOperator.IS_TRUE);
    Assert.assertEquals(
        result.getFieldValuesAssertion().getTransform().getType(),
        com.linkedin.datahub.graphql.generated.FieldTransformType.LENGTH);
    Assert.assertEquals(result.getFieldValuesAssertion().getExcludeNulls(), true);
    Assert.assertEquals(
        result.getFieldValuesAssertion().getFailThreshold().getType(),
        com.linkedin.datahub.graphql.generated.FieldValuesFailThresholdType.PERCENTAGE);
    Assert.assertEquals(
        result.getFieldValuesAssertion().getFailThreshold().getValue(), Long.valueOf(5L));
  }

  @Test
  public void testMapFieldMetricAssertionInfo() throws Exception {
    FieldAssertionInfo fieldAssertionInfo =
        new FieldAssertionInfo()
            .setEntity(new Urn("urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)"))
            .setType(FieldAssertionType.FIELD_METRIC)
            .setFieldMetricAssertion(
                new FieldMetricAssertion()
                    .setField(
                        new SchemaFieldSpec()
                            .setPath("path")
                            .setType("STRING")
                            .setNativeType("VARCHAR"))
                    .setOperator(AssertionStdOperator.IS_TRUE)
                    .setMetric(FieldMetricType.MEDIAN));

    com.linkedin.datahub.graphql.generated.FieldAssertionInfo result =
        FieldAssertionMapper.mapFieldAssertionInfo(null, fieldAssertionInfo);
    Assert.assertEquals(result.getEntityUrn(), "urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)");
    Assert.assertEquals(
        result.getType(), com.linkedin.datahub.graphql.generated.FieldAssertionType.FIELD_METRIC);
    Assert.assertEquals(result.getFieldMetricAssertion().getField().getPath(), "path");
    Assert.assertEquals(result.getFieldMetricAssertion().getField().getType(), "STRING");
    Assert.assertEquals(result.getFieldMetricAssertion().getField().getNativeType(), "VARCHAR");
    Assert.assertEquals(
        result.getFieldMetricAssertion().getOperator(),
        com.linkedin.datahub.graphql.generated.AssertionStdOperator.IS_TRUE);
    Assert.assertEquals(
        result.getFieldMetricAssertion().getMetric(),
        com.linkedin.datahub.graphql.generated.FieldMetricType.MEDIAN);
  }
}
@ -0,0 +1,82 @@
package com.linkedin.datahub.graphql.types.assertion;

import com.linkedin.assertion.FixedIntervalSchedule;
import com.linkedin.assertion.FreshnessAssertionInfo;
import com.linkedin.assertion.FreshnessAssertionSchedule;
import com.linkedin.assertion.FreshnessAssertionScheduleType;
import com.linkedin.assertion.FreshnessAssertionType;
import com.linkedin.assertion.FreshnessCronSchedule;
import com.linkedin.common.urn.Urn;
import com.linkedin.dataset.DatasetFilter;
import com.linkedin.dataset.DatasetFilterType;
import com.linkedin.timeseries.CalendarInterval;
import org.testng.Assert;
import org.testng.annotations.Test;

public class FreshnessAssertionMapperTest {
  @Test
  public void testMapCronFreshnessAssertionInfo() throws Exception {
    FreshnessAssertionInfo freshnessAssertionInfo =
        new FreshnessAssertionInfo()
            .setEntity(new Urn("urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)"))
            .setType(FreshnessAssertionType.DATASET_CHANGE)
            .setFilter(
                new DatasetFilter().setType(DatasetFilterType.SQL).setSql("WHERE value > 5;"))
            .setSchedule(
                new FreshnessAssertionSchedule()
                    .setType(FreshnessAssertionScheduleType.CRON)
                    .setCron(
                        new FreshnessCronSchedule()
                            .setCron("0 0 0 * * ? *")
                            .setTimezone("America/Los_Angeles")
                            .setWindowStartOffsetMs(10L)));

    com.linkedin.datahub.graphql.generated.FreshnessAssertionInfo result =
        FreshnessAssertionMapper.mapFreshnessAssertionInfo(null, freshnessAssertionInfo);
    Assert.assertEquals(result.getEntityUrn(), "urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)");
    Assert.assertEquals(
        result.getType(),
        com.linkedin.datahub.graphql.generated.FreshnessAssertionType.DATASET_CHANGE);
    Assert.assertEquals(
        result.getFilter().getType(), com.linkedin.datahub.graphql.generated.DatasetFilterType.SQL);
    Assert.assertEquals(result.getFilter().getSql(), "WHERE value > 5;");
    Assert.assertEquals(
        result.getSchedule().getType(),
        com.linkedin.datahub.graphql.generated.FreshnessAssertionScheduleType.CRON);
    Assert.assertEquals(result.getSchedule().getCron().getCron(), "0 0 0 * * ? *");
    Assert.assertEquals(result.getSchedule().getCron().getTimezone(), "America/Los_Angeles");
    Assert.assertEquals(result.getSchedule().getCron().getWindowStartOffsetMs(), Long.valueOf(10L));
  }

  @Test
  public void testMapFixedIntervalFreshnessAssertionInfo() throws Exception {
    FreshnessAssertionInfo freshnessAssertionInfo =
        new FreshnessAssertionInfo()
            .setEntity(new Urn("urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)"))
            .setType(FreshnessAssertionType.DATASET_CHANGE)
            .setFilter(
                new DatasetFilter().setType(DatasetFilterType.SQL).setSql("WHERE value > 5;"))
            .setSchedule(
                new FreshnessAssertionSchedule()
                    .setType(FreshnessAssertionScheduleType.FIXED_INTERVAL)
                    .setFixedInterval(
                        new FixedIntervalSchedule().setUnit(CalendarInterval.DAY).setMultiple(10)));

    com.linkedin.datahub.graphql.generated.FreshnessAssertionInfo result =
        FreshnessAssertionMapper.mapFreshnessAssertionInfo(null, freshnessAssertionInfo);
    Assert.assertEquals(result.getEntityUrn(), "urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)");
    Assert.assertEquals(
        result.getType(),
        com.linkedin.datahub.graphql.generated.FreshnessAssertionType.DATASET_CHANGE);
    Assert.assertEquals(
        result.getFilter().getType(), com.linkedin.datahub.graphql.generated.DatasetFilterType.SQL);
    Assert.assertEquals(result.getFilter().getSql(), "WHERE value > 5;");
    Assert.assertEquals(
        result.getSchedule().getType(),
        com.linkedin.datahub.graphql.generated.FreshnessAssertionScheduleType.FIXED_INTERVAL);
    Assert.assertEquals(
        result.getSchedule().getFixedInterval().getUnit(),
        com.linkedin.datahub.graphql.generated.DateInterval.DAY);
    Assert.assertEquals(result.getSchedule().getFixedInterval().getMultiple(), 10);
  }
}
@ -0,0 +1,78 @@
package com.linkedin.datahub.graphql.types.assertion;

import com.linkedin.assertion.AssertionStdOperator;
import com.linkedin.assertion.AssertionStdParameter;
import com.linkedin.assertion.AssertionStdParameterType;
import com.linkedin.assertion.AssertionStdParameters;
import com.linkedin.assertion.AssertionValueChangeType;
import com.linkedin.assertion.SqlAssertionInfo;
import com.linkedin.assertion.SqlAssertionType;
import com.linkedin.common.urn.Urn;
import org.testng.Assert;
import org.testng.annotations.Test;

public class SqlAssertionMapperTest {
  @Test
  public void testMapMetricSqlAssertionInfo() throws Exception {
    SqlAssertionInfo sqlAssertionInfo =
        new SqlAssertionInfo()
            .setEntity(new Urn("urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)"))
            .setType(SqlAssertionType.METRIC)
            .setStatement("SELECT COUNT(*) FROM foo.bar.baz")
            .setOperator(AssertionStdOperator.GREATER_THAN)
            .setParameters(
                new AssertionStdParameters()
                    .setValue(
                        new AssertionStdParameter()
                            .setType(AssertionStdParameterType.NUMBER)
                            .setValue("5")));

    com.linkedin.datahub.graphql.generated.SqlAssertionInfo result =
        SqlAssertionMapper.mapSqlAssertionInfo(sqlAssertionInfo);
    Assert.assertEquals(result.getEntityUrn(), "urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)");
    Assert.assertEquals(
        result.getType(), com.linkedin.datahub.graphql.generated.SqlAssertionType.METRIC);
    Assert.assertEquals(result.getStatement(), "SELECT COUNT(*) FROM foo.bar.baz");
    Assert.assertEquals(
        result.getOperator(),
        com.linkedin.datahub.graphql.generated.AssertionStdOperator.GREATER_THAN);
    Assert.assertEquals(
        result.getParameters().getValue().getType(),
        com.linkedin.datahub.graphql.generated.AssertionStdParameterType.NUMBER);
    Assert.assertEquals(result.getParameters().getValue().getValue(), "5");
  }

  @Test
  public void testMapMetricChangeSqlAssertionInfo() throws Exception {
    SqlAssertionInfo sqlAssertionInfo =
        new SqlAssertionInfo()
            .setEntity(new Urn("urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)"))
            .setType(SqlAssertionType.METRIC_CHANGE)
            .setStatement("SELECT COUNT(*) FROM foo.bar.baz")
            .setChangeType(AssertionValueChangeType.ABSOLUTE)
            .setOperator(AssertionStdOperator.GREATER_THAN)
            .setParameters(
                new AssertionStdParameters()
                    .setValue(
                        new AssertionStdParameter()
                            .setType(AssertionStdParameterType.NUMBER)
                            .setValue("5")));

    com.linkedin.datahub.graphql.generated.SqlAssertionInfo result =
        SqlAssertionMapper.mapSqlAssertionInfo(sqlAssertionInfo);
    Assert.assertEquals(result.getEntityUrn(), "urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)");
    Assert.assertEquals(
        result.getType(), com.linkedin.datahub.graphql.generated.SqlAssertionType.METRIC_CHANGE);
    Assert.assertEquals(result.getStatement(), "SELECT COUNT(*) FROM foo.bar.baz");
    Assert.assertEquals(
        result.getOperator(),
        com.linkedin.datahub.graphql.generated.AssertionStdOperator.GREATER_THAN);
    Assert.assertEquals(
        result.getParameters().getValue().getType(),
        com.linkedin.datahub.graphql.generated.AssertionStdParameterType.NUMBER);
    Assert.assertEquals(result.getParameters().getValue().getValue(), "5");
    Assert.assertEquals(
        result.getChangeType(),
        com.linkedin.datahub.graphql.generated.AssertionValueChangeType.ABSOLUTE);
  }
}
@ -0,0 +1,207 @@
package com.linkedin.datahub.graphql.types.assertion;

import com.linkedin.assertion.AssertionStdOperator;
import com.linkedin.assertion.AssertionStdParameter;
import com.linkedin.assertion.AssertionStdParameterType;
import com.linkedin.assertion.AssertionStdParameters;
import com.linkedin.assertion.AssertionValueChangeType;
import com.linkedin.assertion.IncrementingSegmentFieldTransformer;
import com.linkedin.assertion.IncrementingSegmentFieldTransformerType;
import com.linkedin.assertion.IncrementingSegmentRowCountChange;
import com.linkedin.assertion.IncrementingSegmentRowCountTotal;
import com.linkedin.assertion.RowCountChange;
import com.linkedin.assertion.RowCountTotal;
import com.linkedin.assertion.VolumeAssertionInfo;
import com.linkedin.assertion.VolumeAssertionType;
import com.linkedin.common.urn.Urn;
import com.linkedin.dataset.DatasetFilter;
import com.linkedin.dataset.DatasetFilterType;
import com.linkedin.schema.SchemaFieldSpec;
import org.testng.Assert;
import org.testng.annotations.Test;

public class VolumeAssertionMapperTest {
  @Test
  public void testMapRowCountTotalVolumeAssertionInfo() throws Exception {
    VolumeAssertionInfo volumeAssertionInfo =
        new VolumeAssertionInfo()
            .setEntity(new Urn("urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)"))
            .setType(VolumeAssertionType.ROW_COUNT_TOTAL)
            .setFilter(
                new DatasetFilter().setType(DatasetFilterType.SQL).setSql("WHERE value > 5;"))
            .setRowCountTotal(
                new RowCountTotal()
                    .setOperator(AssertionStdOperator.GREATER_THAN_OR_EQUAL_TO)
                    .setParameters(
                        new AssertionStdParameters()
                            .setValue(
                                new AssertionStdParameter()
                                    .setType(AssertionStdParameterType.NUMBER)
                                    .setValue("10"))));

    com.linkedin.datahub.graphql.generated.VolumeAssertionInfo result =
        VolumeAssertionMapper.mapVolumeAssertionInfo(null, volumeAssertionInfo);
    Assert.assertEquals(result.getEntityUrn(), "urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)");
    Assert.assertEquals(
        result.getType(),
        com.linkedin.datahub.graphql.generated.VolumeAssertionType.ROW_COUNT_TOTAL);
    Assert.assertEquals(
        result.getFilter().getType(), com.linkedin.datahub.graphql.generated.DatasetFilterType.SQL);
    Assert.assertEquals(result.getFilter().getSql(), "WHERE value > 5;");
    Assert.assertEquals(
        result.getRowCountTotal().getOperator(),
        com.linkedin.datahub.graphql.generated.AssertionStdOperator.GREATER_THAN_OR_EQUAL_TO);
    Assert.assertEquals(
        result.getRowCountTotal().getParameters().getValue().getType(),
        com.linkedin.datahub.graphql.generated.AssertionStdParameterType.NUMBER);
    Assert.assertEquals(result.getRowCountTotal().getParameters().getValue().getValue(), "10");
  }

  @Test
  public void testMapRowCountChangeVolumeAssertionInfo() throws Exception {
    VolumeAssertionInfo volumeAssertionInfo =
        new VolumeAssertionInfo()
            .setEntity(new Urn("urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)"))
            .setType(VolumeAssertionType.ROW_COUNT_CHANGE)
            .setFilter(
                new DatasetFilter().setType(DatasetFilterType.SQL).setSql("WHERE value > 5;"))
            .setRowCountChange(
                new RowCountChange()
                    .setOperator(AssertionStdOperator.GREATER_THAN_OR_EQUAL_TO)
                    .setParameters(
                        new AssertionStdParameters()
                            .setValue(
                                new AssertionStdParameter()
                                    .setType(AssertionStdParameterType.NUMBER)
                                    .setValue("10")))
                    .setType(AssertionValueChangeType.ABSOLUTE));

    com.linkedin.datahub.graphql.generated.VolumeAssertionInfo result =
        VolumeAssertionMapper.mapVolumeAssertionInfo(null, volumeAssertionInfo);
    Assert.assertEquals(result.getEntityUrn(), "urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)");
    Assert.assertEquals(
        result.getType(),
        com.linkedin.datahub.graphql.generated.VolumeAssertionType.ROW_COUNT_CHANGE);
    Assert.assertEquals(
        result.getFilter().getType(), com.linkedin.datahub.graphql.generated.DatasetFilterType.SQL);
    Assert.assertEquals(result.getFilter().getSql(), "WHERE value > 5;");
    Assert.assertEquals(
        result.getRowCountChange().getOperator(),
        com.linkedin.datahub.graphql.generated.AssertionStdOperator.GREATER_THAN_OR_EQUAL_TO);
    Assert.assertEquals(
        result.getRowCountChange().getParameters().getValue().getType(),
        com.linkedin.datahub.graphql.generated.AssertionStdParameterType.NUMBER);
    Assert.assertEquals(result.getRowCountChange().getParameters().getValue().getValue(), "10");
    Assert.assertEquals(
        result.getRowCountChange().getType(),
        com.linkedin.datahub.graphql.generated.AssertionValueChangeType.ABSOLUTE);
  }

  @Test
  public void testMapIncrementingSegmentRowCountTotalVolumeAssertionInfo() throws Exception {
    VolumeAssertionInfo volumeAssertionInfo =
        new VolumeAssertionInfo()
            .setEntity(new Urn("urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)"))
            .setType(VolumeAssertionType.INCREMENTING_SEGMENT_ROW_COUNT_TOTAL)
            .setIncrementingSegmentRowCountTotal(
                new IncrementingSegmentRowCountTotal()
                    .setOperator(AssertionStdOperator.GREATER_THAN_OR_EQUAL_TO)
                    .setParameters(
                        new AssertionStdParameters()
                            .setValue(
                                new AssertionStdParameter()
                                    .setType(AssertionStdParameterType.NUMBER)
                                    .setValue("10")))
                    .setSegment(
                        new com.linkedin.assertion.IncrementingSegmentSpec()
                            .setField(
                                new SchemaFieldSpec()
                                    .setPath("path")
                                    .setNativeType("VARCHAR")
                                    .setType("STRING"))
                            .setTransformer(
                                new IncrementingSegmentFieldTransformer()
                                    .setType(IncrementingSegmentFieldTransformerType.CEILING)
                                    .setNativeType("CEILING"))));

    com.linkedin.datahub.graphql.generated.VolumeAssertionInfo result =
        VolumeAssertionMapper.mapVolumeAssertionInfo(null, volumeAssertionInfo);
    Assert.assertEquals(result.getEntityUrn(), "urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)");
    Assert.assertEquals(
        result.getType(),
        com.linkedin.datahub.graphql.generated.VolumeAssertionType
            .INCREMENTING_SEGMENT_ROW_COUNT_TOTAL);
    Assert.assertEquals(
        result.getIncrementingSegmentRowCountTotal().getOperator(),
        com.linkedin.datahub.graphql.generated.AssertionStdOperator.GREATER_THAN_OR_EQUAL_TO);
    Assert.assertEquals(
        result.getIncrementingSegmentRowCountTotal().getParameters().getValue().getType(),
        com.linkedin.datahub.graphql.generated.AssertionStdParameterType.NUMBER);
    Assert.assertEquals(
        result.getIncrementingSegmentRowCountTotal().getParameters().getValue().getValue(), "10");
    Assert.assertEquals(
        result.getIncrementingSegmentRowCountTotal().getSegment().getField().getPath(), "path");
    Assert.assertEquals(
        result.getIncrementingSegmentRowCountTotal().getSegment().getField().getNativeType(),
        "VARCHAR");
    Assert.assertEquals(
        result.getIncrementingSegmentRowCountTotal().getSegment().getField().getType(), "STRING");
    Assert.assertEquals(
        result.getIncrementingSegmentRowCountTotal().getSegment().getTransformer().getType(),
        com.linkedin.datahub.graphql.generated.IncrementingSegmentFieldTransformerType.CEILING);
    Assert.assertEquals(
        result.getIncrementingSegmentRowCountTotal().getSegment().getTransformer().getNativeType(),
        "CEILING");
  }

  @Test
  public void testMapIncrementingSegmentRowCountChangeVolumeAssertionInfo() throws Exception {
    VolumeAssertionInfo volumeAssertionInfo =
        new VolumeAssertionInfo()
            .setEntity(new Urn("urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)"))
            .setType(VolumeAssertionType.INCREMENTING_SEGMENT_ROW_COUNT_CHANGE)
            .setIncrementingSegmentRowCountChange(
                new IncrementingSegmentRowCountChange()
                    .setType(AssertionValueChangeType.ABSOLUTE)
                    .setOperator(AssertionStdOperator.GREATER_THAN_OR_EQUAL_TO)
                    .setParameters(
                        new AssertionStdParameters()
                            .setValue(
                                new AssertionStdParameter()
                                    .setType(AssertionStdParameterType.NUMBER)
                                    .setValue("10")))
                    .setSegment(
                        new com.linkedin.assertion.IncrementingSegmentSpec()
                            .setField(
                                new SchemaFieldSpec()
                                    .setPath("path")
                                    .setNativeType("VARCHAR")
                                    .setType("STRING"))));

    com.linkedin.datahub.graphql.generated.VolumeAssertionInfo result =
        VolumeAssertionMapper.mapVolumeAssertionInfo(null, volumeAssertionInfo);
    Assert.assertEquals(result.getEntityUrn(), "urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)");
    Assert.assertEquals(
        result.getType(),
        com.linkedin.datahub.graphql.generated.VolumeAssertionType
            .INCREMENTING_SEGMENT_ROW_COUNT_CHANGE);
    Assert.assertEquals(
        result.getIncrementingSegmentRowCountChange().getType(),
        com.linkedin.datahub.graphql.generated.AssertionValueChangeType.ABSOLUTE);
    Assert.assertEquals(
        result.getIncrementingSegmentRowCountChange().getOperator(),
        com.linkedin.datahub.graphql.generated.AssertionStdOperator.GREATER_THAN_OR_EQUAL_TO);
    Assert.assertEquals(
        result.getIncrementingSegmentRowCountChange().getParameters().getValue().getType(),
        com.linkedin.datahub.graphql.generated.AssertionStdParameterType.NUMBER);
    Assert.assertEquals(
        result.getIncrementingSegmentRowCountChange().getParameters().getValue().getValue(), "10");
    Assert.assertEquals(
        result.getIncrementingSegmentRowCountChange().getSegment().getField().getPath(), "path");
    Assert.assertEquals(
        result.getIncrementingSegmentRowCountChange().getSegment().getField().getNativeType(),
        "VARCHAR");
    Assert.assertEquals(
        result.getIncrementingSegmentRowCountChange().getSegment().getField().getType(), "STRING");
  }
}
@ -9,6 +9,7 @@ cat ../../datahub-graphql-core/src/main/resources/app.graphql >> combined.graphql
cat ../../datahub-graphql-core/src/main/resources/auth.graphql >> combined.graphql
cat ../../datahub-graphql-core/src/main/resources/constraints.graphql >> combined.graphql
cat ../../datahub-graphql-core/src/main/resources/entity.graphql >> combined.graphql
cat ../../datahub-graphql-core/src/main/resources/assertions.graphql >> combined.graphql
cat ../../datahub-graphql-core/src/main/resources/ingestion.graphql >> combined.graphql
cat ../../datahub-graphql-core/src/main/resources/recommendation.graphql >> combined.graphql
cat ../../datahub-graphql-core/src/main/resources/search.graphql >> combined.graphql

@ -285,6 +285,7 @@ public class Constants {
  public static final String ASSERTION_INFO_ASPECT_NAME = "assertionInfo";
  public static final String ASSERTION_RUN_EVENT_ASPECT_NAME = "assertionRunEvent";
  public static final String ASSERTION_RUN_EVENT_STATUS_COMPLETE = "COMPLETE";
  public static final String ASSERTION_ACTIONS_ASPECT_NAME = "assertionActions";

  // Tests
  public static final String TEST_ENTITY_NAME = "test";

@ -0,0 +1,76 @@
version: 1
namespace: test-config-id-1
assertions:
  # Freshness Assertion
  - entity: urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.public.test_assertions_all_times,PROD)
    type: freshness
    lookback_interval: "1 hour"
    last_modified_field: col_timestamp
    schedule:
      type: cron
      cron: 0 * * * *
    meta:
      entity_qualified_name: TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES
      entity_schema:
        - col: col_date
          native_type: DATE
  # Volume Assertion
  - type: volume
    entity: urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.public.test_assertions_all_times,PROD)
    metric: row_count
    condition:
      type: less_than_or_equal_to
      value: 1000
    schedule:
      type: cron
      cron: 0 * * * *
    meta:
      entity_qualified_name: TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES
      entity_schema:
        - col: col_date
          native_type: DATE
  # Field Metric Assertion
  - type: field
    entity: urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.public.test_assertions_all_times,PROD)
    field: col_date
    metric: null_count
    condition:
      type: equal_to
      value: 0
    schedule:
      type: cron
      cron: 0 * * * *
    meta:
      entity_qualified_name: TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES
      entity_schema:
        - col: col_date
          native_type: DATE
  # Field Value Assertion
  - type: field
    entity: urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.public.purchase_event,PROD)
    field: quantity
    condition:
      type: between
      min: 0
      max: 10
    schedule:
      type: on_table_change
    meta:
      entity_qualified_name: TEST_DB.PUBLIC.PURCHASE_EVENT
      entity_schema:
        - col: quantity
          native_type: FLOAT
  # Custom SQL Metric Assertion
  - type: sql
    entity: urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.public.purchase_event,PROD)
    statement: select mode(quantity) from test_db.public.purchase_event
    condition:
      type: equal_to
      value: 5
    schedule:
      type: on_table_change
    meta:
      entity_qualified_name: TEST_DB.PUBLIC.PURCHASE_EVENT
      entity_schema:
        - col: quantity
          native_type: FLOAT
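This config file is what the new assertions CLI consumes: each entry's condition block selects one of the operator models defined later in this diff, and the file as a whole parses into the AssertionsConfigSpec model shown below. A quick sanity check with ruamel.yaml (the file path is hypothetical; ruamel is the same library the spec's from_yaml uses):

from ruamel.yaml import YAML

# Illustrative only: load the spec file above and peek at each condition type.
with open("snowflake_dmf_assertions.yml") as fp:  # hypothetical path
    config = YAML(typ="rt").load(fp)

for entry in config["assertions"]:
    # The freshness entry has no condition block, so its condition type prints as None.
    print(entry.get("type"), entry.get("condition", {}).get("type"))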
@ -0,0 +1,57 @@
from abc import abstractmethod
from typing import Optional

from datahub.api.entities.assertion.assertion_trigger import AssertionTrigger
from datahub.configuration.pydantic_migration_helpers import v1_ConfigModel, v1_Field
from datahub.metadata.com.linkedin.pegasus2avro.assertion import AssertionInfo


class BaseAssertionProtocol(v1_ConfigModel):
    @abstractmethod
    def get_id(self) -> str:
        pass

    @abstractmethod
    def get_assertion_info_aspect(
        self,
    ) -> AssertionInfo:
        pass

    @abstractmethod
    def get_assertion_trigger(
        self,
    ) -> Optional[AssertionTrigger]:
        pass


class BaseAssertion(v1_ConfigModel):
    id_raw: Optional[str] = v1_Field(
        default=None,
        description="The raw id of the assertion. "
        "If provided, this is used when creating the identifier for this assertion "
        "along with the assertion type and entity.",
    )

    id: Optional[str] = v1_Field(
        default=None,
        description="The id of the assertion. "
        "If provided, this is used as the identifier for this assertion. "
        "If provided, no other assertion fields are considered to create the identifier.",
    )

    description: Optional[str] = None

    # Can contain metadata extracted from datahub, e.g.
    # - entity qualified name
    # - entity schema
    meta: Optional[dict] = None


class BaseEntityAssertion(BaseAssertion):
    entity: str = v1_Field(
        description="The entity urn that the assertion is associated with"
    )

    trigger: Optional[AssertionTrigger] = v1_Field(
        description="The trigger schedule for the assertion", alias="schedule"
    )
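As the field descriptions above state, an explicit id wins outright, while id_raw is combined with the assertion type and entity to derive an identifier. The exact derivation lives elsewhere in the CLI; the sketch below only illustrates that precedence, and the md5-based scheme is an assumption, not the actual implementation:

import hashlib
from typing import Optional

def derive_assertion_id(
    explicit_id: Optional[str], id_raw: Optional[str], assertion_type: str, entity: str
) -> str:
    # An explicit id short-circuits everything else.
    if explicit_id is not None:
        return explicit_id
    # Otherwise combine the raw id with the type and entity into a stable digest.
    key = f"{entity}-{assertion_type}-{id_raw or ''}"
    return hashlib.md5(key.encode("utf-8")).hexdigest()  # assumed scheme, for illustration

print(derive_assertion_id(None, "my-check", "freshness", "urn:li:dataset:(...)"))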
@ -0,0 +1,41 @@
from typing import List, Optional

from ruamel.yaml import YAML
from typing_extensions import Literal

from datahub.api.entities.assertion.datahub_assertion import DataHubAssertion
from datahub.configuration.pydantic_migration_helpers import v1_ConfigModel, v1_Field


class AssertionsConfigSpec(v1_ConfigModel):
    """
    Declarative configuration specification for datahub assertions.

    This model is used as a simpler, Python-native representation to define assertions.
    It can be easily parsed from an equivalent YAML file.

    Currently, this is converted into a series of assertion MCPs that can be emitted to DataHub.
    In the future, this will invoke the DataHub GraphQL API to upsert assertions.
    """

    version: Literal[1]

    id: Optional[str] = v1_Field(
        default=None,
        alias="namespace",
        description="Unique identifier of the assertions configuration file",
    )

    assertions: List[DataHubAssertion]

    @classmethod
    def from_yaml(
        cls,
        file: str,
    ) -> "AssertionsConfigSpec":
        with open(file) as fp:
            yaml = YAML(typ="rt")  # the default, if not specified, is 'rt' (round-trip)
            orig_dictionary = yaml.load(fp)
            parsed_spec = AssertionsConfigSpec.parse_obj(orig_dictionary)
            # parsed_spec._original_yaml_dict = orig_dictionary
            return parsed_spec
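Putting the pieces together, the YAML file shown earlier in this diff round-trips through from_yaml into typed assertion models. A minimal usage sketch (the path is hypothetical):

# Illustrative usage of the spec defined above.
spec = AssertionsConfigSpec.from_yaml("snowflake_dmf_assertions.yml")  # hypothetical path
assert spec.version == 1
print(spec.id)               # the file's `namespace`, e.g. test-config-id-1
print(len(spec.assertions))  # one entry per assertion in the file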
@ -0,0 +1,304 @@
import json
from typing import List, Optional, Union

from typing_extensions import Literal, Protocol

from datahub.configuration.pydantic_migration_helpers import v1_ConfigModel
from datahub.metadata.schema_classes import (
    AssertionStdOperatorClass,
    AssertionStdParameterClass,
    AssertionStdParametersClass,
    AssertionStdParameterTypeClass,
)


class Operator(Protocol):
    """Specification for an assertion operator.

    This class exists only for documentation (it is not used in type checking).
    """

    operator: str

    def id(self) -> str:
        ...

    def generate_parameters(self) -> AssertionStdParametersClass:
        ...


def _generate_assertion_std_parameter(
    value: Union[str, int, float, list]
) -> AssertionStdParameterClass:
    if isinstance(value, str):
        return AssertionStdParameterClass(
            value=value, type=AssertionStdParameterTypeClass.STRING
        )
    elif isinstance(value, (int, float)):
        return AssertionStdParameterClass(
            value=str(value), type=AssertionStdParameterTypeClass.NUMBER
        )
    elif isinstance(value, list):
        return AssertionStdParameterClass(
            value=json.dumps(value), type=AssertionStdParameterTypeClass.LIST
        )
    else:
        raise ValueError(
            f"Unsupported assertion parameter {value} of type {type(value)}"
        )


Param = Union[str, int, float, List[Union[str, float, int]]]


def _generate_assertion_std_parameters(
    value: Optional[Param] = None,
    min_value: Optional[Param] = None,
    max_value: Optional[Param] = None,
) -> AssertionStdParametersClass:
    # Compare against None explicitly so falsy-but-valid values like 0 survive.
    return AssertionStdParametersClass(
        value=_generate_assertion_std_parameter(value) if value is not None else None,
        minValue=_generate_assertion_std_parameter(min_value)
        if min_value is not None
        else None,
        maxValue=_generate_assertion_std_parameter(max_value)
        if max_value is not None
        else None,
    )


class EqualToOperator(v1_ConfigModel):
    type: Literal["equal_to"]
    value: Union[str, int, float]

    operator: str = AssertionStdOperatorClass.EQUAL_TO

    def id(self) -> str:
        return f"{self.type}-{self.value}"

    def generate_parameters(self) -> AssertionStdParametersClass:
        return _generate_assertion_std_parameters(value=self.value)


class NotEqualToOperator(v1_ConfigModel):
    type: Literal["not_equal_to"]
    value: Union[str, int, float]

    operator: str = AssertionStdOperatorClass.NOT_EQUAL_TO

    def id(self) -> str:
        return f"{self.type}-{self.value}"

    def generate_parameters(self) -> AssertionStdParametersClass:
        return _generate_assertion_std_parameters(value=self.value)


class BetweenOperator(v1_ConfigModel):
    type: Literal["between"]
    min: Union[int, float]
    max: Union[int, float]

    operator: str = AssertionStdOperatorClass.BETWEEN

    def id(self) -> str:
        return f"{self.type}-{self.min}-{self.max}"

    def generate_parameters(self) -> AssertionStdParametersClass:
        return _generate_assertion_std_parameters(
            min_value=self.min, max_value=self.max
        )


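For instance, the `between` condition from the YAML file above (min: 0, max: 10) becomes a BetweenOperator whose parameters carry stringified numeric bounds. Note that this relies on the explicit None checks in _generate_assertion_std_parameters, since min here is the falsy value 0. A brief sketch:

op = BetweenOperator(type="between", min=0, max=10)
params = op.generate_parameters()
# Bounds are serialized as strings with a NUMBER type annotation.
assert params.minValue.value == "0" and params.maxValue.value == "10"
print(op.id())  # between-0-10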
class LessThanOperator(v1_ConfigModel):
    type: Literal["less_than"]
    value: Union[int, float]

    operator: str = AssertionStdOperatorClass.LESS_THAN

    def id(self) -> str:
        return f"{self.type}-{self.value}"

    def generate_parameters(self) -> AssertionStdParametersClass:
        return _generate_assertion_std_parameters(value=self.value)


class GreaterThanOperator(v1_ConfigModel):
    type: Literal["greater_than"]
    value: Union[int, float]

    operator: str = AssertionStdOperatorClass.GREATER_THAN

    def id(self) -> str:
        return f"{self.type}-{self.value}"

    def generate_parameters(self) -> AssertionStdParametersClass:
        return _generate_assertion_std_parameters(value=self.value)


class LessThanOrEqualToOperator(v1_ConfigModel):
    type: Literal["less_than_or_equal_to"]
    value: Union[int, float]

    operator: str = AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO

    def id(self) -> str:
        return f"{self.type}-{self.value}"

    def generate_parameters(self) -> AssertionStdParametersClass:
        return _generate_assertion_std_parameters(value=self.value)


class GreaterThanOrEqualToOperator(v1_ConfigModel):
    type: Literal["greater_than_or_equal_to"]
    value: Union[int, float]

    operator: str = AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO

    def id(self) -> str:
        return f"{self.type}-{self.value}"

    def generate_parameters(self) -> AssertionStdParametersClass:
        return _generate_assertion_std_parameters(value=self.value)


class InOperator(v1_ConfigModel):
    type: Literal["in"]
    value: List[Union[str, float, int]]

    operator: str = AssertionStdOperatorClass.IN

    def id(self) -> str:
        return f"{self.type}-{self.value}"

    def generate_parameters(self) -> AssertionStdParametersClass:
        return _generate_assertion_std_parameters(value=self.value)


class NotInOperator(v1_ConfigModel):
    type: Literal["not_in"]
    value: List[Union[str, float, int]]

    operator: str = AssertionStdOperatorClass.NOT_IN

    def id(self) -> str:
        return f"{self.type}-{self.value}"

    def generate_parameters(self) -> AssertionStdParametersClass:
        return _generate_assertion_std_parameters(value=self.value)


class IsNullOperator(v1_ConfigModel):
    type: Literal["is_null"]

    operator: str = AssertionStdOperatorClass.NULL

    def id(self) -> str:
        return f"{self.type}"

    def generate_parameters(self) -> AssertionStdParametersClass:
        return _generate_assertion_std_parameters()


class NotNullOperator(v1_ConfigModel):
    type: Literal["is_not_null"]

    operator: str = AssertionStdOperatorClass.NOT_NULL

    def id(self) -> str:
        return f"{self.type}"

    def generate_parameters(self) -> AssertionStdParametersClass:
        return _generate_assertion_std_parameters()


class IsTrueOperator(v1_ConfigModel):
    type: Literal["is_true"]

    operator: str = AssertionStdOperatorClass.IS_TRUE

    def id(self) -> str:
        return f"{self.type}"

    def generate_parameters(self) -> AssertionStdParametersClass:
        return _generate_assertion_std_parameters()


class IsFalseOperator(v1_ConfigModel):
    type: Literal["is_false"]

    operator: str = AssertionStdOperatorClass.IS_FALSE

    def id(self) -> str:
        return f"{self.type}"

    def generate_parameters(self) -> AssertionStdParametersClass:
        return _generate_assertion_std_parameters()


class ContainsOperator(v1_ConfigModel):
    type: Literal["contains"]
    value: str

    operator: str = AssertionStdOperatorClass.CONTAIN

    def id(self) -> str:
        return f"{self.type}-{self.value}"

    def generate_parameters(self) -> AssertionStdParametersClass:
        return _generate_assertion_std_parameters(value=self.value)


class EndsWithOperator(v1_ConfigModel):
    type: Literal["ends_with"]
    value: str

    operator: str = AssertionStdOperatorClass.END_WITH

    def id(self) -> str:
|
||||
return f"{self.type}-{self.value}"
|
||||
|
||||
def generate_parameters(self) -> AssertionStdParametersClass:
|
||||
return _generate_assertion_std_parameters(value=self.value)
|
||||
|
||||
|
||||
class StartsWithOperator(v1_ConfigModel):
|
||||
type: Literal["starts_with"]
|
||||
value: str
|
||||
|
||||
operator: str = AssertionStdOperatorClass.START_WITH
|
||||
|
||||
def id(self) -> str:
|
||||
return f"{self.type}-{self.value}"
|
||||
|
||||
def generate_parameters(self) -> AssertionStdParametersClass:
|
||||
return _generate_assertion_std_parameters(value=self.value)
|
||||
|
||||
|
||||
class MatchesRegexOperator(v1_ConfigModel):
|
||||
type: Literal["matches_regex"]
|
||||
value: str
|
||||
|
||||
operator: str = AssertionStdOperatorClass.REGEX_MATCH
|
||||
|
||||
def id(self) -> str:
|
||||
return f"{self.type}-{self.value}"
|
||||
|
||||
def generate_parameters(self) -> AssertionStdParametersClass:
|
||||
return _generate_assertion_std_parameters(value=self.value)
|
||||
|
||||
|
||||
Operators = Union[
|
||||
InOperator,
|
||||
NotInOperator,
|
||||
EqualToOperator,
|
||||
NotEqualToOperator,
|
||||
BetweenOperator,
|
||||
LessThanOperator,
|
||||
LessThanOrEqualToOperator,
|
||||
GreaterThanOperator,
|
||||
GreaterThanOrEqualToOperator,
|
||||
IsNullOperator,
|
||||
NotNullOperator,
|
||||
IsTrueOperator,
|
||||
IsFalseOperator,
|
||||
ContainsOperator,
|
||||
EndsWithOperator,
|
||||
StartsWithOperator,
|
||||
MatchesRegexOperator,
|
||||
]
|
||||
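As a minimal usage sketch (not part of the diff; assumes this module's imports), an operator model carries its comparison semantics and renders the standard parameter payload:

op = BetweenOperator(type="between", min=10, max=100)
assert op.operator == AssertionStdOperatorClass.BETWEEN
params = op.generate_parameters()
# min/max are serialized as strings typed NUMBER, e.g. params.minValue.value == "10"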
@ -0,0 +1,52 @@
from datetime import timedelta
from typing import Union

import humanfriendly
from typing_extensions import Literal

from datahub.configuration.pydantic_migration_helpers import (
    v1_ConfigModel,
    v1_Field,
    v1_validator,
)


class CronTrigger(v1_ConfigModel):
    type: Literal["cron"]
    cron: str = v1_Field(
        description="The cron expression to use. See https://crontab.guru/ for help."
    )
    timezone: str = v1_Field(
        "UTC",
        description="The timezone to use for the cron schedule. Defaults to UTC.",
    )


class IntervalTrigger(v1_ConfigModel):
    type: Literal["interval"]
    interval: timedelta

    @v1_validator("interval", pre=True)
    def lookback_interval_to_timedelta(cls, v):
        if isinstance(v, str):
            seconds = humanfriendly.parse_timespan(v)
            return timedelta(seconds=seconds)
        if isinstance(v, timedelta):
            # Already a timedelta; pass it through unchanged.
            return v
        raise ValueError("Invalid value.")


class EntityChangeTrigger(v1_ConfigModel):
    type: Literal["on_table_change"]


class ManualTrigger(v1_ConfigModel):
    type: Literal["manual"]


class AssertionTrigger(v1_ConfigModel):
    __root__: Union[
        CronTrigger, IntervalTrigger, EntityChangeTrigger, ManualTrigger
    ] = v1_Field(discriminator="type")

    @property
    def trigger(self):
        return self.__root__
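A minimal sketch of how the discriminated root union resolves trigger configs (assumes pydantic v1 `parse_obj` semantics via the migration helpers):

cron = AssertionTrigger.parse_obj(
    {"type": "cron", "cron": "0 * * * *", "timezone": "UTC"}
)
assert isinstance(cron.trigger, CronTrigger)

# Interval strings are parsed by humanfriendly into second counts, e.g.:
assert timedelta(seconds=humanfriendly.parse_timespan("6h")) == timedelta(hours=6)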
@ -0,0 +1,81 @@
from abc import abstractmethod
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import Dict, List, Literal

from datahub.api.entities.assertion.assertion_config_spec import AssertionsConfigSpec
from datahub.ingestion.api.report import Report
from datahub.utilities.lossy_collections import LossyDict, LossyList


class StrEnum(str, Enum):
    pass


class CompileResultArtifactType(StrEnum):
    SQL_QUERIES = "SQL_QUERIES"
    COMPILE_REPORT = "COMPILE_REPORT"


@dataclass
class CompileResultArtifact(Report):
    name: str
    type: CompileResultArtifactType
    path: Path
    description: str


@dataclass
class AssertionCompilationReport(Report):
    """Additional details to debug compilation"""

    num_processed: int = 0
    num_compile_succeeded: int = 0
    num_compile_failed: int = 0  # Likely due to assertion not supported in platform

    warnings: LossyDict[str, LossyList[str]] = field(default_factory=LossyDict)
    failures: LossyDict[str, LossyList[str]] = field(default_factory=LossyDict)

    artifacts: List[Path] = field(default_factory=list)

    def report_warning(self, key: str, reason: str) -> None:
        warnings = self.warnings.get(key, LossyList())
        warnings.append(reason)
        self.warnings[key] = warnings

    def report_failure(self, key: str, reason: str) -> None:
        failures = self.failures.get(key, LossyList())
        failures.append(reason)
        self.failures[key] = failures


@dataclass
class AssertionCompilationResult:
    """Results of the compilation step, along with a detailed report object"""

    platform: str
    status: Literal["success", "failure"]

    report: AssertionCompilationReport = field(
        default_factory=AssertionCompilationReport
    )

    artifacts: List[CompileResultArtifact] = field(default_factory=list)

    def add_artifact(self, artifact: CompileResultArtifact) -> None:
        self.artifacts.append(artifact)
        self.report.artifacts.append(artifact.path)


class AssertionCompiler:
    @classmethod
    @abstractmethod
    def create(cls, output_dir: str, extras: Dict[str, str]) -> "AssertionCompiler":
        pass

    @abstractmethod
    def compile(
        self, assertion_config_spec: AssertionsConfigSpec
    ) -> AssertionCompilationResult:
        pass
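A small sketch of how a platform compiler is expected to use the report (only names from this interface):

report = AssertionCompilationReport()
report.num_processed += 1
report.report_failure("my-assertion-id", "assertion type not supported on platform")
assert report.num_processed == 1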
@ -0,0 +1,35 @@
from typing import Optional, Union

from datahub.api.entities.assertion.assertion import BaseAssertionProtocol
from datahub.api.entities.assertion.assertion_trigger import AssertionTrigger
from datahub.api.entities.assertion.field_assertion import FieldAssertion
from datahub.api.entities.assertion.freshness_assertion import FreshnessAssertion
from datahub.api.entities.assertion.sql_assertion import SQLAssertion
from datahub.api.entities.assertion.volume_assertion import VolumeAssertion
from datahub.configuration.pydantic_migration_helpers import v1_Field
from datahub.metadata.com.linkedin.pegasus2avro.assertion import AssertionInfo


class DataHubAssertion(BaseAssertionProtocol):
    __root__: Union[
        FreshnessAssertion,
        VolumeAssertion,
        SQLAssertion,
        FieldAssertion,
        # TODO: Add SchemaAssertion
    ] = v1_Field(discriminator="type")

    @property
    def assertion(self):
        return self.__root__.assertion

    def get_assertion_info_aspect(
        self,
    ) -> AssertionInfo:
        return self.__root__.get_assertion_info_aspect()

    def get_id(self) -> str:
        return self.__root__.get_id()

    def get_assertion_trigger(self) -> Optional[AssertionTrigger]:
        return self.__root__.get_assertion_trigger()
@ -0,0 +1,158 @@
from enum import Enum
from typing import Optional, Union

from typing_extensions import Literal

from datahub.api.entities.assertion.assertion import (
    BaseAssertionProtocol,
    BaseEntityAssertion,
)
from datahub.api.entities.assertion.assertion_operator import Operators
from datahub.api.entities.assertion.assertion_trigger import AssertionTrigger
from datahub.api.entities.assertion.field_metric import FieldMetric
from datahub.api.entities.assertion.filter import DatasetFilter
from datahub.configuration.pydantic_migration_helpers import v1_ConfigModel, v1_Field
from datahub.emitter.mce_builder import datahub_guid
from datahub.metadata.com.linkedin.pegasus2avro.assertion import (
    AssertionInfo,
    AssertionType,
    FieldAssertionInfo,
    FieldAssertionType,
)
from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaFieldSpec
from datahub.metadata.schema_classes import (
    FieldMetricAssertionClass,
    FieldTransformClass,
    FieldTransformTypeClass,
    FieldValuesAssertionClass,
    FieldValuesFailThresholdClass,
    FieldValuesFailThresholdTypeClass,
)


class FieldValuesFailThreshold(v1_ConfigModel):
    type: Literal["count", "percentage"] = v1_Field(default="count")
    value: int = v1_Field(default=0)

    def to_field_values_failure_threshold(self) -> FieldValuesFailThresholdClass:
        return FieldValuesFailThresholdClass(
            type=(
                FieldValuesFailThresholdTypeClass.COUNT
                if self.type == "count"
                else FieldValuesFailThresholdTypeClass.PERCENTAGE
            ),
            value=self.value,
        )


class FieldTransform(Enum):
    LENGTH = "length"


class FieldValuesAssertion(BaseEntityAssertion):
    type: Literal["field"]
    field: str
    field_transform: Optional[FieldTransform] = v1_Field(default=None)
    operator: Operators = v1_Field(discriminator="type", alias="condition")
    filters: Optional[DatasetFilter] = v1_Field(default=None)
    failure_threshold: FieldValuesFailThreshold = v1_Field(
        default=FieldValuesFailThreshold()
    )
    exclude_nulls: bool = v1_Field(default=True)

    def get_assertion_info(
        self,
    ) -> AssertionInfo:
        return AssertionInfo(
            description=self.description,
            type=AssertionType.FIELD,
            fieldAssertion=FieldAssertionInfo(
                type=FieldAssertionType.FIELD_VALUES,
                entity=self.entity,
                fieldValuesAssertion=FieldValuesAssertionClass(
                    field=SchemaFieldSpec(
                        path=self.field,
                        type="",  # Not required
                        nativeType="",  # Not required
                    ),
                    operator=self.operator.operator,
                    parameters=self.operator.generate_parameters(),
                    failThreshold=self.failure_threshold.to_field_values_failure_threshold(),
                    excludeNulls=self.exclude_nulls,
                    transform=(
                        FieldTransformClass(type=FieldTransformTypeClass.LENGTH)
                        if self.field_transform == FieldTransform.LENGTH
                        else None
                    ),
                ),
            ),
        )

    def get_id(self) -> str:
        guid_dict = {
            "entity": self.entity,
            "type": self.type,
            "field": self.field,
            "operator": str(self.operator.operator),
            "id_raw": self.id_raw,
        }
        return self.id or datahub_guid(guid_dict)


class FieldMetricAssertion(BaseEntityAssertion):
    type: Literal["field"]
    field: str
    operator: Operators = v1_Field(discriminator="type", alias="condition")
    metric: FieldMetric
    filters: Optional[DatasetFilter] = v1_Field(default=None)

    def get_assertion_info(
        self,
    ) -> AssertionInfo:
        return AssertionInfo(
            description=self.description,
            type=AssertionType.FIELD,
            fieldAssertion=FieldAssertionInfo(
                type=FieldAssertionType.FIELD_METRIC,
                entity=self.entity,
                fieldMetricAssertion=FieldMetricAssertionClass(
                    field=SchemaFieldSpec(
                        path=self.field,
                        type="",  # Not required
                        nativeType="",  # Not required
                    ),
                    metric=self.metric.name,
                    operator=self.operator.operator,
                    parameters=self.operator.generate_parameters(),
                ),
            ),
        )

    def get_id(self) -> str:
        guid_dict = {
            "entity": self.entity,
            "type": self.type,
            "field": self.field,
            "metric": self.metric.value,
            "id_raw": self.id_raw,
        }
        return self.id or datahub_guid(guid_dict)


class FieldAssertion(BaseAssertionProtocol):
    __root__: Union[FieldMetricAssertion, FieldValuesAssertion]

    @property
    def assertion(self):
        return self.__root__

    def get_id(self) -> str:
        return self.__root__.get_id()

    def get_assertion_info_aspect(
        self,
    ) -> AssertionInfo:
        return self.__root__.get_assertion_info()

    def get_assertion_trigger(self) -> Optional[AssertionTrigger]:
        return self.__root__.trigger
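For illustration (assumes only the classes above), the failure threshold maps directly onto the PDL enum:

threshold = FieldValuesFailThreshold(type="percentage", value=5)
pdl = threshold.to_field_values_failure_threshold()
assert pdl.type == FieldValuesFailThresholdTypeClass.PERCENTAGE and pdl.value == 5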
@ -0,0 +1,21 @@
from enum import Enum


class FieldMetric(Enum):
    UNIQUE_COUNT = "unique_count"
    UNIQUE_PERCENTAGE = "unique_percentage"
    NULL_COUNT = "null_count"
    NULL_PERCENTAGE = "null_percentage"
    MIN = "min"
    MAX = "max"
    MEAN = "mean"
    MEDIAN = "median"
    STDDEV = "stddev"
    NEGATIVE_COUNT = "negative_count"
    NEGATIVE_PERCENTAGE = "negative_percentage"
    ZERO_COUNT = "zero_count"
    ZERO_PERCENTAGE = "zero_percentage"
    MIN_LENGTH = "min_length"
    MAX_LENGTH = "max_length"
    EMPTY_COUNT = "empty_count"
    EMPTY_PERCENTAGE = "empty_percentage"
@ -0,0 +1,13 @@
from typing_extensions import Literal

from datahub.configuration.pydantic_migration_helpers import v1_ConfigModel


class SqlFilter(v1_ConfigModel):
    type: Literal["sql"]
    sql: str


DatasetFilter = SqlFilter
# class DatasetFilter(v1_ConfigModel):
#     __root__: Union[SqlFilter] = v1_Field(discriminator="type")
@ -0,0 +1,124 @@
from datetime import timedelta
from enum import Enum
from typing import Optional, Union

import humanfriendly
from typing_extensions import Literal

from datahub.api.entities.assertion.assertion import (
    BaseAssertionProtocol,
    BaseEntityAssertion,
)
from datahub.api.entities.assertion.assertion_trigger import AssertionTrigger
from datahub.api.entities.assertion.filter import DatasetFilter
from datahub.configuration.pydantic_migration_helpers import v1_Field, v1_validator
from datahub.emitter.mce_builder import datahub_guid
from datahub.metadata.com.linkedin.pegasus2avro.assertion import (
    AssertionInfo,
    AssertionType,
    FixedIntervalSchedule,
    FreshnessAssertionInfo,
    FreshnessAssertionSchedule,
    FreshnessAssertionScheduleType,
    FreshnessAssertionType,
    FreshnessCronSchedule,
)
from datahub.metadata.com.linkedin.pegasus2avro.timeseries import CalendarInterval


class FreshnessSourceType(Enum):
    LAST_MODIFIED_COLUMN = "last_modified_column"


class CronFreshnessAssertion(BaseEntityAssertion):
    type: Literal["freshness"]
    freshness_type: Literal["cron"]
    cron: str = v1_Field(
        description="The cron expression to use. See https://crontab.guru/ for help."
    )
    timezone: str = v1_Field(
        "UTC",
        description="The timezone to use for the cron schedule. Defaults to UTC.",
    )
    source_type: FreshnessSourceType = v1_Field(
        default=FreshnessSourceType.LAST_MODIFIED_COLUMN
    )
    last_modified_field: str
    filters: Optional[DatasetFilter] = v1_Field(default=None)

    def get_assertion_info(
        self,
    ) -> AssertionInfo:
        return AssertionInfo(
            description=self.description,
            type=AssertionType.FRESHNESS,
            freshnessAssertion=FreshnessAssertionInfo(
                type=FreshnessAssertionType.DATASET_CHANGE,
                entity=self.entity,
                schedule=FreshnessAssertionSchedule(
                    type=FreshnessAssertionScheduleType.CRON,
                    cron=FreshnessCronSchedule(cron=self.cron, timezone=self.timezone),
                ),
            ),
        )


class FixedIntervalFreshnessAssertion(BaseEntityAssertion):
    type: Literal["freshness"]
    freshness_type: Literal["interval"] = v1_Field(default="interval")
    lookback_interval: timedelta
    filters: Optional[DatasetFilter] = v1_Field(default=None)
    source_type: FreshnessSourceType = v1_Field(
        default=FreshnessSourceType.LAST_MODIFIED_COLUMN
    )
    last_modified_field: str

    @v1_validator("lookback_interval", pre=True)
    def lookback_interval_to_timedelta(cls, v):
        if isinstance(v, str):
            seconds = humanfriendly.parse_timespan(v)
            return timedelta(seconds=seconds)
        if isinstance(v, timedelta):
            return v
        raise ValueError("Invalid value.")

    def get_assertion_info(
        self,
    ) -> AssertionInfo:
        return AssertionInfo(
            description=self.description,
            type=AssertionType.FRESHNESS,
            freshnessAssertion=FreshnessAssertionInfo(
                type=FreshnessAssertionType.DATASET_CHANGE,
                entity=self.entity,
                schedule=FreshnessAssertionSchedule(
                    type=FreshnessAssertionScheduleType.FIXED_INTERVAL,
                    fixedInterval=FixedIntervalSchedule(
                        unit=CalendarInterval.SECOND,
                        # total_seconds() avoids truncating intervals of a day or
                        # more (timedelta.seconds only covers the sub-day part).
                        multiple=int(self.lookback_interval.total_seconds()),
                    ),
                ),
            ),
        )


class FreshnessAssertion(BaseAssertionProtocol):
    __root__: Union[FixedIntervalFreshnessAssertion, CronFreshnessAssertion]

    @property
    def assertion(self):
        return self.__root__

    def get_id(self) -> str:
        guid_dict = {
            "entity": self.__root__.entity,
            "type": self.__root__.type,
            "id_raw": self.__root__.id_raw,
        }
        return self.__root__.id or datahub_guid(guid_dict)

    def get_assertion_info_aspect(
        self,
    ) -> AssertionInfo:
        return self.__root__.get_assertion_info()

    def get_assertion_trigger(self) -> Optional[AssertionTrigger]:
        return self.__root__.trigger
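A worked example of the fixed-interval schedule arithmetic (using total_seconds() as in the emission above):

lookback = timedelta(days=1, hours=2)
assert int(lookback.total_seconds()) == 93600  # emitted with CalendarInterval.SECOND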
@ -0,0 +1,91 @@
from typing import Optional, Union

from typing_extensions import Literal

from datahub.api.entities.assertion.assertion import (
    BaseAssertionProtocol,
    BaseEntityAssertion,
)
from datahub.api.entities.assertion.assertion_operator import Operators
from datahub.api.entities.assertion.assertion_trigger import AssertionTrigger
from datahub.configuration.pydantic_migration_helpers import v1_Field
from datahub.emitter.mce_builder import datahub_guid
from datahub.metadata.com.linkedin.pegasus2avro.assertion import (
    AssertionInfo,
    AssertionType,
    AssertionValueChangeType,
    SqlAssertionInfo,
    SqlAssertionType,
)


class SqlMetricAssertion(BaseEntityAssertion):
    type: Literal["sql"]
    statement: str
    operator: Operators = v1_Field(discriminator="type", alias="condition")

    def get_assertion_info(
        self,
    ) -> AssertionInfo:
        return AssertionInfo(
            description=self.description,
            type=AssertionType.SQL,
            sqlAssertion=SqlAssertionInfo(
                type=SqlAssertionType.METRIC,
                entity=self.entity,
                statement=self.statement,
                operator=self.operator.operator,
                parameters=self.operator.generate_parameters(),
            ),
        )


class SqlMetricChangeAssertion(BaseEntityAssertion):
    type: Literal["sql"]
    statement: str
    change_type: Literal["absolute", "percentage"]
    operator: Operators = v1_Field(discriminator="type", alias="condition")

    def get_assertion_info(
        self,
    ) -> AssertionInfo:
        return AssertionInfo(
            description=self.description,
            type=AssertionType.SQL,
            sqlAssertion=SqlAssertionInfo(
                type=SqlAssertionType.METRIC_CHANGE,
                entity=self.entity,
                statement=self.statement,
                changeType=(
                    AssertionValueChangeType.ABSOLUTE
                    if self.change_type == "absolute"
                    else AssertionValueChangeType.PERCENTAGE
                ),
                operator=self.operator.operator,
                parameters=self.operator.generate_parameters(),
            ),
        )


class SQLAssertion(BaseAssertionProtocol):
    __root__: Union[SqlMetricAssertion, SqlMetricChangeAssertion] = v1_Field()

    @property
    def assertion(self):
        return self.__root__

    def get_id(self) -> str:
        guid_dict = {
            "entity": self.__root__.entity,
            "type": self.__root__.type,
            "id_raw": self.__root__.id_raw,
        }
        return self.__root__.id or datahub_guid(guid_dict)

    def get_assertion_info_aspect(
        self,
    ) -> AssertionInfo:
        return self.__root__.get_assertion_info()

    def get_assertion_trigger(self) -> Optional[AssertionTrigger]:
        return self.__root__.trigger
@ -0,0 +1,98 @@
from typing import Optional, Union

from typing_extensions import Literal

from datahub.api.entities.assertion.assertion import (
    BaseAssertionProtocol,
    BaseEntityAssertion,
)
from datahub.api.entities.assertion.assertion_operator import Operators
from datahub.api.entities.assertion.assertion_trigger import AssertionTrigger
from datahub.api.entities.assertion.filter import DatasetFilter
from datahub.configuration.pydantic_migration_helpers import v1_Field
from datahub.emitter.mce_builder import datahub_guid
from datahub.metadata.com.linkedin.pegasus2avro.assertion import (
    AssertionInfo,
    AssertionType,
    AssertionValueChangeType,
    RowCountChange,
    RowCountTotal,
    VolumeAssertionInfo,
    VolumeAssertionType,
)


class RowCountTotalVolumeAssertion(BaseEntityAssertion):
    type: Literal["volume"]
    metric: Literal["row_count"] = v1_Field(default="row_count")
    operator: Operators = v1_Field(discriminator="type", alias="condition")
    filters: Optional[DatasetFilter] = v1_Field(default=None)

    def get_assertion_info(
        self,
    ) -> AssertionInfo:
        return AssertionInfo(
            description=self.description,
            type=AssertionType.VOLUME,
            volumeAssertion=VolumeAssertionInfo(
                type=VolumeAssertionType.ROW_COUNT_TOTAL,
                entity=self.entity,
                rowCountTotal=RowCountTotal(
                    operator=self.operator.operator,
                    parameters=self.operator.generate_parameters(),
                ),
            ),
        )


class RowCountChangeVolumeAssertion(BaseEntityAssertion):
    type: Literal["volume"]
    metric: Literal["row_count"] = v1_Field(default="row_count")
    change_type: Literal["absolute", "percentage"]
    operator: Operators = v1_Field(discriminator="type", alias="condition")
    filters: Optional[DatasetFilter] = v1_Field(default=None)

    def get_assertion_info(
        self,
    ) -> AssertionInfo:
        return AssertionInfo(
            description=self.description,
            type=AssertionType.VOLUME,
            volumeAssertion=VolumeAssertionInfo(
                type=VolumeAssertionType.ROW_COUNT_CHANGE,
                entity=self.entity,
                rowCountChange=RowCountChange(
                    type=(
                        AssertionValueChangeType.ABSOLUTE
                        if self.change_type == "absolute"
                        else AssertionValueChangeType.PERCENTAGE
                    ),
                    operator=self.operator.operator,
                    parameters=self.operator.generate_parameters(),
                ),
            ),
        )


class VolumeAssertion(BaseAssertionProtocol):
    __root__: Union[RowCountTotalVolumeAssertion, RowCountChangeVolumeAssertion]

    @property
    def assertion(self):
        return self.__root__

    def get_id(self) -> str:
        guid_dict = {
            "entity": self.__root__.entity,
            "type": self.__root__.type,
            "id_raw": self.__root__.id_raw,
        }
        return self.__root__.id or datahub_guid(guid_dict)

    def get_assertion_info_aspect(
        self,
    ) -> AssertionInfo:
        return self.__root__.get_assertion_info()

    def get_assertion_trigger(self) -> Optional[AssertionTrigger]:
        return self.__root__.trigger
151 metadata-ingestion/src/datahub/cli/specific/assertions_cli.py Normal file
@ -0,0 +1,151 @@
import logging
import os
from pathlib import Path
from typing import Dict, List, Optional

import click
from click_default_group import DefaultGroup

from datahub.api.entities.assertion.assertion_config_spec import AssertionsConfigSpec
from datahub.api.entities.assertion.compiler_interface import (
    AssertionCompilationResult,
    CompileResultArtifact,
    CompileResultArtifactType,
)
from datahub.emitter.mce_builder import make_assertion_urn
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.graph.client import get_default_graph
from datahub.integrations.assertion.registry import ASSERTION_PLATFORMS
from datahub.telemetry import telemetry
from datahub.upgrade import upgrade

logger = logging.getLogger(__name__)

REPORT_FILE_NAME = "compile_report.json"


@click.group(cls=DefaultGroup, default="upsert")
def assertions() -> None:
    """A group of commands to interact with the Assertion entity in DataHub."""
    pass


@assertions.command()
@click.option("-f", "--file", required=True, type=click.Path(exists=True))
@upgrade.check_upgrade
@telemetry.with_telemetry()
def upsert(file: str) -> None:
    """Upsert (create or update) a set of assertions in DataHub."""

    assertions_spec: AssertionsConfigSpec = AssertionsConfigSpec.from_yaml(file)

    with get_default_graph() as graph:
        for assertion_spec in assertions_spec.assertions:
            # Compute the urn up front so the failure message below cannot
            # reference an unbound variable if MCP construction itself fails.
            urn = make_assertion_urn(assertion_spec.get_id())
            try:
                mcp = MetadataChangeProposalWrapper(
                    entityUrn=urn,
                    aspect=assertion_spec.get_assertion_info_aspect(),
                )
                graph.emit_mcp(mcp)
                # TODO: Validate uniqueness of assertion ids. Report if duplicates found.
                # TODO: Use upsert graphql endpoints here instead of graph.emit_mcp.
                click.secho(f"Update succeeded for urn {urn}.", fg="green")
            except Exception as e:
                logger.exception(e)
                click.secho(
                    f"Update failed for {urn}: {e}",
                    fg="red",
                )


@assertions.command()
@click.option("-f", "--file", required=True, type=click.Path(exists=True))
@click.option("-p", "--platform", required=True, type=str)
@click.option("-o", "--output-to", required=False, type=click.Path(exists=True))
@click.option(
    "-x",
    "--extras",
    required=False,
    multiple=True,
    default=[],
    help="Platform-specific extra key-value inputs in form key=value",
)
@upgrade.check_upgrade
@telemetry.with_telemetry()
def compile(
    file: str, platform: str, output_to: Optional[str], extras: List[str]
) -> None:
    """Compile a set of assertions for the input assertion platform.

    Note that this does not run any code or execute any queries on the assertion
    platform; it only creates platform-specific artifacts that can be executed
    manually. In future, we may introduce a separate command to automatically apply
    these compiled changes in the assertion platform. Currently, generated result
    artifacts are stored in the target folder unless another folder is specified
    using the option `--output-to <folder>`.
    """

    if platform not in ASSERTION_PLATFORMS:
        click.secho(
            f"Platform {platform} is not supported.",
            fg="red",
        )
        return

    if output_to is None:
        output_to = f"{os.getcwd()}/target"

    if not os.path.isdir(output_to):
        os.mkdir(output_to)

    assertions_spec: AssertionsConfigSpec = AssertionsConfigSpec.from_yaml(file)

    try:
        compiler = ASSERTION_PLATFORMS[platform].create(
            output_dir=output_to, extras=extras_list_to_dict(extras)
        )
        result = compiler.compile(assertions_spec)

        write_report_file(output_to, result)
        click.secho("Compile report:", bold=True)
        click.echo(result.report.as_string())
        if result.status == "failure":
            click.secho("Failure", fg="yellow", bold=True)
        else:
            click.secho("Success", fg="green", bold=True)
    except Exception as e:
        logger.exception(e)
        click.secho(
            f"Compile failed: {e}",
            fg="red",
        )


def write_report_file(output_to: str, result: AssertionCompilationResult) -> None:
    report_path = Path(output_to) / REPORT_FILE_NAME
    with report_path.open("w") as f:
        result.add_artifact(
            CompileResultArtifact(
                name=REPORT_FILE_NAME,
                path=report_path,
                type=CompileResultArtifactType.COMPILE_REPORT,
                description="Detailed report about compile status",
            )
        )
        f.write(result.report.as_json())


def extras_list_to_dict(extras: List[str]) -> Dict[str, str]:
    extra_properties: Dict[str, str] = dict()
    for x in extras:
        # Split only on the first "=" so values may themselves contain "=".
        parts = x.split("=", 1)
        assert (
            len(parts) == 2
        ), f"Invalid value for extras {x}, should be in format key=value"
        extra_properties[parts[0]] = parts[1]
    return extra_properties


# TODO: support for
# Immediate:
# 1. delete assertions (from datahub)
# Later:
# 2. execute compiled assertions on assertion platform (requires connection details to platform)
# 3. cleanup assertions from assertion platform (generate artifacts; optionally execute)
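For reference, the invocation shape these commands expect (file paths and the DMF_SCHEMA value below are illustrative; compiled artifacts land in ./target unless --output-to is given):

datahub assertions upsert -f assertions.yml
datahub assertions compile -f assertions.yml -p snowflake -x DMF_SCHEMA=my_db.my_schema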
@ -25,6 +25,7 @@ from datahub.cli.get_cli import get
from datahub.cli.ingest_cli import ingest
from datahub.cli.migrate import migrate
from datahub.cli.put_cli import put
from datahub.cli.specific.assertions_cli import assertions
from datahub.cli.specific.datacontract_cli import datacontract
from datahub.cli.specific.dataproduct_cli import dataproduct
from datahub.cli.specific.dataset_cli import dataset
@ -164,6 +165,7 @@ datahub.add_command(dataset)
datahub.add_command(properties)
datahub.add_command(forms)
datahub.add_command(datacontract)
datahub.add_command(assertions)

try:
    from datahub.cli.lite_cli import lite
@ -0,0 +1,129 @@
import logging
from datetime import datetime
from typing import Callable, Iterable, List, Optional

from pydantic import BaseModel

from datahub.emitter.mce_builder import (
    make_assertion_urn,
    make_data_platform_urn,
    make_dataplatform_instance_urn,
)
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.source.snowflake.snowflake_config import SnowflakeV2Config
from datahub.ingestion.source.snowflake.snowflake_query import SnowflakeQuery
from datahub.ingestion.source.snowflake.snowflake_report import SnowflakeV2Report
from datahub.ingestion.source.snowflake.snowflake_utils import (
    SnowflakeCommonMixin,
    SnowflakeConnectionMixin,
    SnowflakeQueryMixin,
)
from datahub.metadata.com.linkedin.pegasus2avro.assertion import (
    AssertionResult,
    AssertionResultType,
    AssertionRunEvent,
    AssertionRunStatus,
)
from datahub.metadata.com.linkedin.pegasus2avro.common import DataPlatformInstance
from datahub.utilities.time import datetime_to_ts_millis

logger: logging.Logger = logging.getLogger(__name__)


class DataQualityMonitoringResult(BaseModel):
    MEASUREMENT_TIME: datetime
    METRIC_NAME: str
    TABLE_NAME: str
    TABLE_SCHEMA: str
    TABLE_DATABASE: str
    VALUE: int


class SnowflakeAssertionsHandler(
    SnowflakeCommonMixin, SnowflakeQueryMixin, SnowflakeConnectionMixin
):
    def __init__(
        self,
        config: SnowflakeV2Config,
        report: SnowflakeV2Report,
        dataset_urn_builder: Callable[[str], str],
    ) -> None:
        self.config = config
        self.report = report
        self.logger = logger
        self.dataset_urn_builder = dataset_urn_builder
        self.connection = None
        self._urns_processed: List[str] = []

    def get_assertion_workunits(
        self, discovered_datasets: List[str]
    ) -> Iterable[MetadataWorkUnit]:
        self.connection = self.create_connection()
        if self.connection is None:
            return

        cur = self.query(
            SnowflakeQuery.dmf_assertion_results(
                datetime_to_ts_millis(self.config.start_time),
                datetime_to_ts_millis(self.config.end_time),
            )
        )
        for db_row in cur:
            mcp = self._process_result_row(db_row, discovered_datasets)
            if mcp:
                yield mcp.as_workunit(is_primary_source=False)

                if mcp.entityUrn and mcp.entityUrn not in self._urns_processed:
                    self._urns_processed.append(mcp.entityUrn)
                    yield self._gen_platform_instance_wu(mcp.entityUrn)

    def _gen_platform_instance_wu(self, urn: str) -> MetadataWorkUnit:
        # Construct a MetadataChangeProposalWrapper object for the assertion platform
        return MetadataChangeProposalWrapper(
            entityUrn=urn,
            aspect=DataPlatformInstance(
                platform=make_data_platform_urn(self.platform),
                instance=(
                    make_dataplatform_instance_urn(
                        self.platform, self.config.platform_instance
                    )
                    if self.config.platform_instance
                    else None
                ),
            ),
        ).as_workunit(is_primary_source=False)

    def _process_result_row(
        self, result_row: dict, discovered_datasets: List[str]
    ) -> Optional[MetadataChangeProposalWrapper]:
        try:
            result = DataQualityMonitoringResult.parse_obj(result_row)
            assertion_guid = result.METRIC_NAME.split("__")[-1].lower()
            status = bool(result.VALUE)  # 1 if PASS, 0 if FAIL
            assertee = self.get_dataset_identifier(
                result.TABLE_NAME, result.TABLE_SCHEMA, result.TABLE_DATABASE
            )
            if assertee in discovered_datasets:
                return MetadataChangeProposalWrapper(
                    entityUrn=make_assertion_urn(assertion_guid),
                    aspect=AssertionRunEvent(
                        timestampMillis=datetime_to_ts_millis(result.MEASUREMENT_TIME),
                        runId=result.MEASUREMENT_TIME.strftime("%Y-%m-%dT%H:%M:%SZ"),
                        asserteeUrn=self.dataset_urn_builder(assertee),
                        status=AssertionRunStatus.COMPLETE,
                        assertionUrn=make_assertion_urn(assertion_guid),
                        result=AssertionResult(
                            type=(
                                AssertionResultType.SUCCESS
                                if status
                                else AssertionResultType.FAILURE
                            )
                        ),
                    ),
                )
        except Exception as e:
            self.report.report_warning("assertion-result-parse-failure", str(e))
        return None
@ -164,6 +164,12 @@ class SnowflakeV2Config(
        "username.",
    )

    include_assertion_results: bool = Field(
        default=False,
        description="Whether to ingest assertion run results for assertions created"
        " using the DataHub assertions CLI in Snowflake",
    )

    @validator("convert_urns_to_lowercase")
    def validate_convert_urns_to_lowercase(cls, v):
        if not v:
@ -1016,3 +1016,26 @@ class SnowflakeQuery:
        ORDER BY
            h.downstream_table_name
        """

    @staticmethod
    def dmf_assertion_results(start_time_millis: int, end_time_millis: int) -> str:
        pattern = r"datahub\\_\\_%"
        escape_pattern = r"\\"
        return f"""
            SELECT
                MEASUREMENT_TIME AS "MEASUREMENT_TIME",
                METRIC_NAME AS "METRIC_NAME",
                TABLE_NAME AS "TABLE_NAME",
                TABLE_SCHEMA AS "TABLE_SCHEMA",
                TABLE_DATABASE AS "TABLE_DATABASE",
                VALUE::INT AS "VALUE"
            FROM
                SNOWFLAKE.LOCAL.DATA_QUALITY_MONITORING_RESULTS
            WHERE
                MEASUREMENT_TIME >= to_timestamp_ltz({start_time_millis}, 3)
                AND MEASUREMENT_TIME < to_timestamp_ltz({end_time_millis}, 3)
                AND METRIC_NAME ilike '{pattern}' escape '{escape_pattern}'
            ORDER BY MEASUREMENT_TIME ASC;
            """
@ -50,6 +50,9 @@ from datahub.ingestion.source.snowflake.constants import (
    SnowflakeEdition,
    SnowflakeObjectDomain,
)
from datahub.ingestion.source.snowflake.snowflake_assertion import (
    SnowflakeAssertionsHandler,
)
from datahub.ingestion.source.snowflake.snowflake_config import (
    SnowflakeV2Config,
    TagOption,
@ -604,6 +607,11 @@ class SnowflakeV2Source(
        ) and self.usage_extractor:
            yield from self.usage_extractor.get_usage_workunits(discovered_datasets)

        if self.config.include_assertion_results:
            yield from SnowflakeAssertionsHandler(
                self.config, self.report, self.gen_dataset_urn
            ).get_assertion_workunits(discovered_datasets)

    def report_cache_info(self) -> None:
        lru_cache_functions: List[Callable] = [
            self.data_dictionary.get_tables_for_database,
@ -0,0 +1,61 @@
from functools import lru_cache
from typing import List, Optional, Tuple, TypedDict

from datahub.api.entities.assertion.assertion import BaseEntityAssertion
from datahub.ingestion.graph.client import get_default_graph
from datahub.metadata.com.linkedin.pegasus2avro.dataset import DatasetProperties
from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaMetadata
from datahub.utilities.urns.urn import Urn


class ColumnDict(TypedDict):
    col: str
    native_type: str


@lru_cache
def get_qualified_name_from_datahub(urn: str) -> Optional[str]:
    with get_default_graph() as graph:
        props: Optional[DatasetProperties] = graph.get_aspect(urn, DatasetProperties)
        if props is not None:
            return props.qualifiedName
    return None


@lru_cache
def get_schema_from_datahub(urn: str) -> Optional[List[ColumnDict]]:
    with get_default_graph() as graph:
        schema: Optional[SchemaMetadata] = graph.get_aspect(urn, SchemaMetadata)
        if schema is not None:
            return [
                {"col": field.fieldPath, "native_type": field.nativeDataType}
                for field in schema.fields
            ]
    return None


def get_entity_name(assertion: BaseEntityAssertion) -> Tuple[str, str, str]:
    if assertion.meta and assertion.meta.get("entity_qualified_name"):
        parts = assertion.meta["entity_qualified_name"].split(".")
    else:
        qualified_name = get_qualified_name_from_datahub(assertion.entity)
        if qualified_name is not None:
            parts = qualified_name.split(".")
        else:
            urn_id = Urn.create_from_string(assertion.entity).entity_ids[1]
            parts = urn_id.split(".")
    if len(parts) > 3:
        parts = parts[-3:]
    assert len(parts) == 3
    database = parts[-3]
    schema = parts[-2]
    table = parts[-1]
    return database, schema, table


def get_entity_schema(assertion: BaseEntityAssertion) -> Optional[List[ColumnDict]]:
    if assertion.meta and assertion.meta.get("entity_schema"):
        return assertion.meta.get("entity_schema")
    elif get_schema_from_datahub(assertion.entity):
        return get_schema_from_datahub(assertion.entity)
    return None
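A worked example of the qualified-name fallback above (assumes a three-part Snowflake name):

parts = "PROD_DB.ANALYTICS.ORDERS".split(".")
database, schema, table = parts[-3], parts[-2], parts[-1]
assert (database, schema, table) == ("PROD_DB", "ANALYTICS", "ORDERS")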
@ -0,0 +1,8 @@
from typing import Dict, Type

from datahub.api.entities.assertion.compiler_interface import AssertionCompiler
from datahub.integrations.assertion.snowflake.compiler import SnowflakeAssertionCompiler

ASSERTION_PLATFORMS: Dict[str, Type[AssertionCompiler]] = {
    "snowflake": SnowflakeAssertionCompiler
}
@ -0,0 +1,237 @@
import logging
import os
from pathlib import Path
from typing import Dict, Tuple

from datahub.api.entities.assertion.assertion_config_spec import AssertionsConfigSpec
from datahub.api.entities.assertion.assertion_operator import LessThanOrEqualToOperator
from datahub.api.entities.assertion.assertion_trigger import (
    AssertionTrigger,
    CronTrigger,
    EntityChangeTrigger,
    IntervalTrigger,
)
from datahub.api.entities.assertion.compiler_interface import (
    AssertionCompilationResult,
    AssertionCompiler,
    CompileResultArtifact,
    CompileResultArtifactType,
)
from datahub.api.entities.assertion.datahub_assertion import DataHubAssertion
from datahub.api.entities.assertion.field_assertion import FieldValuesAssertion
from datahub.api.entities.assertion.freshness_assertion import (
    FixedIntervalFreshnessAssertion,
)
from datahub.emitter.mce_builder import make_assertion_urn
from datahub.integrations.assertion.common import get_entity_name, get_entity_schema
from datahub.integrations.assertion.snowflake.dmf_generator import SnowflakeDMFHandler
from datahub.integrations.assertion.snowflake.field_metric_sql_generator import (
    SnowflakeFieldMetricSQLGenerator,
)
from datahub.integrations.assertion.snowflake.field_values_metric_sql_generator import (
    SnowflakeFieldValuesMetricSQLGenerator,
)
from datahub.integrations.assertion.snowflake.metric_operator_sql_generator import (
    SnowflakeMetricEvalOperatorSQLGenerator,
)
from datahub.integrations.assertion.snowflake.metric_sql_generator import (
    SnowflakeMetricSQLGenerator,
)

logger = logging.getLogger(__name__)

DMF_DEFINITIONS_FILE_NAME = "dmf_definitions.sql"
DMF_ASSOCIATIONS_FILE_NAME = "dmf_associations.sql"
DMF_SCHEMA_PROPERTY_KEY = "DMF_SCHEMA"


class SnowflakeAssertionCompiler(AssertionCompiler):
    def __init__(self, output_dir: str, extras: Dict[str, str]) -> None:
        self.output_dir = Path(output_dir)
        self.extras = extras
        self.metric_generator = SnowflakeMetricSQLGenerator(
            SnowflakeFieldMetricSQLGenerator(), SnowflakeFieldValuesMetricSQLGenerator()
        )
        self.metric_evaluator = SnowflakeMetricEvalOperatorSQLGenerator()
        self.dmf_handler = SnowflakeDMFHandler()

        self._entity_schedule_history: Dict[str, AssertionTrigger] = dict()

    @classmethod
    def create(
        cls, output_dir: str, extras: Dict[str, str]
    ) -> "SnowflakeAssertionCompiler":
        assert os.path.exists(
            output_dir
        ), f"Specified location {output_dir} does not exist."

        assert os.path.isdir(
            output_dir
        ), f"Specified location {output_dir} is not a folder."

        assert any(
            x.upper() == DMF_SCHEMA_PROPERTY_KEY for x in extras
        ), "Must specify value for DMF schema using -x DMF_SCHEMA=<db.schema>"

        return SnowflakeAssertionCompiler(output_dir, extras)

    def compile(
        self, assertion_config_spec: AssertionsConfigSpec
    ) -> AssertionCompilationResult:
        result = AssertionCompilationResult("snowflake", "success")

        # TODO: Create/Report permissions sql

        dmf_definitions_path = self.output_dir / DMF_DEFINITIONS_FILE_NAME
        dmf_associations_path = self.output_dir / DMF_ASSOCIATIONS_FILE_NAME
        with (dmf_definitions_path).open("w") as definitions, (
            dmf_associations_path
        ).open("w") as associations:
            for assertion_spec in assertion_config_spec.assertions:
                result.report.num_processed += 1
                try:
                    start_line = f"\n-- Start of Assertion {assertion_spec.get_id()}\n"
                    (dmf_definition, dmf_association) = self.process_assertion(
                        assertion_spec
                    )
                    end_line = f"\n-- End of Assertion {assertion_spec.get_id()}\n"

                    definitions.write(start_line)
                    definitions.write(dmf_definition)
                    definitions.write(end_line)

                    associations.write(start_line)
                    associations.write(dmf_association)
                    associations.write(end_line)

                    result.report.num_compile_succeeded += 1
                except Exception as e:
                    result.status = "failure"
                    result.report.report_failure(
                        assertion_spec.get_id(),
                        f"Failed to compile assertion of type {assertion_spec.assertion.type} due to error: {e}",
                    )
                    result.report.num_compile_failed += 1
            if result.report.num_compile_succeeded > 0:
                result.add_artifact(
                    CompileResultArtifact(
                        name=DMF_DEFINITIONS_FILE_NAME,
                        path=dmf_definitions_path,
                        type=CompileResultArtifactType.SQL_QUERIES,
                        description="SQL file containing DMF create definitions equivalent to DataHub Assertions",
                    )
                )
                result.add_artifact(
                    CompileResultArtifact(
                        name=DMF_ASSOCIATIONS_FILE_NAME,
                        path=dmf_associations_path,
                        type=CompileResultArtifactType.SQL_QUERIES,
                        description="ALTER TABLE queries to associate DMFs to table to run on configured schedule.",
                    )
                )

        return result

    def process_assertion(self, assertion: DataHubAssertion) -> Tuple[str, str]:
        # TODO: support schema assertion?

        # For a freshness assertion, the metric is the difference in seconds between
        # assertion execution time and the last time the table was updated.
        # For a field values assertion, the metric is the number or percentage of rows
        # that do not satisfy the operator condition.
        # For the remaining assertions, the numeric metric is discernible in the
        # assertion definition itself.
        metric_definition = self.metric_generator.metric_sql(assertion.assertion)

        if isinstance(assertion.assertion, FixedIntervalFreshnessAssertion):
            assertion_sql = self.metric_evaluator.operator_sql(
                LessThanOrEqualToOperator(
                    type="less_than_or_equal_to",
                    value=assertion.assertion.lookback_interval.total_seconds(),
                ),
                metric_definition,
            )
        elif isinstance(assertion.assertion, FieldValuesAssertion):
            assertion_sql = self.metric_evaluator.operator_sql(
                LessThanOrEqualToOperator(
                    type="less_than_or_equal_to",
                    value=assertion.assertion.failure_threshold.value,
                ),
                metric_definition,
            )
        else:
            assertion_sql = self.metric_evaluator.operator_sql(
                assertion.assertion.operator, metric_definition
            )

        dmf_name = get_dmf_name(assertion)
        dmf_schema_name = self.extras[DMF_SCHEMA_PROPERTY_KEY]

        args_create_dmf, args_add_dmf = get_dmf_args(assertion)

        entity_name = get_entity_name(assertion.assertion)

        self._entity_schedule_history.setdefault(
            assertion.assertion.entity, assertion.assertion.trigger
        )
        if (
            assertion.assertion.entity in self._entity_schedule_history
            and self._entity_schedule_history[assertion.assertion.entity]
            != assertion.assertion.trigger
        ):
            raise ValueError(
                "Assertions on same entity must have same schedules as of now."
                f" Found different schedules on entity {assertion.assertion.entity} ->"
                f" ({self._entity_schedule_history[assertion.assertion.entity].trigger}),"
                f" ({assertion.assertion.trigger.trigger})"
            )

        dmf_schedule = get_dmf_schedule(assertion.assertion.trigger)
        dmf_definition = self.dmf_handler.create_dmf(
            f"{dmf_schema_name}.{dmf_name}",
            args_create_dmf,
            assertion.assertion.description
            or f"Created via DataHub for assertion {make_assertion_urn(assertion.get_id())} of type {assertion.assertion.type}",
            assertion_sql,
        )
        dmf_association = self.dmf_handler.add_dmf_to_table(
            f"{dmf_schema_name}.{dmf_name}",
            args_add_dmf,
            dmf_schedule,
            ".".join(entity_name),
        )

        return dmf_definition, dmf_association


def get_dmf_name(assertion: DataHubAssertion) -> str:
    return f"datahub__{assertion.get_id()}"


def get_dmf_args(assertion: DataHubAssertion) -> Tuple[str, str]:
    """Returns a tuple with:
    - Args used to create the DMF
    - Args used to add the DMF to a table
    """
    # Snowflake does not allow creating a custom data metric
    # function without a column name argument,
    # so we fetch any one column from the table's schema.
    args_create_dmf = "ARGT TABLE({col_name} {col_type})"
    args_add_dmf = "{col_name}"
    entity_schema = get_entity_schema(assertion.assertion)
    if entity_schema:
        for col_dict in entity_schema:
            return args_create_dmf.format(
                col_name=col_dict["col"], col_type=col_dict["native_type"]
            ), args_add_dmf.format(col_name=col_dict["col"])

    raise ValueError("entity schema not available")


def get_dmf_schedule(trigger: AssertionTrigger) -> str:
    if isinstance(trigger.trigger, EntityChangeTrigger):
        return "TRIGGER_ON_CHANGES"
    elif isinstance(trigger.trigger, CronTrigger):
        return f"USING CRON {trigger.trigger.cron} {trigger.trigger.timezone}"
    elif isinstance(trigger.trigger, IntervalTrigger):
        # total_seconds() avoids truncating intervals of a day or more.
        return f"{int(trigger.trigger.interval.total_seconds() / 60)} MIN"
    else:
        raise ValueError(f"Unsupported trigger type {type(trigger.trigger)}")
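A quick sketch of the trigger-to-schedule mapping above (assumes only the trigger models from this change):

schedule = get_dmf_schedule(
    AssertionTrigger.parse_obj(
        {"type": "cron", "cron": "0 */8 * * *", "timezone": "UTC"}
    )
)
assert schedule == "USING CRON 0 */8 * * * UTC"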
@ -0,0 +1,22 @@
class SnowflakeDMFHandler:
    def create_dmf(
        self, dmf_name: str, dmf_args: str, dmf_comment: str, dmf_sql: str
    ) -> str:
        return f"""
            CREATE or REPLACE DATA METRIC FUNCTION
            {dmf_name} ({dmf_args})
            RETURNS NUMBER
            COMMENT = '{dmf_comment}'
            AS
            $$
            {dmf_sql}
            $$;
            """

    def add_dmf_to_table(
        self, dmf_name: str, dmf_col_args: str, dmf_schedule: str, table_identifier: str
    ) -> str:
        return f"""
            ALTER TABLE {table_identifier} SET DATA_METRIC_SCHEDULE = '{dmf_schedule}';
            ALTER TABLE {table_identifier} ADD DATA METRIC FUNCTION {dmf_name} ON ({dmf_col_args});
            """
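For illustration, a sketch of what these templates render (all names below are placeholders):

handler = SnowflakeDMFHandler()
ddl = handler.create_dmf(
    dmf_name="my_db.my_schema.datahub__abc123",
    dmf_args="ARGT TABLE(ORDER_ID NUMBER)",
    dmf_comment="Created via DataHub for assertion urn:li:assertion:abc123",
    dmf_sql="select case when count(*) > 0 then 1 else 0 end from ARGT",
)
# ddl contains "CREATE or REPLACE DATA METRIC FUNCTION ... RETURNS NUMBER ... $$ ... $$;"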
@ -0,0 +1,154 @@
from typing import List, Optional

from datahub.api.entities.assertion.field_assertion import FieldMetricAssertion
from datahub.api.entities.assertion.field_metric import FieldMetric
from datahub.integrations.assertion.common import get_entity_name


class SnowflakeFieldMetricSQLGenerator:
    def unique_count_sql(
        self, field_name: str, entity_name: str, dataset_filter: Optional[str]
    ) -> str:
        return f"""select count(distinct {field_name})
        from {entity_name} {self._setup_where_clause([dataset_filter])}"""

    def unique_percentage_sql(
        self, field_name: str, entity_name: str, dataset_filter: Optional[str]
    ) -> str:
        return f"""select count(distinct {field_name})/count(*)
        from {entity_name} {self._setup_where_clause([dataset_filter])}"""

    def null_count_sql(
        self, field_name: str, entity_name: str, dataset_filter: Optional[str]
    ) -> str:
        where_clause = self._setup_where_clause(
            [dataset_filter, f"{field_name} is null"]
        )
        return f"""select count(*)
        from {entity_name} {where_clause}"""

    def null_percentage_sql(
        self, field_name: str, entity_name: str, dataset_filter: Optional[str]
    ) -> str:
        return f"""select ({self.null_count_sql(field_name, entity_name, dataset_filter)})/count(*)
        from {entity_name} {self._setup_where_clause([dataset_filter])}"""

    def min_sql(
        self, field_name: str, entity_name: str, dataset_filter: Optional[str]
    ) -> str:
        return f"""select min({field_name})
        from {entity_name} {self._setup_where_clause([dataset_filter])}"""

    def max_sql(
        self, field_name: str, entity_name: str, dataset_filter: Optional[str]
    ) -> str:
        return f"""select max({field_name})
        from {entity_name} {self._setup_where_clause([dataset_filter])}"""

    def mean_sql(
        self, field_name: str, entity_name: str, dataset_filter: Optional[str]
    ) -> str:
        return f"""select avg({field_name})
        from {entity_name} {self._setup_where_clause([dataset_filter])}"""

    def median_sql(
        self, field_name: str, entity_name: str, dataset_filter: Optional[str]
    ) -> str:
        return f"""select median({field_name})
        from {entity_name} {self._setup_where_clause([dataset_filter])}"""

    def stddev_sql(
        self, field_name: str, entity_name: str, dataset_filter: Optional[str]
    ) -> str:
        return f"""select stddev({field_name})
        from {entity_name} {self._setup_where_clause([dataset_filter])}"""

    def negative_count_sql(
        self, field_name: str, entity_name: str, dataset_filter: Optional[str]
    ) -> str:
        where_clause = self._setup_where_clause([dataset_filter, f"{field_name} < 0"])
        return f"""select count(*)
        from {entity_name} {where_clause}"""

    def negative_percentage_sql(
        self, field_name: str, entity_name: str, dataset_filter: Optional[str]
    ) -> str:
        return f"""select ({self.negative_count_sql(field_name, entity_name, dataset_filter)})/count(*)
        from {entity_name} {self._setup_where_clause([dataset_filter])}"""

    def zero_count_sql(
        self, field_name: str, entity_name: str, dataset_filter: Optional[str]
    ) -> str:
        where_clause = self._setup_where_clause([dataset_filter, f"{field_name} = 0"])
        return f"""select count(*)
        from {entity_name} {where_clause}"""

    def zero_percentage_sql(
        self, field_name: str, entity_name: str, dataset_filter: Optional[str]
    ) -> str:
        return f"""select ({self.zero_count_sql(field_name, entity_name, dataset_filter)})/count(*)
        from {entity_name} {self._setup_where_clause([dataset_filter])}"""

    def min_length_sql(
        self, field_name: str, entity_name: str, dataset_filter: Optional[str]
    ) -> str:
        return f"""select min(length({field_name}))
        from {entity_name} {self._setup_where_clause([dataset_filter])}"""

    def max_length_sql(
        self, field_name: str, entity_name: str, dataset_filter: Optional[str]
    ) -> str:
        return f"""select max(length({field_name}))
        from {entity_name} {self._setup_where_clause([dataset_filter])}"""

    def empty_count_sql(
        self, field_name: str, entity_name: str, dataset_filter: Optional[str]
    ) -> str:
        where_clause = self._setup_where_clause(
            [dataset_filter, f"({field_name} is null or trim({field_name})='')"]
        )
        return f"""select count(*)
        from {entity_name} {where_clause}"""

    def empty_percentage_sql(
        self, field_name: str, entity_name: str, dataset_filter: Optional[str]
    ) -> str:
        return f"""select ({self.empty_count_sql(field_name, entity_name, dataset_filter)})/count(*)
        from {entity_name} {self._setup_where_clause([dataset_filter])}"""

    def _setup_where_clause(self, filters: List[Optional[str]]) -> str:
        where_clause = " and ".join(f for f in filters if f)
        return f"where {where_clause}" if where_clause else ""

    def metric_sql(self, assertion: FieldMetricAssertion) -> str:
        metric_sql_mapping = {
            FieldMetric.UNIQUE_COUNT: self.unique_count_sql,
            FieldMetric.UNIQUE_PERCENTAGE: self.unique_percentage_sql,
            FieldMetric.NULL_COUNT: self.null_count_sql,
            FieldMetric.NULL_PERCENTAGE: self.null_percentage_sql,
            FieldMetric.MIN: self.min_sql,
            FieldMetric.MAX: self.max_sql,
            FieldMetric.MEAN: self.mean_sql,
            FieldMetric.MEDIAN: self.median_sql,
            FieldMetric.STDDEV: self.stddev_sql,
            FieldMetric.NEGATIVE_COUNT: self.negative_count_sql,
            FieldMetric.NEGATIVE_PERCENTAGE: self.negative_percentage_sql,
            FieldMetric.ZERO_COUNT: self.zero_count_sql,
            FieldMetric.ZERO_PERCENTAGE: self.zero_percentage_sql,
            FieldMetric.MIN_LENGTH: self.min_length_sql,
            FieldMetric.MAX_LENGTH: self.max_length_sql,
            FieldMetric.EMPTY_COUNT: self.empty_count_sql,
            FieldMetric.EMPTY_PERCENTAGE: self.empty_percentage_sql,
        }

        entity_name = ".".join(get_entity_name(assertion))

        return metric_sql_mapping[assertion.metric](
            assertion.field,
            entity_name,
            (
                assertion.filters.sql
                if assertion.filters and assertion.filters.sql
                else None
            ),
        )
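A quick illustration of the SQL this generator emits, using names from the test config further below (output whitespace condensed):

gen = SnowflakeFieldMetricSQLGenerator()

# No dataset filter: only the metric's own predicate ends up in the where clause.
print(gen.null_count_sql("col_date", "TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES", None))
# select count(*)
#         from TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES where col_date is null

# With a dataset filter, conditions are joined with "and" by _setup_where_clause.
print(gen.null_count_sql("col_date", "TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES", "col_date > '2020-01-01'"))
# select count(*)
#         from TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES where col_date > '2020-01-01' and col_date is null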
@ -0,0 +1,283 @@
from functools import singledispatchmethod
from typing import List, Optional

from datahub.api.entities.assertion.assertion_operator import (
    BetweenOperator,
    ContainsOperator,
    EndsWithOperator,
    EqualToOperator,
    GreaterThanOperator,
    GreaterThanOrEqualToOperator,
    InOperator,
    IsFalseOperator,
    IsNullOperator,
    IsTrueOperator,
    LessThanOperator,
    LessThanOrEqualToOperator,
    MatchesRegexOperator,
    NotEqualToOperator,
    NotInOperator,
    NotNullOperator,
    Operators,
    StartsWithOperator,
)
from datahub.api.entities.assertion.field_assertion import (
    FieldTransform,
    FieldValuesAssertion,
)
from datahub.integrations.assertion.common import get_entity_name


class SnowflakeFieldValuesMetricSQLGenerator:
    @singledispatchmethod
    def values_metric_sql(
        self,
        operators: Operators,
        entity_name: str,
        transformed_field: str,
        where_clause: str,
    ) -> str:
        """
        Generates SQL that returns a boolean value for each table row:
        1 if the row FAILs and 0 if it PASSes. Note the unusual reversal of 1 and 0.
        This is deliberate, as the metric represents the number of failing rows.
        """
        raise ValueError(f"Unsupported values metric operator type {type(operators)} ")

    @values_metric_sql.register
    def _(
        self,
        operators: InOperator,
        entity_name: str,
        transformed_field: str,
        where_clause: str,
    ) -> str:
        return f"""select case when {transformed_field} in {tuple(operators.value)} then 0 else 1 end
        from {entity_name} {where_clause}"""

    @values_metric_sql.register
    def _(
        self,
        operators: NotInOperator,
        entity_name: str,
        transformed_field: str,
        where_clause: str,
    ) -> str:
        return f"""select case when {transformed_field} not in {tuple(operators.value)} then 0 else 1 end
        from {entity_name} {where_clause}"""

    @values_metric_sql.register
    def _(
        self,
        operators: EqualToOperator,
        entity_name: str,
        transformed_field: str,
        where_clause: str,
    ) -> str:
        return f"""select case when {transformed_field} = {operators.value} then 0 else 1 end
        from {entity_name} {where_clause}"""

    @values_metric_sql.register
    def _(
        self,
        operators: NotEqualToOperator,
        entity_name: str,
        transformed_field: str,
        where_clause: str,
    ) -> str:
        return f"""select case when {transformed_field} != {operators.value} then 0 else 1 end
        from {entity_name} {where_clause}"""

    @values_metric_sql.register
    def _(
        self,
        operators: BetweenOperator,
        entity_name: str,
        transformed_field: str,
        where_clause: str,
    ) -> str:
        return f"""select case when {transformed_field} between {operators.min} and {operators.max} then 0 else 1 end
        from {entity_name} {where_clause}"""

    @values_metric_sql.register
    def _(
        self,
        operators: LessThanOperator,
        entity_name: str,
        transformed_field: str,
        where_clause: str,
    ) -> str:
        return f"""select case when {transformed_field} < {operators.value} then 0 else 1 end
        from {entity_name} {where_clause}"""

    @values_metric_sql.register
    def _(
        self,
        operators: LessThanOrEqualToOperator,
        entity_name: str,
        transformed_field: str,
        where_clause: str,
    ) -> str:
        return f"""select case when {transformed_field} <= {operators.value} then 0 else 1 end
        from {entity_name} {where_clause}"""

    @values_metric_sql.register
    def _(
        self,
        operators: GreaterThanOperator,
        entity_name: str,
        transformed_field: str,
        where_clause: str,
    ) -> str:
        return f"""select case when {transformed_field} > {operators.value} then 0 else 1 end
        from {entity_name} {where_clause}"""

    @values_metric_sql.register
    def _(
        self,
        operators: GreaterThanOrEqualToOperator,
        entity_name: str,
        transformed_field: str,
        where_clause: str,
    ) -> str:
        return f"""select case when {transformed_field} >= {operators.value} then 0 else 1 end
        from {entity_name} {where_clause}"""

    @values_metric_sql.register
    def _(
        self,
        operators: IsNullOperator,
        entity_name: str,
        transformed_field: str,
        where_clause: str,
    ) -> str:
        return f"""select case when {transformed_field} is null then 0 else 1 end
        from {entity_name} {where_clause}"""

    @values_metric_sql.register
    def _(
        self,
        operators: NotNullOperator,
        entity_name: str,
        transformed_field: str,
        where_clause: str,
    ) -> str:
        return f"""select case when {transformed_field} is not null then 0 else 1 end
        from {entity_name} {where_clause}"""

    @values_metric_sql.register
    def _(
        self,
        operators: IsTrueOperator,
        entity_name: str,
        transformed_field: str,
        where_clause: str,
    ) -> str:
        return f"""select case when {transformed_field} then 0 else 1 end
        from {entity_name} {where_clause}"""

    @values_metric_sql.register
    def _(
        self,
        operators: IsFalseOperator,
        entity_name: str,
        transformed_field: str,
        where_clause: str,
    ) -> str:
        return f"""select case when not {transformed_field} then 0 else 1 end
        from {entity_name} {where_clause}"""

    @values_metric_sql.register
    def _(
        self,
        operators: ContainsOperator,
        entity_name: str,
        transformed_field: str,
        where_clause: str,
    ) -> str:
        return f"""select case when contains({transformed_field},'{operators.value}') then 0 else 1 end
        from {entity_name} {where_clause}"""

    @values_metric_sql.register
    def _(
        self,
        operators: StartsWithOperator,
        entity_name: str,
        transformed_field: str,
        where_clause: str,
    ) -> str:
        return f"""select case when startswith({transformed_field},'{operators.value}') then 0 else 1 end
        from {entity_name} {where_clause}"""

    @values_metric_sql.register
    def _(
        self,
        operators: EndsWithOperator,
        entity_name: str,
        transformed_field: str,
        where_clause: str,
    ) -> str:
        return f"""select case when endswith({transformed_field},'{operators.value}') then 0 else 1 end
        from {entity_name} {where_clause}"""

    @values_metric_sql.register
    def _(
        self,
        operators: MatchesRegexOperator,
        entity_name: str,
        transformed_field: str,
        where_clause: str,
    ) -> str:
        return f"""select case when REGEXP_LIKE({transformed_field},'{operators.value}') then 0 else 1 end
        from {entity_name} {where_clause}"""

    def _setup_where_clause(self, filters: List[Optional[str]]) -> str:
        where_clause = " and ".join(f for f in filters if f)
        return f"where {where_clause}" if where_clause else ""

    def _setup_field_transform(
        self, field: str, transform: Optional[FieldTransform]
    ) -> str:
        if transform is None:
            return field
        elif transform is FieldTransform.LENGTH:
            return f"length({field})"
        raise ValueError(f"Unsupported transform type {transform}")

    def metric_sql(self, assertion: FieldValuesAssertion) -> str:
        """
        Note that this applies the negated operator, in order to check whether the
        number of invalid-value rows stays below the configured failThreshold.

        Args:
            assertion (FieldValuesAssertion): the parsed field values assertion

        Returns:
            str: SQL computing the number (or percentage) of failing rows
        """
        entity_name = ".".join(get_entity_name(assertion))

        dataset_filter = (
            assertion.filters.sql
            if assertion.filters and assertion.filters.sql
            else None
        )
        where_clause = self._setup_where_clause(
            [
                dataset_filter,
                f"{assertion.field} is not null" if assertion.exclude_nulls else None,
            ]
        )
        transformed_field = self._setup_field_transform(
            assertion.field, assertion.field_transform
        )
        # this sql would return boolean value for each table row. 1 if fail and 0 if pass.
        sql = self.values_metric_sql(
            assertion.operator, entity_name, transformed_field, where_clause
        )

        # metric is either the number of failing rows or the percentage of failing rows.
        if assertion.failure_threshold.type == "count":
            return f"select sum($1) as metric from ({sql})"
        else:  # percentage
            return f"select sum($1)/count(*) as metric from ({sql})"
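An illustrative dispatch example: singledispatchmethod routes on the runtime type of the operator argument. The keyword fields of BetweenOperator (type/min/max) are assumed here for the sketch; the output matches the purchase_event golden file below.

gen = SnowflakeFieldValuesMetricSQLGenerator()

row_sql = gen.values_metric_sql(
    BetweenOperator(type="between", min=0, max=10),  # field names assumed
    "TEST_DB.PUBLIC.PURCHASE_EVENT",
    "quantity",
    "where quantity is not null",
)
# -> select case when quantity between 0 and 10 then 0 else 1 end
#    from TEST_DB.PUBLIC.PURCHASE_EVENT where quantity is not null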
@ -0,0 +1,68 @@
from functools import singledispatchmethod

from datahub.api.entities.assertion.assertion_operator import (
    BetweenOperator,
    EqualToOperator,
    GreaterThanOperator,
    GreaterThanOrEqualToOperator,
    IsFalseOperator,
    IsNullOperator,
    IsTrueOperator,
    LessThanOperator,
    LessThanOrEqualToOperator,
    NotNullOperator,
    Operators,
)


class SnowflakeMetricEvalOperatorSQLGenerator:
    @singledispatchmethod
    def operator_sql(self, operators: Operators, metric_sql: str) -> str:
        """
        Generates operator SQL that applies the operator to `metric`
        and returns a numeric boolean value: 1 if PASS, 0 if FAIL.
        """
        raise ValueError(f"Unsupported metric operator type {type(operators)} ")

    @operator_sql.register
    def _(self, operators: EqualToOperator, metric_sql: str) -> str:
        return f"select case when metric={operators.value} then 1 else 0 end from ({metric_sql})"

    @operator_sql.register
    def _(self, operators: BetweenOperator, metric_sql: str) -> str:
        return f"select case when metric between {operators.min} and {operators.max} then 1 else 0 end from ({metric_sql})"

    @operator_sql.register
    def _(self, operators: LessThanOperator, metric_sql: str) -> str:
        return f"select case when metric < {operators.value} then 1 else 0 end from ({metric_sql})"

    @operator_sql.register
    def _(self, operators: LessThanOrEqualToOperator, metric_sql: str) -> str:
        return f"select case when metric <= {operators.value} then 1 else 0 end from ({metric_sql})"

    @operator_sql.register
    def _(self, operators: GreaterThanOperator, metric_sql: str) -> str:
        return f"select case when metric > {operators.value} then 1 else 0 end from ({metric_sql})"

    @operator_sql.register
    def _(self, operators: GreaterThanOrEqualToOperator, metric_sql: str) -> str:
        return f"select case when metric >= {operators.value} then 1 else 0 end from ({metric_sql})"

    @operator_sql.register
    def _(self, operators: NotNullOperator, metric_sql: str) -> str:
        return (
            f"select case when metric is not null then 1 else 0 end from ({metric_sql})"
        )

    @operator_sql.register
    def _(self, operators: IsNullOperator, metric_sql: str) -> str:
        return f"select case when metric is null then 1 else 0 end from ({metric_sql})"

    @operator_sql.register
    def _(self, operators: IsTrueOperator, metric_sql: str) -> str:
        return f"select case when metric then 1 else 0 end from ({metric_sql})"

    @operator_sql.register
    def _(self, operators: IsFalseOperator, metric_sql: str) -> str:
        return f"select case when not metric then 1 else 0 end from ({metric_sql})"
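A usage sketch mirroring the volume assertion in the golden files below; the operator's keyword fields are assumed for illustration.

op_gen = SnowflakeMetricEvalOperatorSQLGenerator()
check_sql = op_gen.operator_sql(
    LessThanOrEqualToOperator(type="less_than_or_equal_to", value=1000),  # field names assumed
    "select count(*) as metric from TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES",
)
# -> select case when metric <= 1000 then 1 else 0 end
#    from (select count(*) as metric from TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES)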
@ -0,0 +1,97 @@
from dataclasses import dataclass
from functools import singledispatchmethod

from datahub.api.entities.assertion.assertion import BaseEntityAssertion
from datahub.api.entities.assertion.field_assertion import (
    FieldMetricAssertion,
    FieldValuesAssertion,
)
from datahub.api.entities.assertion.freshness_assertion import (
    FixedIntervalFreshnessAssertion,
    FreshnessSourceType,
)
from datahub.api.entities.assertion.sql_assertion import (
    SqlMetricAssertion,
    SqlMetricChangeAssertion,
)
from datahub.api.entities.assertion.volume_assertion import (
    RowCountChangeVolumeAssertion,
    RowCountTotalVolumeAssertion,
)
from datahub.integrations.assertion.common import get_entity_name
from datahub.integrations.assertion.snowflake.field_metric_sql_generator import (
    SnowflakeFieldMetricSQLGenerator,
)
from datahub.integrations.assertion.snowflake.field_values_metric_sql_generator import (
    SnowflakeFieldValuesMetricSQLGenerator,
)


@dataclass
class SnowflakeMetricSQLGenerator:
    field_metric_sql_generator: SnowflakeFieldMetricSQLGenerator
    field_values_metric_sql_generator: SnowflakeFieldValuesMetricSQLGenerator

    @singledispatchmethod
    def metric_sql(
        self,
        assertion: BaseEntityAssertion,
    ) -> str:
        """Generates metric SQL that typically returns a numeric metric"""
        raise ValueError(f"Unsupported assertion type {type(assertion)} ")

    @metric_sql.register
    def _(self, assertion: RowCountChangeVolumeAssertion) -> str:
        raise ValueError(f"Unsupported assertion type {type(assertion)} ")

    @metric_sql.register
    def _(self, assertion: SqlMetricChangeAssertion) -> str:
        raise ValueError(f"Unsupported assertion type {type(assertion)} ")

    @metric_sql.register
    def _(self, assertion: FixedIntervalFreshnessAssertion) -> str:
        entity_name = ".".join(get_entity_name(assertion))
        if assertion.filters and assertion.filters.sql:
            where_clause = f"where {assertion.filters.sql}"
        else:
            where_clause = ""

        if (
            assertion.source_type == FreshnessSourceType.LAST_MODIFIED_COLUMN
            and assertion.last_modified_field
        ):
            return f"""select timediff(
                second,
                max({assertion.last_modified_field}::TIMESTAMP_LTZ),
                SNOWFLAKE.CORE.DATA_METRIC_SCHEDULED_TIME()
            ) as metric from {entity_name} {where_clause}"""
        else:
            raise ValueError(
                f"Unsupported freshness source type {assertion.source_type} "
            )

    @metric_sql.register
    def _(self, assertion: RowCountTotalVolumeAssertion) -> str:
        # Cannot use information_schema here due to the error:
        # Data metric function body cannot refer to the non-deterministic function 'CURRENT_DATABASE_MAIN_METASTORE_ID'.
        entity_name = ".".join(get_entity_name(assertion))
        if assertion.filters and assertion.filters.sql:
            where_clause = f"where {assertion.filters.sql}"
        else:
            where_clause = ""
        return f"select count(*) as metric from {entity_name} {where_clause}"

    @metric_sql.register
    def _(self, assertion: SqlMetricAssertion) -> str:
        return f"select $1 as metric from ({assertion.statement})"

    @metric_sql.register
    def _(self, assertion: FieldMetricAssertion) -> str:
        sql = self.field_metric_sql_generator.metric_sql(assertion)
        return f"select $1 as metric from ({sql})"

    @metric_sql.register
    def _(self, assertion: FieldValuesAssertion) -> str:
        return self.field_values_metric_sql_generator.metric_sql(assertion)
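Putting the pieces together: the compiler layers the operator SQL over this metric SQL, which is how a row-count volume assertion with a less_than_or_equal_to: 1000 condition becomes the nested statement in the golden files below. A sketch of that layering, assuming volume_assertion is a RowCountTotalVolumeAssertion parsed from the test config:

metric_gen = SnowflakeMetricSQLGenerator(
    field_metric_sql_generator=SnowflakeFieldMetricSQLGenerator(),
    field_values_metric_sql_generator=SnowflakeFieldValuesMetricSQLGenerator(),
)

# 1. Metric layer:
#    select count(*) as metric from TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES
metric_sql = metric_gen.metric_sql(volume_assertion)  # volume_assertion: assumed parsed input

# 2. Operator layer wraps it (see SnowflakeMetricEvalOperatorSQLGenerator above):
#    select case when metric <= 1000 then 1 else 0 end from (<metric_sql>)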
@ -0,0 +1,76 @@
version: 1
namespace: test-config-id-1
assertions:
  # Freshness Assertion
  - entity: urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.public.test_assertions_all_times,PROD)
    type: freshness
    lookback_interval: "1 hour"
    last_modified_field: col_timestamp
    schedule:
      type: cron
      cron: 0 * * * *
    meta:
      entity_qualified_name: TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES
      entity_schema:
        - col: col_date
          native_type: DATE
  # Volume Assertion
  - type: volume
    entity: urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.public.test_assertions_all_times,PROD)
    metric: row_count
    condition:
      type: less_than_or_equal_to
      value: 1000
    schedule:
      type: cron
      cron: 0 * * * *
    meta:
      entity_qualified_name: TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES
      entity_schema:
        - col: col_date
          native_type: DATE
  # Field Metric Assertion
  - type: field
    entity: urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.public.test_assertions_all_times,PROD)
    field: col_date
    metric: null_count
    condition:
      type: equal_to
      value: 0
    schedule:
      type: cron
      cron: 0 * * * *
    meta:
      entity_qualified_name: TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES
      entity_schema:
        - col: col_date
          native_type: DATE
  # Field Value Assertion
  - type: field
    entity: urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.public.purchase_event,PROD)
    field: quantity
    condition:
      type: between
      min: 0
      max: 10
    schedule:
      type: on_table_change
    meta:
      entity_qualified_name: TEST_DB.PUBLIC.PURCHASE_EVENT
      entity_schema:
        - col: quantity
          native_type: FLOAT
  # Custom SQL Metric Assertion
  - type: sql
    entity: urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.public.purchase_event,PROD)
    statement: select mode(quantity) from test_db.public.purchase_event
    condition:
      type: equal_to
      value: 5
    schedule:
      type: on_table_change
    meta:
      entity_qualified_name: TEST_DB.PUBLIC.PURCHASE_EVENT
      entity_schema:
        - col: quantity
          native_type: FLOAT
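This file doubles as the fixture for the parsing and compile tests below, and can be loaded programmatically through the same config-spec API those tests use (the string path here is illustrative):

from datahub.api.entities.assertion.assertion_config_spec import AssertionsConfigSpec

config_spec = AssertionsConfigSpec.from_yaml("test_assertion_config.yml")
assert len(config_spec.assertions) == 5  # one entry per assertion block above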
@ -0,0 +1,13 @@
from datahub.api.entities.assertion.assertion_config_spec import AssertionsConfigSpec


def test_assertion_config_spec_parses_correct_type(pytestconfig):
    config_file = (
        pytestconfig.rootpath
        / "tests/unit/api/entities/assertion/test_assertion_config.yml"
    )

    config_spec = AssertionsConfigSpec.from_yaml(config_file)
    assert config_spec.version == 1
    assert config_spec.id == "test-config-id-1"
    assert len(config_spec.assertions) == 5
@ -0,0 +1,35 @@

-- Start of Assertion 025cce4dd4123c0f007908011a9c64d7

ALTER TABLE TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES SET DATA_METRIC_SCHEDULE = 'USING CRON 0 * * * * UTC';
ALTER TABLE TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES ADD DATA METRIC FUNCTION test_db.datahub_dmfs.datahub__025cce4dd4123c0f007908011a9c64d7 ON (col_date);

-- End of Assertion 025cce4dd4123c0f007908011a9c64d7

-- Start of Assertion 5c32eef47bd763fece7d21c7cbf6c659

ALTER TABLE TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES SET DATA_METRIC_SCHEDULE = 'USING CRON 0 * * * * UTC';
ALTER TABLE TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES ADD DATA METRIC FUNCTION test_db.datahub_dmfs.datahub__5c32eef47bd763fece7d21c7cbf6c659 ON (col_date);

-- End of Assertion 5c32eef47bd763fece7d21c7cbf6c659

-- Start of Assertion 04be4145bd8de10bed3dfcb0cee57842

ALTER TABLE TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES SET DATA_METRIC_SCHEDULE = 'USING CRON 0 * * * * UTC';
ALTER TABLE TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES ADD DATA METRIC FUNCTION test_db.datahub_dmfs.datahub__04be4145bd8de10bed3dfcb0cee57842 ON (col_date);

-- End of Assertion 04be4145bd8de10bed3dfcb0cee57842

-- Start of Assertion b065942d2bca8a4dbe90cc3ec2d9ca9f

ALTER TABLE TEST_DB.PUBLIC.PURCHASE_EVENT SET DATA_METRIC_SCHEDULE = 'TRIGGER_ON_CHANGES';
ALTER TABLE TEST_DB.PUBLIC.PURCHASE_EVENT ADD DATA METRIC FUNCTION test_db.datahub_dmfs.datahub__b065942d2bca8a4dbe90cc3ec2d9ca9f ON (quantity);

-- End of Assertion b065942d2bca8a4dbe90cc3ec2d9ca9f

-- Start of Assertion 170dbd53f28eedbbaba52ebbf189f6b1

ALTER TABLE TEST_DB.PUBLIC.PURCHASE_EVENT SET DATA_METRIC_SCHEDULE = 'TRIGGER_ON_CHANGES';
ALTER TABLE TEST_DB.PUBLIC.PURCHASE_EVENT ADD DATA METRIC FUNCTION test_db.datahub_dmfs.datahub__170dbd53f28eedbbaba52ebbf189f6b1 ON (quantity);

-- End of Assertion 170dbd53f28eedbbaba52ebbf189f6b1
@ -0,0 +1,71 @@

-- Start of Assertion 025cce4dd4123c0f007908011a9c64d7

CREATE or REPLACE DATA METRIC FUNCTION
test_db.datahub_dmfs.datahub__025cce4dd4123c0f007908011a9c64d7 (ARGT TABLE(col_date DATE))
RETURNS NUMBER
COMMENT = 'Created via DataHub for assertion urn:li:assertion:025cce4dd4123c0f007908011a9c64d7 of type freshness'
AS
$$
select case when metric <= 3600 then 1 else 0 end from (select timediff(
second,
max(col_timestamp::TIMESTAMP_LTZ),
SNOWFLAKE.CORE.DATA_METRIC_SCHEDULED_TIME()
) as metric from TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES )
$$;

-- End of Assertion 025cce4dd4123c0f007908011a9c64d7

-- Start of Assertion 5c32eef47bd763fece7d21c7cbf6c659

CREATE or REPLACE DATA METRIC FUNCTION
test_db.datahub_dmfs.datahub__5c32eef47bd763fece7d21c7cbf6c659 (ARGT TABLE(col_date DATE))
RETURNS NUMBER
COMMENT = 'Created via DataHub for assertion urn:li:assertion:5c32eef47bd763fece7d21c7cbf6c659 of type volume'
AS
$$
select case when metric <= 1000 then 1 else 0 end from (select count(*) as metric from TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES )
$$;

-- End of Assertion 5c32eef47bd763fece7d21c7cbf6c659

-- Start of Assertion 04be4145bd8de10bed3dfcb0cee57842

CREATE or REPLACE DATA METRIC FUNCTION
test_db.datahub_dmfs.datahub__04be4145bd8de10bed3dfcb0cee57842 (ARGT TABLE(col_date DATE))
RETURNS NUMBER
COMMENT = 'Created via DataHub for assertion urn:li:assertion:04be4145bd8de10bed3dfcb0cee57842 of type field'
AS
$$
select case when metric=0 then 1 else 0 end from (select $1 as metric from (select count(*)
from TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES where col_date is null))
$$;

-- End of Assertion 04be4145bd8de10bed3dfcb0cee57842

-- Start of Assertion b065942d2bca8a4dbe90cc3ec2d9ca9f

CREATE or REPLACE DATA METRIC FUNCTION
test_db.datahub_dmfs.datahub__b065942d2bca8a4dbe90cc3ec2d9ca9f (ARGT TABLE(quantity FLOAT))
RETURNS NUMBER
COMMENT = 'Created via DataHub for assertion urn:li:assertion:b065942d2bca8a4dbe90cc3ec2d9ca9f of type field'
AS
$$
select case when metric <= 0 then 1 else 0 end from (select sum($1) as metric from (select case when quantity between 0 and 10 then 0 else 1 end
from TEST_DB.PUBLIC.PURCHASE_EVENT where quantity is not null))
$$;

-- End of Assertion b065942d2bca8a4dbe90cc3ec2d9ca9f

-- Start of Assertion 170dbd53f28eedbbaba52ebbf189f6b1

CREATE or REPLACE DATA METRIC FUNCTION
test_db.datahub_dmfs.datahub__170dbd53f28eedbbaba52ebbf189f6b1 (ARGT TABLE(quantity FLOAT))
RETURNS NUMBER
COMMENT = 'Created via DataHub for assertion urn:li:assertion:170dbd53f28eedbbaba52ebbf189f6b1 of type sql'
AS
$$
select case when metric=5 then 1 else 0 end from (select $1 as metric from (select mode(quantity) from test_db.public.purchase_event))
$$;

-- End of Assertion 170dbd53f28eedbbaba52ebbf189f6b1
metadata-ingestion/tests/unit/cli/assertion/test_compile.py (new file, 42 lines)
@ -0,0 +1,42 @@
import filecmp
import os

from datahub.integrations.assertion.snowflake.compiler import (
    DMF_ASSOCIATIONS_FILE_NAME,
    DMF_DEFINITIONS_FILE_NAME,
)
from tests.test_helpers.click_helpers import run_datahub_cmd


def test_compile_assertion_config_spec_for_snowflake(pytestconfig, tmp_path):
    config_file = (
        pytestconfig.rootpath
        / "tests/unit/api/entities/assertion/test_assertion_config.yml"
    ).resolve()

    golden_file_path = pytestconfig.rootpath / "tests/unit/cli/assertion/"
    run_datahub_cmd(
        [
            "assertions",
            "compile",
            "-f",
            f"{config_file}",
            "-p",
            "snowflake",
            "-x",
            "DMF_SCHEMA=test_db.datahub_dmfs",
            "-o",
            tmp_path,
        ],
    )

    output_file_names = [
        DMF_DEFINITIONS_FILE_NAME,
        DMF_ASSOCIATIONS_FILE_NAME,
    ]

    for file_name in output_file_names:
        assert os.path.exists(tmp_path / file_name)
        assert filecmp.cmp(
            golden_file_path / file_name, tmp_path / file_name
        ), f"{file_name} is not as expected"
@ -2,6 +2,7 @@ namespace com.linkedin.assertion

import com.linkedin.common.CustomProperties
import com.linkedin.common.ExternalReference
import com.linkedin.common.AuditStamp

/**
 * Information about an assertion
@ -66,10 +67,15 @@ record AssertionInfo includes CustomProperties, ExternalReference {
  volumeAssertion: optional VolumeAssertionInfo

  /**
   * A SQL Assertion definition. This field is populated when the type is SQL.
   */
  sqlAssertion: optional SqlAssertionInfo

  /**
   * A Field Assertion definition. This field is populated when the type is FIELD.
   */
  fieldAssertion: optional FieldAssertionInfo

  /**
   * A schema Assertion definition. This field is populated when the type is DATA_SCHEMA
   */
@ -83,6 +89,12 @@ record AssertionInfo includes CustomProperties, ExternalReference {
   */
  source: optional AssertionSource

  /**
   * The time at which the assertion was last updated and the actor who updated it.
   * This field is only present for Native assertions updated after this field was introduced.
   */
  lastUpdated: optional AuditStamp

  /**
   * An optional human-readable description of the assertion
   */
@ -33,6 +33,14 @@ record AssertionResultError {
     */
    UNSUPPORTED_PLATFORM
    /**
     * Error while executing a custom SQL assertion
     */
    CUSTOM_SQL_ERROR
    /**
     * Error while executing a field assertion
     */
    FIELD_ASSERTION_ERROR
    /**
     * Unknown error
     */
    UNKNOWN_ERROR
@ -42,4 +50,4 @@ record AssertionResultError {
   * Additional metadata depending on the type of error
   */
  properties: optional map[string, string]
}
@ -1,5 +1,7 @@
namespace com.linkedin.assertion

import com.linkedin.common.AuditStamp

/**
 * The source of an assertion
 */
@ -24,4 +26,10 @@ record AssertionSource {
     */
    INFERRED
  }

  /**
   * The time at which the assertion was initially created and the author who created it.
   * This field is only present for Native assertions created after this field was introduced.
   */
  created: optional AuditStamp
}
@ -34,6 +34,16 @@ enum AssertionStdOperator {
   */
  EQUAL_TO

  /**
   * Value being asserted is not equal to value. Requires 'value' parameter.
   */
  NOT_EQUAL_TO

  /**
   * Value being asserted is null. Requires no parameters.
   */
  NULL

  /**
   * Value being asserted is not null. Requires no parameters.
   */
@ -69,6 +79,16 @@ enum AssertionStdOperator {
   */
  NOT_IN

  /**
   * Value being asserted is true. Requires no parameters.
   */
  IS_TRUE

  /**
   * Value being asserted is false. Requires no parameters.
   */
  IS_FALSE

  /**
   * Other
   */
@ -13,10 +13,29 @@ record AssertionStdParameter {
   * The type of the parameter
   */
  type: enum AssertionStdParameterType {
    /**
     * A string value
     */
    STRING

    /**
     * A numeric value
     */
    NUMBER

    /**
     * A list of values. When used, value should be formatted as a serialized JSON array.
     */
    LIST

    /**
     * A set of values. When used, value should be formatted as a serialized JSON array.
     */
    SET

    /**
     * A value of unknown type
     */
    UNKNOWN
  }
}
@ -0,0 +1,57 @@
namespace com.linkedin.assertion

import com.linkedin.common.Urn
import com.linkedin.dataset.DatasetFilter

/**
 * Attributes defining a Field Assertion.
 **/
record FieldAssertionInfo {
  /**
   * The type of the field assertion being monitored.
   */
  @Searchable = {}
  type: enum FieldAssertionType {
    /**
     * An assertion used to validate the values contained within a field / column given a set of rows.
     */
    FIELD_VALUES
    /**
     * An assertion used to validate the value of a common field / column metric (e.g. aggregation) such as null count + percentage,
     * min, max, median, and more.
     */
    FIELD_METRIC
  }

  /**
   * The entity targeted by this Field check.
   */
  @Searchable = {
    "fieldType": "URN"
  }
  @Relationship = {
    "name": "Asserts",
    "entityTypes": [ "dataset" ]
  }
  entity: Urn

  /**
   * The definition of an assertion that validates individual values of a field / column for a set of rows.
   * This type of assertion verifies that each column value meets a particular requirement.
   */
  fieldValuesAssertion: optional FieldValuesAssertion

  /**
   * The definition of an assertion that validates a common metric obtained about a field / column for a set of rows.
   * This type of assertion verifies that the value of a high-level metric obtained by aggregating over a column meets
   * expectations
   */
  fieldMetricAssertion: optional FieldMetricAssertion

  /**
   * A definition of the specific filters that should be applied, when performing monitoring.
   * If not provided, there is no filter, and the full table is under consideration.
   * If using DataHub Dataset Profiles as the assertion source type, the value of this field will be ignored.
   */
  filter: optional DatasetFilter
}
@ -0,0 +1,39 @@
namespace com.linkedin.assertion

import com.linkedin.schema.SchemaFieldSpec

/**
 * Attributes defining a field metric assertion, which asserts an expectation against
 * a common metric derived from the set of field / column values, for example:
 * max, min, median, null count, null percentage, unique count, unique percentage, and more.
 */
record FieldMetricAssertion {
  /**
   * The field under evaluation
   */
  @Searchable = {
    "/path": {
      "fieldName": "fieldPath"
    }
  }
  field: SchemaFieldSpec

  /**
   * The specific metric to assert against. This is the value that
   * will be obtained by applying a standard operation, such as an aggregation,
   * to the selected field.
   */
  metric: FieldMetricType

  /**
   * The predicate to evaluate against the metric for the field / column.
   * Depending on the operator, parameters may be required in order to successfully
   * evaluate the assertion against the metric value.
   */
  operator: AssertionStdOperator

  /**
   * Standard parameters required for the assertion. e.g. min_value, max_value, value, columns
   */
  parameters: optional AssertionStdParameters
}
@ -0,0 +1,94 @@
namespace com.linkedin.assertion

/**
 * A standard metric that can be derived from the set of values
 * for a specific field / column of a dataset / table.
 */
enum FieldMetricType {
  /**
   * The number of unique values found in the column value set
   */
  UNIQUE_COUNT

  /**
   * The percentage of unique values to total rows for the dataset
   */
  UNIQUE_PERCENTAGE

  /**
   * The number of null values found in the column value set
   */
  NULL_COUNT

  /**
   * The percentage of null values to total rows for the dataset
   */
  NULL_PERCENTAGE

  /**
   * The minimum value in the column set (applies to numeric columns)
   */
  MIN

  /**
   * The maximum value in the column set (applies to numeric columns)
   */
  MAX

  /**
   * The mean value found in the column set (applies to numeric columns)
   */
  MEAN

  /**
   * The median value found in the column set (applies to numeric columns)
   */
  MEDIAN

  /**
   * The stddev of values found in the column set (applies to numeric columns)
   */
  STDDEV

  /**
   * The number of negative values found in the value set (applies to numeric columns)
   */
  NEGATIVE_COUNT

  /**
   * The percentage of negative values to total rows for the dataset (applies to numeric columns)
   */
  NEGATIVE_PERCENTAGE

  /**
   * The number of zero values found in the value set (applies to numeric columns)
   */
  ZERO_COUNT

  /**
   * The percentage of zero values to total rows for the dataset (applies to numeric columns)
   */
  ZERO_PERCENTAGE

  /**
   * The minimum length found in the column set (applies to string columns)
   */
  MIN_LENGTH

  /**
   * The maximum length found in the column set (applies to string columns)
   */
  MAX_LENGTH

  /**
   * The number of empty string values found in the value set (applies to string columns).
   * Note: This is a completely different metric from NULL_COUNT!
   */
  EMPTY_COUNT

  /**
   * The percentage of empty string values to total rows for the dataset (applies to string columns)
   * Note: This is a completely different metric from NULL_PERCENTAGE!
   */
  EMPTY_PERCENTAGE
}
@ -0,0 +1,21 @@
namespace com.linkedin.assertion

/**
 * Definition of a transform applied to the values of a column / field.
 * Note that the applicability of a field transform ultimately depends on the native type
 * of the field / column.
 *
 * Model has single field to permit extension.
 */
record FieldTransform {
  /**
   * The type of the field transform, e.g. the transformation
   * function / operator to apply.
   */
  type: enum FieldTransformType {
    /**
     * Obtain the length of a string field / column (applicable to string types)
     */
    LENGTH
  }
}
@ -0,0 +1,83 @@
namespace com.linkedin.assertion

import com.linkedin.schema.SchemaFieldSpec

/**
 * Attributes defining a field values assertion, which asserts that the values for a field / column
 * of a dataset / table match a set of expectations.
 *
 * In other words, this type of assertion acts as a semantic constraint applied to fields for a specific column.
 *
 * TODO: We should display the "failed row count" to the user if the column fails the verification rules.
 * TODO: Determine whether we need an "operator" that can be applied to the field.
 */
record FieldValuesAssertion {
  /**
   * The field under evaluation
   */
  @Searchable = {
    "/path": {
      "fieldName": "fieldPath"
    }
  }
  field: SchemaFieldSpec

  /**
   * An optional transform to apply to field values
   * before evaluating the operator.
   *
   * If none is applied, the field value will be compared as is.
   */
  transform: optional FieldTransform

  /**
   * The predicate to evaluate against a single value of the field.
   * Depending on the operator, parameters may be required in order to successfully
   * evaluate the assertion against the field value.
   */
  operator: AssertionStdOperator

  /**
   * Standard parameters required for the assertion. e.g. min_value, max_value, value, columns
   */
  parameters: optional AssertionStdParameters

  /**
   * Additional customization about when the assertion
   * should be officially considered failing.
   */
  failThreshold: record FieldValuesFailThreshold {

    /**
     * The type of failure threshold. Either based on the number
     * of column values (rows) that fail the expectations, or the percentage
     * of the total rows under consideration.
     */
    type: enum FieldValuesFailThresholdType {
      /*
       * The maximum number of column values (i.e. rows) that are allowed
       * to fail the defined expectations before the assertion officially fails.
       */
      COUNT
      /*
       * The maximum percentage of rows that are allowed
       * to fail the defined column expectations before the assertion officially fails.
       */
      PERCENTAGE
    } = "COUNT"

    /**
     * By default this is 0, meaning that ALL column values (i.e. rows) must
     * meet the defined expectations.
     */
    value: long = 0
  }

  /**
   * Whether to ignore or allow nulls when running the values assertion (i.e.
   * consider only non-null values) using operators OTHER than the IS_NULL operator.
   *
   * Defaults to true, meaning rows with null values are skipped rather than evaluated.
   */
  excludeNulls: boolean = true
}
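To make the failThreshold semantics concrete, here is a hedged sketch, as a plain Python dict rather than the generated PDL classes, of how the "quantity between 0 and 10" example from the test config earlier maps onto this record; the exact parameter key names are assumptions.

# Illustrative shape only; real writes go through the generated metadata classes.
field_values_assertion = {
    "field": {"path": "quantity", "type": "NUMBER", "nativeType": "FLOAT"},  # SchemaFieldSpec (keys assumed)
    "operator": "BETWEEN",
    "parameters": {  # AssertionStdParameters (keys assumed)
        "minValue": {"value": "0", "type": "NUMBER"},
        "maxValue": {"value": "10", "type": "NUMBER"},
    },
    "failThreshold": {"type": "COUNT", "value": 0},  # all rows must pass
    "excludeNulls": True,  # null quantities are skipped, matching the generated "is not null" clause
}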
@ -4,11 +4,13 @@ import com.linkedin.schema.SchemaFieldSpec


/**
 * Lightweight spec used for referencing a particular schema field.
 **/
 * Lightweight spec used for referencing a particular schema field that is used to compute
 * a freshness signal or operation.
 * TODO: Since this is now leveraged across assertions & metrics / operations, we should consider moving this to a common package.
 */
record FreshnessFieldSpec includes SchemaFieldSpec {
  /**
   * The type of the field being used to verify the Freshness Assertion.
   * The type of the field being used to verify the Freshness of the asset.
   */
  kind: optional FreshnessFieldKind
}
@ -25,5 +25,36 @@ record SchemaAssertionInfo {
   * Note that many of the fields of this model, especially those related to metadata (tags, terms)
   * will go unused in this context.
   */
  schema: SchemaMetadata
  // @Relationship = {
  //   "/foreignKeys/*/foreignFields/*": null,
  //   "/foreignKeys/*/foreignDataset": null,
  //   "/fields/*/globalTags/tags/*/tag": null,
  //   "/fields/*/glossaryTerms/terms/*/urn": null
  // }
  // @Searchable = {
  //   "/fields/*/fieldPath": null,
  //   "/fields/*/description": null,
  //   "/fields/*/label": null,
  //   "/fields/*/globalTags/tags/*/tag": null,
  //   "/fields/*/glossaryTerms/terms/*/urn": null
  // }
  schema: SchemaMetadata

  /**
   * The required compatibility level for the schema assertion to pass.
   */
  compatibility: optional enum SchemaAssertionCompatibility {
    /**
     * The actual schema must be exactly the same as the expected schema
     */
    EXACT_MATCH,
    /**
     * The actual schema must be a superset of the expected schema
     */
    SUPERSET,
    /**
     * The actual schema must be a subset of the expected schema
     */
    SUBSET
  } = "EXACT_MATCH"
}
@ -8,7 +8,7 @@ import com.linkedin.dataset.DatasetFilter
 */
record VolumeAssertionInfo {
  /**
   * The type of the freshness assertion being monitored.
   * The type of the volume assertion being monitored.
   */
  @Searchable = {}
  type: enum VolumeAssertionType {
@ -12,5 +12,9 @@ record DataQualityContract {
   * The assertion representing the Data Quality contract.
   * E.g. a table or column-level assertion.
   */
  @Relationship = {
    "name": "IncludesDataQualityAssertion",
    "entityTypes": [ "assertion" ]
  }
  assertion: Urn
}
@ -9,5 +9,9 @@ record SchemaContract {
  /**
   * The assertion representing the schema contract.
   */
  @Relationship = {
    "name": "IncludesSchemaAssertion",
    "entityTypes": [ "assertion" ]
  }
  assertion: Urn
}
@ -22,6 +22,11 @@ record IncidentSource {
     * Manually created incident, via UI or API.
     */
    MANUAL

    /**
     * An assertion has failed, triggering the incident.
     */
    ASSERTION_FAILURE
  }

  /**
@ -4,6 +4,36 @@ namespace com.linkedin.incident
 * A type of asset incident
 */
enum IncidentType {
  /**
   * A Freshness Assertion has failed, triggering the incident.
   * Raised on entities where assertions are configured to generate incidents.
   */
  FRESHNESS

  /**
   * A Volume Assertion has failed, triggering the incident.
   * Raised on entities where assertions are configured to generate incidents.
   */
  VOLUME

  /**
   * A Field Assertion has failed, triggering the incident.
   * Raised on entities where assertions are configured to generate incidents.
   */
  FIELD

  /**
   * A raw SQL-statement based assertion has failed, triggering the incident.
   * Raised on entities where assertions are configured to generate incidents.
   */
  SQL

  /**
   * A Data Schema assertion has failed, triggering the incident.
   * Raised on entities where assertions are configured to generate incidents.
   */
  DATA_SCHEMA

  /**
   * A misc. operational incident, e.g. failure to materialize a dataset.
   */
@ -68,7 +68,7 @@ import org.springframework.context.annotation.Import;
      EntityRegistryFactory.class,
      DataHubTokenServiceFactory.class,
      GitVersionFactory.class,
      SiblingGraphServiceFactory.class
      SiblingGraphServiceFactory.class,
    })
public class GraphQLEngineFactory {
  @Autowired