From 7326bb9470dac3f11b6589fc43cd62bc3443a37c Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Thu, 13 Feb 2025 05:47:25 -0600 Subject: [PATCH] feat(urn-validation): Add UrnValidation PDL annotation (#12572) --- docs/modeling/extending-the-metadata-model.md | 22 ++ .../linkedin/metadata/models/AspectSpec.java | 11 + .../metadata/models/EntitySpecBuilder.java | 17 ++ .../models/UrnValidationFieldSpec.java | 14 + .../UrnValidationFieldSpecExtractor.java | 57 ++++ .../models/annotation/AnnotationUtils.java | 22 ++ .../annotation/UrnValidationAnnotation.java | 36 +++ .../PluginEntityRegistryLoaderTest.java | 1 + .../metadata/entity/EntityAspect.java | 1 + .../entity/ebean/batch/AspectsBatchImpl.java | 3 +- .../entity/ebean/batch/ChangeItemImpl.java | 2 +- .../entity/ebean/batch/DeleteItemImpl.java | 2 +- .../entity/validation/ValidationApiUtils.java | 151 +--------- .../aspect/utils/DefaultAspectsUtil.java | 2 +- .../validation/UrnAnnotationValidator.java | 150 ++++++++++ .../metadata/entity/EntityServiceImpl.java | 1 + .../linkedin/metadata/entity/EntityUtils.java | 2 +- .../UrnAnnotationValidatorTest.java | 278 ++++++++++++++++++ .../metadata/entity/EntityServiceTest.java | 1 + .../StructuredPropertyDefinition.pdl | 10 + .../SpringStandardPluginConfiguration.java | 23 +- .../v2/controller/EntityController.java | 2 +- .../v3/controller/EntityController.java | 2 +- metadata-utils/build.gradle | 1 + .../metadata/utils}/EntityApiUtils.java | 5 +- .../utils}/EntityRegistryUrnValidator.java | 3 +- .../utils}/RecordTemplateValidator.java | 2 +- .../metadata/utils/UrnValidationUtil.java | 259 ++++++++++++++++ .../metadata/utils/UrnValidationUtilTest.java | 47 ++- .../src/main/java/mock/MockAspectSpec.java | 3 + .../src/main/java/mock/MockEntitySpec.java | 1 + .../openapi/v3/structured_properties.json | 222 ++++++++++++++ 32 files changed, 1164 insertions(+), 189 deletions(-) create mode 100644 
entity-registry/src/main/java/com/linkedin/metadata/models/UrnValidationFieldSpec.java create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/models/UrnValidationFieldSpecExtractor.java create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/models/annotation/UrnValidationAnnotation.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/aspect/validation/UrnAnnotationValidator.java create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/aspect/validation/UrnAnnotationValidatorTest.java rename {metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity => metadata-utils/src/main/java/com/linkedin/metadata/utils}/EntityApiUtils.java (91%) rename {metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation => metadata-utils/src/main/java/com/linkedin/metadata/utils}/EntityRegistryUrnValidator.java (97%) rename {metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation => metadata-utils/src/main/java/com/linkedin/metadata/utils}/RecordTemplateValidator.java (97%) create mode 100644 metadata-utils/src/main/java/com/linkedin/metadata/utils/UrnValidationUtil.java rename metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/validation/ValidationApiUtilsTest.java => metadata-utils/src/test/java/com/linkedin/metadata/utils/UrnValidationUtilTest.java (75%) diff --git a/docs/modeling/extending-the-metadata-model.md b/docs/modeling/extending-the-metadata-model.md index b1a8655481..c58e047a6e 100644 --- a/docs/modeling/extending-the-metadata-model.md +++ b/docs/modeling/extending-the-metadata-model.md @@ -207,6 +207,7 @@ The Aspect has four key components: its properties, the @Aspect annotation, the the case of DashboardInfo, the `charts` field is an Array of Urns. The @Relationship annotation cannot be applied directly to an array of Urns. 
That’s why you see the use of an Annotation override (`"/*":`) to apply the @Relationship annotation to the Urn directly. Read more about overrides in the annotation docs further down on this page. +- **@UrnValidation**: This annotation can enforce constraints on Urn fields, including entity type restrictions and existence. After you create your Aspect, you need to attach to all the entities that it applies to. @@ -496,6 +497,27 @@ This annotation says that when we ingest an Entity with an Ownership Aspect, Dat between that entity and the CorpUser or CorpGroup who owns it. This will be queryable using the Relationships resource in both the forward and inverse directions. +#### @UrnValidation + +This annotation can be applied to Urn fields inside an aspect. The annotation can optionally perform one or more of the following: +- Enforce that the URN exists +- Enforce stricter URN validation +- Restrict the URN to specific entity types + +##### Example + +In this example from StructuredPropertyDefinition, the annotation enforces that the `valueType` URN must exist, +must follow the stricter URN encoding rules, and can only be of entity type `dataType`. + +``` + @UrnValidation = { + "exist": true, + "strict": true, + "entityTypes": [ "dataType" ], + } + valueType: Urn +``` + #### Annotating Collections & Annotation Overrides You will not always be able to apply annotations to a primitive field directly.
This may be because the field is wrapped diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/AspectSpec.java b/entity-registry/src/main/java/com/linkedin/metadata/models/AspectSpec.java index a2ff81da56..62cc48ee65 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/AspectSpec.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/AspectSpec.java @@ -24,6 +24,7 @@ public class AspectSpec { private final Map _timeseriesFieldSpecs; private final Map _timeseriesFieldCollectionSpecs; private final Map _searchableRefFieldSpecs; + private final Map _urnValidationFieldSpecs; // Classpath & Pegasus-specific: Temporary. private final RecordDataSchema _schema; @@ -39,6 +40,7 @@ public class AspectSpec { @Nonnull final List timeseriesFieldSpecs, @Nonnull final List timeseriesFieldCollectionSpecs, @Nonnull final List searchableRefFieldSpecs, + @Nonnull final List urnValidationFieldSpecs, final RecordDataSchema schema, final Class aspectClass) { _aspectAnnotation = aspectAnnotation; @@ -76,6 +78,11 @@ public class AspectSpec { spec -> spec.getTimeseriesFieldCollectionAnnotation().getCollectionName(), spec -> spec, (val1, val2) -> val1)); + _urnValidationFieldSpecs = + urnValidationFieldSpecs.stream() + .collect( + Collectors.toMap( + spec -> spec.getPath().toString(), spec -> spec, (val1, val2) -> val1)); _schema = schema; _aspectClass = aspectClass; } @@ -112,6 +119,10 @@ public class AspectSpec { return _timeseriesFieldSpecs; } + public Map getUrnValidationFieldSpecMap() { + return _urnValidationFieldSpecs; + } + public Map getTimeseriesFieldCollectionSpecMap() { return _timeseriesFieldCollectionSpecs; } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/EntitySpecBuilder.java b/entity-registry/src/main/java/com/linkedin/metadata/models/EntitySpecBuilder.java index c79ea5de69..9f9f6f0ff7 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/EntitySpecBuilder.java +++ 
b/entity-registry/src/main/java/com/linkedin/metadata/models/EntitySpecBuilder.java @@ -20,6 +20,7 @@ import com.linkedin.metadata.models.annotation.SearchableAnnotation; import com.linkedin.metadata.models.annotation.SearchableRefAnnotation; import com.linkedin.metadata.models.annotation.TimeseriesFieldAnnotation; import com.linkedin.metadata.models.annotation.TimeseriesFieldCollectionAnnotation; +import com.linkedin.metadata.models.annotation.UrnValidationAnnotation; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; @@ -48,6 +49,8 @@ public class EntitySpecBuilder { new PegasusSchemaAnnotationHandlerImpl(TimeseriesFieldAnnotation.ANNOTATION_NAME); public static SchemaAnnotationHandler _timeseriesFieldCollectionHandler = new PegasusSchemaAnnotationHandlerImpl(TimeseriesFieldCollectionAnnotation.ANNOTATION_NAME); + public static SchemaAnnotationHandler _urnValidationAnnotationHandler = + new PegasusSchemaAnnotationHandlerImpl(UrnValidationAnnotation.ANNOTATION_NAME); private final AnnotationExtractionMode _extractionMode; private final Set _entityNames = new HashSet<>(); @@ -226,6 +229,7 @@ public class EntitySpecBuilder { Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList(), aspectRecordSchema, aspectClass); } @@ -299,6 +303,18 @@ public class EntitySpecBuilder { new DataSchemaRichContextTraverser(timeseriesFieldSpecExtractor); timeseriesFieldSpecTraverser.traverse(processedTimeseriesFieldResult.getResultSchema()); + // Extract UrnValidation aspects + final SchemaAnnotationProcessor.SchemaAnnotationProcessResult processedTimestampResult = + SchemaAnnotationProcessor.process( + Collections.singletonList(_urnValidationAnnotationHandler), + aspectRecordSchema, + new SchemaAnnotationProcessor.AnnotationProcessOption()); + final UrnValidationFieldSpecExtractor urnValidationFieldSpecExtractor = + new UrnValidationFieldSpecExtractor(); + final DataSchemaRichContextTraverser 
timestampFieldSpecTraverser = + new DataSchemaRichContextTraverser(urnValidationFieldSpecExtractor); + timestampFieldSpecTraverser.traverse(processedTimestampResult.getResultSchema()); + return new AspectSpec( aspectAnnotation, searchableFieldSpecExtractor.getSpecs(), @@ -307,6 +323,7 @@ public class EntitySpecBuilder { timeseriesFieldSpecExtractor.getTimeseriesFieldSpecs(), timeseriesFieldSpecExtractor.getTimeseriesFieldCollectionSpecs(), searchableRefFieldSpecExtractor.getSpecs(), + urnValidationFieldSpecExtractor.getUrnValidationFieldSpecs(), aspectRecordSchema, aspectClass); } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/UrnValidationFieldSpec.java b/entity-registry/src/main/java/com/linkedin/metadata/models/UrnValidationFieldSpec.java new file mode 100644 index 0000000000..b4bba0a8e8 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/UrnValidationFieldSpec.java @@ -0,0 +1,14 @@ +package com.linkedin.metadata.models; + +import com.linkedin.data.schema.DataSchema; +import com.linkedin.data.schema.PathSpec; +import com.linkedin.metadata.models.annotation.UrnValidationAnnotation; +import javax.annotation.Nonnull; +import lombok.Value; + +@Value +public class UrnValidationFieldSpec { + @Nonnull PathSpec path; + @Nonnull UrnValidationAnnotation urnValidationAnnotation; + @Nonnull DataSchema pegasusSchema; +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/UrnValidationFieldSpecExtractor.java b/entity-registry/src/main/java/com/linkedin/metadata/models/UrnValidationFieldSpecExtractor.java new file mode 100644 index 0000000000..7c5391b7b5 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/UrnValidationFieldSpecExtractor.java @@ -0,0 +1,57 @@ +package com.linkedin.metadata.models; + +import com.linkedin.data.schema.DataSchema; +import com.linkedin.data.schema.DataSchemaTraverse; +import com.linkedin.data.schema.PathSpec; +import 
com.linkedin.data.schema.annotation.SchemaVisitor; +import com.linkedin.data.schema.annotation.SchemaVisitorTraversalResult; +import com.linkedin.data.schema.annotation.TraverserContext; +import com.linkedin.metadata.models.annotation.UrnValidationAnnotation; +import java.util.ArrayList; +import java.util.List; +import lombok.Getter; + +@Getter +public class UrnValidationFieldSpecExtractor implements SchemaVisitor { + private final List urnValidationFieldSpecs = new ArrayList<>(); + + @Override + public void callbackOnContext(TraverserContext context, DataSchemaTraverse.Order order) { + if (context.getEnclosingField() == null) { + return; + } + + if (DataSchemaTraverse.Order.PRE_ORDER.equals(order)) { + final DataSchema currentSchema = context.getCurrentSchema().getDereferencedDataSchema(); + final PathSpec path = new PathSpec(context.getSchemaPathSpec()); + + // Check for @UrnValidation annotation in primary properties + final Object urnValidationAnnotationObj = + context.getEnclosingField().getProperties().get(UrnValidationAnnotation.ANNOTATION_NAME); + + // Check if it's either explicitly annotated with @UrnValidation + if (urnValidationAnnotationObj != null) { + addUrnValidationFieldSpec(currentSchema, path, urnValidationAnnotationObj); + } + } + } + + private void addUrnValidationFieldSpec( + DataSchema currentSchema, PathSpec path, Object annotationObj) { + UrnValidationAnnotation annotation = + UrnValidationAnnotation.fromPegasusAnnotationObject( + annotationObj, FieldSpecUtils.getSchemaFieldName(path), path.toString()); + + urnValidationFieldSpecs.add(new UrnValidationFieldSpec(path, annotation, currentSchema)); + } + + @Override + public VisitorContext getInitialVisitorContext() { + return null; + } + + @Override + public SchemaVisitorTraversalResult getSchemaVisitorTraversalResult() { + return new SchemaVisitorTraversalResult(); + } +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/annotation/AnnotationUtils.java 
b/entity-registry/src/main/java/com/linkedin/metadata/models/annotation/AnnotationUtils.java index 7aa5be69a0..205ebd7d85 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/annotation/AnnotationUtils.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/annotation/AnnotationUtils.java @@ -1,5 +1,8 @@ package com.linkedin.metadata.models.annotation; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; import java.util.Map; import java.util.Optional; import lombok.experimental.UtilityClass; @@ -13,4 +16,23 @@ public class AnnotationUtils { } return Optional.empty(); } + + List getFieldList( + final Map fieldMap, final String fieldName, final Class itemType) { + Object value = fieldMap.get(fieldName); + if (!(value instanceof List)) { + return Collections.emptyList(); + } + + List list = (List) value; + List result = new ArrayList<>(); + + for (Object item : list) { + if (itemType.isInstance(item)) { + result.add(itemType.cast(item)); + } + } + + return Collections.unmodifiableList(result); + } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/annotation/UrnValidationAnnotation.java b/entity-registry/src/main/java/com/linkedin/metadata/models/annotation/UrnValidationAnnotation.java new file mode 100644 index 0000000000..4e7d388cf8 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/annotation/UrnValidationAnnotation.java @@ -0,0 +1,36 @@ +package com.linkedin.metadata.models.annotation; + +import com.linkedin.metadata.models.ModelValidationException; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import javax.annotation.Nonnull; +import lombok.Value; + +@Value +public class UrnValidationAnnotation { + public static final String ANNOTATION_NAME = "UrnValidation"; + boolean exist; + boolean strict; + List entityTypes; + + @Nonnull + public static UrnValidationAnnotation fromPegasusAnnotationObject( + @Nonnull final Object 
annotationObj, + @Nonnull final String schemaFieldName, + @Nonnull final String context) { + if (!Map.class.isAssignableFrom(annotationObj.getClass())) { + throw new ModelValidationException( + String.format( + "Failed to validate @%s annotation declared at %s: Invalid value type provided (Expected Map)", + ANNOTATION_NAME, context)); + } + + Map map = (Map) annotationObj; + final Optional exist = AnnotationUtils.getField(map, "exist", Boolean.class); + final Optional strict = AnnotationUtils.getField(map, "strict", Boolean.class); + final List entityTypes = AnnotationUtils.getFieldList(map, "entityTypes", String.class); + + return new UrnValidationAnnotation(exist.orElse(true), strict.orElse(true), entityTypes); + } +} diff --git a/entity-registry/src/test/java/com/linkedin/metadata/models/registry/PluginEntityRegistryLoaderTest.java b/entity-registry/src/test/java/com/linkedin/metadata/models/registry/PluginEntityRegistryLoaderTest.java index 47c29405a7..e04b9022c6 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/models/registry/PluginEntityRegistryLoaderTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/models/registry/PluginEntityRegistryLoaderTest.java @@ -100,6 +100,7 @@ public class PluginEntityRegistryLoaderTest { Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList(), (RecordDataSchema) DataSchemaFactory.getInstance().getAspectSchema("datasetKey").get(), DataSchemaFactory.getInstance().getAspectClass("datasetKey").get()); diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/EntityAspect.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/EntityAspect.java index 2b67d5e92f..2b2f960ebb 100644 --- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/EntityAspect.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/EntityAspect.java @@ -9,6 +9,7 @@ import 
com.linkedin.entity.EnvelopedAspect; import com.linkedin.metadata.aspect.SystemAspect; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.utils.EntityApiUtils; import com.linkedin.mxe.GenericAspect; import com.linkedin.mxe.SystemMetadata; import java.sql.Timestamp; diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java index 9b993ab5fc..cf52615d44 100644 --- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java @@ -13,6 +13,7 @@ import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.plugins.hooks.MutationHook; import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; +import com.linkedin.metadata.entity.validation.ValidationException; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.util.Pair; @@ -243,7 +244,7 @@ public class AspectsBatchImpl implements AspectsBatch { ValidationExceptionCollection exceptions = AspectsBatch.validateProposed(this.nonRepeatedItems, this.retrieverContext); if (!exceptions.isEmpty()) { - throw new IllegalArgumentException("Failed to validate MCP due to: " + exceptions); + throw new ValidationException("Failed to validate MCP due to: " + exceptions); } return new AspectsBatchImpl(this.items, this.nonRepeatedItems, this.retrieverContext); diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java index 
94f71ff089..f4ce017a79 100644 --- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java @@ -14,11 +14,11 @@ import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.patch.template.common.GenericPatchTemplate; import com.linkedin.metadata.entity.AspectUtils; -import com.linkedin.metadata.entity.EntityApiUtils; import com.linkedin.metadata.entity.EntityAspect; import com.linkedin.metadata.entity.validation.ValidationApiUtils; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.utils.EntityApiUtils; import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.metadata.utils.SystemMetadataUtils; diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/DeleteItemImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/DeleteItemImpl.java index 935227e55b..52fedb16bb 100644 --- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/DeleteItemImpl.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/DeleteItemImpl.java @@ -8,11 +8,11 @@ import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.SystemAspect; import com.linkedin.metadata.aspect.batch.BatchItem; import com.linkedin.metadata.aspect.batch.ChangeMCP; -import com.linkedin.metadata.entity.EntityApiUtils; import com.linkedin.metadata.entity.EntityAspect; import com.linkedin.metadata.entity.validation.ValidationApiUtils; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; +import 
com.linkedin.metadata.utils.EntityApiUtils; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.mxe.SystemMetadata; import java.util.Objects; diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/ValidationApiUtils.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/ValidationApiUtils.java index 5e1f09fcc6..f301be3115 100644 --- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/ValidationApiUtils.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/ValidationApiUtils.java @@ -1,24 +1,17 @@ package com.linkedin.metadata.entity.validation; import com.linkedin.common.urn.Urn; -import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.schema.validation.ValidationResult; import com.linkedin.data.template.RecordTemplate; -import com.linkedin.metadata.Constants; import com.linkedin.metadata.aspect.AspectRetriever; -import com.linkedin.metadata.entity.EntityApiUtils; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; -import java.net.URISyntaxException; -import java.net.URLDecoder; -import java.net.URLEncoder; -import java.nio.charset.StandardCharsets; -import java.util.List; -import java.util.Set; +import com.linkedin.metadata.utils.EntityApiUtils; +import com.linkedin.metadata.utils.EntityRegistryUrnValidator; +import com.linkedin.metadata.utils.RecordTemplateValidator; +import com.linkedin.metadata.utils.UrnValidationUtil; import java.util.function.Consumer; -import java.util.stream.Collectors; -import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; @@ -26,12 +19,6 @@ import lombok.extern.slf4j.Slf4j; @Slf4j public class ValidationApiUtils { public static final String STRICT_URN_VALIDATION_ENABLED = 
"STRICT_URN_VALIDATION_ENABLED"; - public static final int URN_NUM_BYTES_LIMIT = 512; - // Related to BrowsePathv2 - public static final String URN_DELIMITER_SEPARATOR = "␟"; - // https://datahubproject.io/docs/what/urn/#restrictions - public static final Set ILLEGAL_URN_COMPONENT_CHARACTERS = Set.of("(", ")"); - public static final Set ILLEGAL_URN_TUPLE_CHARACTERS = Set.of(","); /** * Validates a {@link RecordTemplate} and throws {@link ValidationException} if validation fails. @@ -50,7 +37,7 @@ public class ValidationApiUtils { } public static void validateUrn(@Nonnull EntityRegistry entityRegistry, @Nonnull final Urn urn) { - validateUrn( + UrnValidationUtil.validateUrn( entityRegistry, urn, Boolean.TRUE.equals( @@ -58,83 +45,6 @@ public class ValidationApiUtils { System.getenv().getOrDefault(STRICT_URN_VALIDATION_ENABLED, "false")))); } - public static void validateUrn( - @Nonnull EntityRegistry entityRegistry, @Nonnull final Urn urn, boolean strict) { - EntityRegistryUrnValidator validator = new EntityRegistryUrnValidator(entityRegistry); - validator.setCurrentEntitySpec(entityRegistry.getEntitySpec(urn.getEntityType())); - RecordTemplateValidator.validate( - EntityApiUtils.buildKeyAspect(entityRegistry, urn), - validationResult -> { - throw new IllegalArgumentException( - "Invalid urn: " + urn + "\n Cause: " + validationResult.getMessages()); - }, - validator); - - if (urn.toString().trim().length() != urn.toString().length()) { - throw new IllegalArgumentException( - "Error: cannot provide an URN with leading or trailing whitespace"); - } - if (!Constants.SCHEMA_FIELD_ENTITY_NAME.equals(urn.getEntityType()) - && URLEncoder.encode(urn.toString()).length() > URN_NUM_BYTES_LIMIT) { - throw new IllegalArgumentException( - "Error: cannot provide an URN longer than " - + Integer.toString(URN_NUM_BYTES_LIMIT) - + " bytes (when URL encoded)"); - } - - if (urn.toString().contains(URN_DELIMITER_SEPARATOR)) { - throw new IllegalArgumentException( - "Error: URN cannot 
contain " + URN_DELIMITER_SEPARATOR + " character"); - } - - int totalParts = urn.getEntityKey().getParts().size(); - List illegalComponents = - urn.getEntityKey().getParts().stream() - .flatMap(part -> processUrnPartRecursively(part, totalParts)) - .collect(Collectors.toList()); - - if (!illegalComponents.isEmpty()) { - String message = - String.format( - "Illegal `%s` characters detected in URN %s component(s): %s", - ILLEGAL_URN_COMPONENT_CHARACTERS, urn, illegalComponents); - - if (strict) { - throw new IllegalArgumentException(message); - } else { - log.error(message); - } - } - - try { - Urn.createFromString(urn.toString()); - } catch (URISyntaxException e) { - throw new IllegalArgumentException(e); - } - } - - /** Recursively process URN parts with URL decoding */ - private static Stream processUrnPartRecursively(String urnPart, int totalParts) { - String decodedPart = - URLDecoder.decode(URLEncodingFixer.fixURLEncoding(urnPart), StandardCharsets.UTF_8); - if (decodedPart.startsWith("urn:li:")) { - // Recursively process nested URN after decoding - int nestedParts = UrnUtils.getUrn(decodedPart).getEntityKey().getParts().size(); - return UrnUtils.getUrn(decodedPart).getEntityKey().getParts().stream() - .flatMap(part -> processUrnPartRecursively(part, nestedParts)); - } - if (totalParts > 1) { - if (ILLEGAL_URN_TUPLE_CHARACTERS.stream().anyMatch(c -> urnPart.contains(c))) { - return Stream.of(urnPart); - } - } - if (ILLEGAL_URN_COMPONENT_CHARACTERS.stream().anyMatch(c -> urnPart.contains(c))) { - return Stream.of(urnPart); - } - - return Stream.empty(); - } - /** * Validates a {@link RecordTemplate} and logs a warning if validation fails. 
* @@ -174,7 +84,7 @@ public class ValidationApiUtils { validator.setCurrentEntitySpec(entitySpec); Consumer resultFunction = validationResult -> { - throw new IllegalArgumentException( + throw new ValidationException( "Invalid format for aspect: " + entitySpec.getName() + "\n Cause: " @@ -188,53 +98,4 @@ public class ValidationApiUtils { RecordTemplateValidator.validate(aspect, resultFunction, validator); } } - - /** - * Fixes malformed URL encoding by escaping unescaped % characters while preserving valid - * percent-encoded sequences. - */ - private static class URLEncodingFixer { - /** - * @param input The potentially malformed URL-encoded string - * @return A string with proper URL encoding that can be safely decoded - */ - public static String fixURLEncoding(String input) { - if (input == null) { - return null; - } - - StringBuilder result = new StringBuilder(input.length() * 2); - int i = 0; - - while (i < input.length()) { - char currentChar = input.charAt(i); - - if (currentChar == '%') { - if (i + 2 < input.length()) { - // Check if the next two characters form a valid hex pair - String hexPair = input.substring(i + 1, i + 3); - if (isValidHexPair(hexPair)) { - // This is a valid percent-encoded sequence, keep it as is - result.append(currentChar); - } else { - // Invalid sequence, escape the % character - result.append("%25"); - } - } else { - // % at the end of string, escape it - result.append("%25"); - } - } else { - result.append(currentChar); - } - i++; - } - - return result.toString(); - } - - private static boolean isValidHexPair(String pair) { - return pair.matches("[0-9A-Fa-f]{2}"); - } - } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java b/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java index b98e2465e9..416d656f87 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java +++ 
b/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java @@ -23,12 +23,12 @@ import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.aspect.batch.BatchItem; import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.validation.CreateIfNotExistsValidator; -import com.linkedin.metadata.entity.EntityApiUtils; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.utils.DataPlatformInstanceUtils; +import com.linkedin.metadata.utils.EntityApiUtils; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.mxe.GenericAspect; import com.linkedin.mxe.MetadataChangeProposal; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/aspect/validation/UrnAnnotationValidator.java b/metadata-io/src/main/java/com/linkedin/metadata/aspect/validation/UrnAnnotationValidator.java new file mode 100644 index 0000000000..f4521a7e75 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/aspect/validation/UrnAnnotationValidator.java @@ -0,0 +1,150 @@ +package com.linkedin.metadata.aspect.validation; + +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.metadata.aspect.ReadItem; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.annotation.UrnValidationAnnotation; +import 
com.linkedin.metadata.utils.UrnValidationUtil; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; + +@Setter +@Getter +@Accessors(chain = true) +public class UrnAnnotationValidator extends AspectPayloadValidator { + @Nonnull private AspectPluginConfig config; + + @Override + protected Stream validateProposedAspects( + @Nonnull Collection mcpItems, + @Nonnull RetrieverContext retrieverContext) { + List typeSafeItems = new ArrayList<>(mcpItems); + + Map> byAspectSpec = + typeSafeItems.stream() + .filter( + item -> + item.getAspectSpec() != null + && item.getRecordTemplate() != null + && item.getRecordTemplate().data() != null) + .collect(Collectors.groupingBy(ReadItem::getAspectSpec, Collectors.toList())); + + Map> urnValidationEntries = + byAspectSpec.entrySet().stream() + .flatMap( + entry -> + UrnValidationUtil.findUrnValidationFields(entry.getValue(), entry.getKey()) + .entrySet() + .stream()) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + // First check non-database validations + Map> nonExistenceFailures = + urnValidationEntries.entrySet().stream() + .flatMap( + itemEntry -> { + return itemEntry.getValue().stream() + .map( + validationEntry -> { + UrnValidationAnnotation annotation = validationEntry.getAnnotation(); + + if (annotation.isStrict()) { + try { + UrnValidationUtil.validateUrn( + retrieverContext.getAspectRetriever().getEntityRegistry(), + UrnUtils.getUrn(validationEntry.getUrn()), + true); + } catch (RuntimeException ex) { + return Map.entry( + itemEntry.getKey(), + AspectValidationException.forItem( + itemEntry.getKey(), ex.getMessage())); + } + } + Urn urn = UrnUtils.getUrn(validationEntry.getUrn()); + if 
(annotation.getEntityTypes() != null + && !annotation.getEntityTypes().isEmpty()) { + if (annotation.getEntityTypes().stream() + .noneMatch( + entityType -> entityType.equals(urn.getEntityType()))) { + return Map.entry( + itemEntry.getKey(), + AspectValidationException.forItem( + itemEntry.getKey(), + String.format( + "Invalid entity type urn validation failure (Required: %s). Path: %s Urn: %s", + validationEntry.getAnnotation().getEntityTypes(), + validationEntry.getFieldPath(), + urn))); + } + } + return null; + }); + }) + .filter(Objects::nonNull) + .collect( + Collectors.groupingBy( + Map.Entry::getKey, + Collectors.mapping(Map.Entry::getValue, Collectors.toSet()))); + + // Next check the database + Set checkUrns = + urnValidationEntries.entrySet().stream() + .filter(itemEntry -> !nonExistenceFailures.containsKey(itemEntry.getKey())) + .flatMap(itemEntry -> itemEntry.getValue().stream()) + .filter(validationEntry -> validationEntry.getAnnotation().isExist()) + .map(entry -> UrnUtils.getUrn(entry.getUrn())) + .collect(Collectors.toSet()); + Map missingUrns = + retrieverContext.getAspectRetriever().entityExists(checkUrns).entrySet().stream() + .filter(urnExistsEntry -> Boolean.FALSE.equals(urnExistsEntry.getValue())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + Set existenceFailures = + urnValidationEntries.entrySet().stream() + .filter(itemEntry -> !nonExistenceFailures.containsKey(itemEntry.getKey())) + .flatMap( + itemEntry -> + itemEntry.getValue().stream() + .filter(validationEntry -> validationEntry.getAnnotation().isExist()) + .map( + validationEntry -> { + if (missingUrns.containsKey( + UrnUtils.getUrn(validationEntry.getUrn()))) { + return AspectValidationException.forItem( + itemEntry.getKey(), + String.format( + "Urn validation failure. Urn does not exist. 
Path: %s Urn: %s", + validationEntry.getFieldPath(), validationEntry.getUrn())); + } + return null; + }) + .filter(Objects::nonNull)) + .collect(Collectors.toSet()); + + return Stream.concat( + nonExistenceFailures.values().stream().flatMap(Set::stream), existenceFailures.stream()); + } + + @Override + protected Stream validatePreCommitAspects( + @Nonnull Collection changeMCPs, @Nonnull RetrieverContext retrieverContext) { + return Stream.empty(); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index 9ee4745e7f..99e822ef18 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -70,6 +70,7 @@ import com.linkedin.metadata.query.ListUrnsResult; import com.linkedin.metadata.run.AspectRowSummary; import com.linkedin.metadata.snapshot.Snapshot; import com.linkedin.metadata.utils.AuditStampUtils; +import com.linkedin.metadata.utils.EntityApiUtils; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.metadata.utils.PegasusUtils; import com.linkedin.metadata.utils.metrics.MetricUtils; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java index da48a2b76d..1471fdf8f1 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java @@ -19,13 +19,13 @@ import com.linkedin.metadata.aspect.SystemAspect; import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.entity.ebean.EbeanAspectV2; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; -import com.linkedin.metadata.entity.validation.RecordTemplateValidator; import com.linkedin.metadata.models.AspectSpec; import 
com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.snapshot.Snapshot; import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.PegasusUtils; +import com.linkedin.metadata.utils.RecordTemplateValidator; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/aspect/validation/UrnAnnotationValidatorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/aspect/validation/UrnAnnotationValidatorTest.java new file mode 100644 index 0000000000..d552043f57 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/aspect/validation/UrnAnnotationValidatorTest.java @@ -0,0 +1,278 @@ +package com.linkedin.metadata.aspect.validation; + +import static org.mockito.Mockito.*; +import static org.testng.Assert.*; + +import com.linkedin.common.urn.Urn; +import com.linkedin.data.DataMap; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.UrnValidationFieldSpec; +import com.linkedin.metadata.models.annotation.UrnValidationAnnotation; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.test.metadata.context.TestOperationContexts; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import 
org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class UrnAnnotationValidatorTest { + + private static final OperationContext TEST_CONTEXT = + TestOperationContexts.systemContextNoSearchAuthorization(); + private static final AspectPluginConfig TEST_PLUGIN_CONFIG = + AspectPluginConfig.builder() + .className(UrnAnnotationValidator.class.getName()) + .enabled(true) + .supportedOperations(List.of("UPSERT")) + .supportedEntityAspectNames(List.of(AspectPluginConfig.EntityAspectName.ALL)) + .build(); + + @Mock private AspectSpec mockAspectSpec; + + @Mock private BatchItem mockBatchItem; + + @Mock private RetrieverContext mockRetrieverContext; + + @Mock private AspectRetriever mockAspectRetriever; + + @Mock private RecordTemplate mockRecordTemplate; + + private UrnAnnotationValidator validator; + + @BeforeMethod + public void setup() { + MockitoAnnotations.openMocks(this); + validator = new UrnAnnotationValidator(); + validator.setConfig(TEST_PLUGIN_CONFIG); + when(mockRetrieverContext.getAspectRetriever()).thenReturn(mockAspectRetriever); + when(mockAspectRetriever.getEntityRegistry()).thenReturn(TEST_CONTEXT.getEntityRegistry()); + } + + @Test + public void testValidateProposedAspects_WithStrictValidation() throws Exception { + // Arrange + Urn validUrn = + Urn.createFromString("urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD)"); + + // Create DataMap with URN + DataMap dataMap = new DataMap(); + dataMap.put("urn", validUrn.toString()); + + // Set up mock UrnValidationAnnotation + UrnValidationAnnotation annotation = mock(UrnValidationAnnotation.class); + when(annotation.isStrict()).thenReturn(true); + when(annotation.getEntityTypes()).thenReturn(Collections.singletonList("dataset")); + + // Set up UrnValidationFieldSpec + UrnValidationFieldSpec fieldSpec = mock(UrnValidationFieldSpec.class); + when(fieldSpec.getUrnValidationAnnotation()).thenReturn(annotation); + + // Set up AspectSpec's UrnValidationFieldSpecMap + 
Map fieldSpecMap = new HashMap<>(); + fieldSpecMap.put("/urn", fieldSpec); + when(mockAspectSpec.getUrnValidationFieldSpecMap()).thenReturn(fieldSpecMap); + + // Set up BatchItem mocks + when(mockBatchItem.getAspectSpec()).thenReturn(mockAspectSpec); + when(mockBatchItem.getRecordTemplate()).thenReturn(mockRecordTemplate); + when(mockRecordTemplate.data()).thenReturn(dataMap); + + // Set up empty existence map for the strict validation test + when(mockAspectRetriever.entityExists(any())).thenReturn(Collections.emptyMap()); + + // Act + Stream result = + validator.validateProposedAspects( + Collections.singletonList(mockBatchItem), mockRetrieverContext); + + // Assert + List exceptions = result.collect(Collectors.toList()); + assertTrue(exceptions.isEmpty(), "No validation exceptions should be thrown for valid URN"); + } + + @Test + public void testValidateProposedAspects_WithFailedStrictValidation() throws Exception { + // Arrange + String invalidUrn = "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD"; + + // Create DataMap with URN + DataMap dataMap = new DataMap(); + dataMap.put("urn", invalidUrn); + + // Set up mock UrnValidationAnnotation + UrnValidationAnnotation annotation = mock(UrnValidationAnnotation.class); + when(annotation.isStrict()).thenReturn(true); + when(annotation.getEntityTypes()).thenReturn(Collections.singletonList("dataset")); + + // Set up UrnValidationFieldSpec + UrnValidationFieldSpec fieldSpec = mock(UrnValidationFieldSpec.class); + when(fieldSpec.getUrnValidationAnnotation()).thenReturn(annotation); + + // Set up AspectSpec's UrnValidationFieldSpecMap + Map fieldSpecMap = new HashMap<>(); + fieldSpecMap.put("/urn", fieldSpec); + when(mockAspectSpec.getUrnValidationFieldSpecMap()).thenReturn(fieldSpecMap); + + // Set up BatchItem mocks + when(mockBatchItem.getAspectSpec()).thenReturn(mockAspectSpec); + when(mockBatchItem.getRecordTemplate()).thenReturn(mockRecordTemplate); + 
when(mockRecordTemplate.data()).thenReturn(dataMap); + + // Set up empty existence map for the strict validation test + when(mockAspectRetriever.entityExists(any())).thenReturn(Collections.emptyMap()); + + // Act + Stream result = + validator.validateProposedAspects( + Collections.singletonList(mockBatchItem), mockRetrieverContext); + + // Assert + List exceptions = result.collect(Collectors.toList()); + assertFalse(exceptions.isEmpty(), "Validation exception should be thrown for invalid URN"); + assertTrue(exceptions.get(0).getMessage().contains("invalid urn")); + } + + @Test + public void testValidateProposedAspects_WithInvalidEntityType() throws Exception { + // Arrange + Urn invalidUrn = Urn.createFromString("urn:li:corpuser:johndoe"); + + // Create DataMap with URN + DataMap dataMap = new DataMap(); + dataMap.put("urn", invalidUrn.toString()); + + // Set up mock UrnValidationAnnotation + UrnValidationAnnotation annotation = mock(UrnValidationAnnotation.class); + when(annotation.isStrict()).thenReturn(true); + when(annotation.getEntityTypes()).thenReturn(Collections.singletonList("dataset")); + + // Set up UrnValidationFieldSpec + UrnValidationFieldSpec fieldSpec = mock(UrnValidationFieldSpec.class); + when(fieldSpec.getUrnValidationAnnotation()).thenReturn(annotation); + + // Set up AspectSpec's UrnValidationFieldSpecMap + Map fieldSpecMap = new HashMap<>(); + fieldSpecMap.put("/urn", fieldSpec); + when(mockAspectSpec.getUrnValidationFieldSpecMap()).thenReturn(fieldSpecMap); + + // Set up BatchItem mocks + when(mockBatchItem.getAspectSpec()).thenReturn(mockAspectSpec); + when(mockBatchItem.getRecordTemplate()).thenReturn(mockRecordTemplate); + when(mockRecordTemplate.data()).thenReturn(dataMap); + + // Set up empty existence map + when(mockAspectRetriever.entityExists(any())).thenReturn(Collections.emptyMap()); + + // Act + Stream result = + validator.validateProposedAspects( + Collections.singletonList(mockBatchItem), mockRetrieverContext); + + // Assert + 
List exceptions = result.collect(Collectors.toList()); + assertFalse( + exceptions.isEmpty(), "Validation exception should be thrown for invalid entity type"); + assertTrue(exceptions.get(0).getMessage().contains("Invalid entity type")); + } + + @Test + public void testValidateProposedAspects_WithExistenceCheck() throws Exception { + // Arrange + Urn existingUrn = + Urn.createFromString("urn:li:dataset:(urn:li:dataPlatform:hdfs,ExistingDataset,PROD)"); + + // Create DataMap with URN + DataMap dataMap = new DataMap(); + dataMap.put("urn", existingUrn.toString()); + + // Set up mock UrnValidationAnnotation + UrnValidationAnnotation annotation = mock(UrnValidationAnnotation.class); + when(annotation.isExist()).thenReturn(true); + when(annotation.getEntityTypes()).thenReturn(Collections.emptyList()); + + // Set up UrnValidationFieldSpec + UrnValidationFieldSpec fieldSpec = mock(UrnValidationFieldSpec.class); + when(fieldSpec.getUrnValidationAnnotation()).thenReturn(annotation); + + // Set up AspectSpec's UrnValidationFieldSpecMap + Map fieldSpecMap = new HashMap<>(); + fieldSpecMap.put("/urn", fieldSpec); + when(mockAspectSpec.getUrnValidationFieldSpecMap()).thenReturn(fieldSpecMap); + + // Set up BatchItem mocks + when(mockBatchItem.getAspectSpec()).thenReturn(mockAspectSpec); + when(mockBatchItem.getRecordTemplate()).thenReturn(mockRecordTemplate); + when(mockRecordTemplate.data()).thenReturn(dataMap); + + Map existenceMap = new HashMap<>(); + existenceMap.put(existingUrn, true); + when(mockAspectRetriever.entityExists(Collections.singleton(existingUrn))) + .thenReturn(existenceMap); + + // Act + Stream result = + validator.validateProposedAspects( + Collections.singletonList(mockBatchItem), mockRetrieverContext); + + // Assert + List exceptions = result.collect(Collectors.toList()); + assertTrue(exceptions.isEmpty(), "No validation exceptions should be thrown for existing URN"); + } + + @Test + public void testValidateProposedAspects_WithNonExistentUrn() throws 
Exception { + // Arrange + Urn nonExistentUrn = + Urn.createFromString("urn:li:dataset:(urn:li:dataPlatform:hdfs,NonExistentDataset,PROD)"); + + // Create DataMap with URN + DataMap dataMap = new DataMap(); + dataMap.put("urn", nonExistentUrn.toString()); + + // Set up mock UrnValidationAnnotation + UrnValidationAnnotation annotation = mock(UrnValidationAnnotation.class); + when(annotation.isExist()).thenReturn(true); + when(annotation.getEntityTypes()).thenReturn(Collections.emptyList()); + + // Set up UrnValidationFieldSpec + UrnValidationFieldSpec fieldSpec = mock(UrnValidationFieldSpec.class); + when(fieldSpec.getUrnValidationAnnotation()).thenReturn(annotation); + + // Set up AspectSpec's UrnValidationFieldSpecMap + Map fieldSpecMap = new HashMap<>(); + fieldSpecMap.put("/urn", fieldSpec); + when(mockAspectSpec.getUrnValidationFieldSpecMap()).thenReturn(fieldSpecMap); + + // Set up BatchItem mocks + when(mockBatchItem.getAspectSpec()).thenReturn(mockAspectSpec); + when(mockBatchItem.getRecordTemplate()).thenReturn(mockRecordTemplate); + when(mockRecordTemplate.data()).thenReturn(dataMap); + + Map existenceMap = new HashMap<>(); + existenceMap.put(nonExistentUrn, false); + when(mockAspectRetriever.entityExists(Collections.singleton(nonExistentUrn))) + .thenReturn(existenceMap); + + // Act + Stream result = + validator.validateProposedAspects( + Collections.singletonList(mockBatchItem), mockRetrieverContext); + + // Assert + List exceptions = result.collect(Collectors.toList()); + assertFalse(exceptions.isEmpty(), "Validation exception should be thrown for non-existent URN"); + assertTrue(exceptions.get(0).getMessage().contains("Urn does not exist")); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java index f2726af772..44e9b0db06 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java +++ 
b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java @@ -66,6 +66,7 @@ import com.linkedin.metadata.service.UpdateIndicesService; import com.linkedin.metadata.snapshot.CorpUserSnapshot; import com.linkedin.metadata.snapshot.Snapshot; import com.linkedin.metadata.utils.AuditStampUtils; +import com.linkedin.metadata.utils.EntityApiUtils; import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.metadata.utils.metrics.MetricUtils; diff --git a/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl b/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl index 416e2c5c11..9e59337860 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl @@ -24,6 +24,11 @@ record StructuredPropertyDefinition { * The value type of the property. Must be a dataType. * e.g. 
To indicate that the property is of type DATE, use urn:li:dataType:datahub.date */ + @UrnValidation = { + "exist": true, + "strict": true, + "entityTypes": [ "dataType" ], + } valueType: Urn /** @@ -59,6 +64,11 @@ record StructuredPropertyDefinition { "fieldName": "entityTypes" } } + @UrnValidation = { + "exist": true, + "strict": true, + "entityTypes": [ "entityType" ], + } entityTypes: array[Urn] /** diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java index 7d0937663f..d9bd4f9880 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java @@ -18,6 +18,7 @@ import com.linkedin.metadata.aspect.plugins.hooks.MutationHook; import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; import com.linkedin.metadata.aspect.validation.ExecutionRequestResultValidator; import com.linkedin.metadata.aspect.validation.FieldPathValidator; +import com.linkedin.metadata.aspect.validation.UrnAnnotationValidator; import com.linkedin.metadata.dataproducts.sideeffects.DataProductUnsetSideEffect; import com.linkedin.metadata.entity.versioning.sideeffects.VersionSetSideEffect; import com.linkedin.metadata.entity.versioning.validation.VersionPropertiesValidator; @@ -63,12 +64,7 @@ public class SpringStandardPluginConfiguration { .className(IgnoreUnknownMutator.class.getName()) .enabled(ignoreUnknownEnabled && !extensionsEnabled) .supportedOperations(List.of("*")) - .supportedEntityAspectNames( - List.of( - AspectPluginConfig.EntityAspectName.builder() - .entityName("*") - .aspectName("*") - .build())) + .supportedEntityAspectNames(List.of(AspectPluginConfig.EntityAspectName.ALL)) .build()); } @@ -256,4 
+252,19 @@ public class SpringStandardPluginConfiguration { .build())) .build()); } + + @Bean + public AspectPayloadValidator urnAnnotationValidator() { + return new UrnAnnotationValidator() + .setConfig( + AspectPluginConfig.builder() + .className(UrnAnnotationValidator.class.getName()) + .enabled(true) + .supportedOperations( + // Special note: RESTATE is not included to allow out of order restoration of + // aspects. + List.of("UPSERT", "UPDATE", "CREATE", "CREATE_ENTITY")) + .supportedEntityAspectNames(List.of(AspectPluginConfig.EntityAspectName.ALL)) + .build()); + } } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java index bafa8c43f6..9f9571c792 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java @@ -18,7 +18,6 @@ import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.aspect.batch.BatchItem; import com.linkedin.metadata.aspect.batch.ChangeMCP; -import com.linkedin.metadata.entity.EntityApiUtils; import com.linkedin.metadata.entity.IngestResult; import com.linkedin.metadata.entity.UpdateAspectResult; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; @@ -28,6 +27,7 @@ import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.search.SearchEntity; import com.linkedin.metadata.search.SearchEntityArray; import com.linkedin.metadata.utils.AuditStampUtils; +import com.linkedin.metadata.utils.EntityApiUtils; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.metadata.utils.SystemMetadataUtils; import com.linkedin.mxe.MetadataChangeProposal; diff --git 
a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java index 6236b1eb7a..71dee20d4c 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java @@ -26,7 +26,6 @@ import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.aspect.batch.BatchItem; import com.linkedin.metadata.aspect.batch.ChangeMCP; -import com.linkedin.metadata.entity.EntityApiUtils; import com.linkedin.metadata.entity.IngestResult; import com.linkedin.metadata.entity.RollbackResult; import com.linkedin.metadata.entity.UpdateAspectResult; @@ -43,6 +42,7 @@ import com.linkedin.metadata.search.ScrollResult; import com.linkedin.metadata.search.SearchEntity; import com.linkedin.metadata.search.SearchEntityArray; import com.linkedin.metadata.utils.AuditStampUtils; +import com.linkedin.metadata.utils.EntityApiUtils; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.metadata.utils.SearchUtil; import com.linkedin.mxe.MetadataChangeProposal; diff --git a/metadata-utils/build.gradle b/metadata-utils/build.gradle index 410641ef67..5507e43c08 100644 --- a/metadata-utils/build.gradle +++ b/metadata-utils/build.gradle @@ -34,6 +34,7 @@ dependencies { testImplementation project(':test-models') testImplementation project(path: ':test-models', configuration: 'testDataTemplate') testImplementation externalDependency.testng + testImplementation project(':metadata-operation-context') constraints { implementation(externalDependency.log4jCore) { diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/EntityApiUtils.java 
b/metadata-utils/src/main/java/com/linkedin/metadata/utils/EntityApiUtils.java similarity index 91% rename from metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/EntityApiUtils.java rename to metadata-utils/src/main/java/com/linkedin/metadata/utils/EntityApiUtils.java index fa6d9cae45..ba2358eaa9 100644 --- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/EntityApiUtils.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/utils/EntityApiUtils.java @@ -1,4 +1,4 @@ -package com.linkedin.metadata.entity; +package com.linkedin.metadata.utils; import static com.linkedin.metadata.utils.SystemMetadataUtils.createDefaultSystemMetadata; @@ -9,9 +9,6 @@ import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.metadata.utils.EntityKeyUtils; -import com.linkedin.metadata.utils.GenericRecordUtils; -import com.linkedin.metadata.utils.PegasusUtils; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.mxe.SystemMetadata; import javax.annotation.Nonnull; diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/EntityRegistryUrnValidator.java b/metadata-utils/src/main/java/com/linkedin/metadata/utils/EntityRegistryUrnValidator.java similarity index 97% rename from metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/EntityRegistryUrnValidator.java rename to metadata-utils/src/main/java/com/linkedin/metadata/utils/EntityRegistryUrnValidator.java index 3d7abee556..6f2c43d517 100644 --- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/EntityRegistryUrnValidator.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/utils/EntityRegistryUrnValidator.java @@ -3,7 +3,7 @@ // (powered by FernFlower decompiler) // -package 
com.linkedin.metadata.entity.validation; +package com.linkedin.metadata.utils; import com.linkedin.common.urn.Urn; import com.linkedin.data.message.Message; @@ -15,7 +15,6 @@ import com.linkedin.data.schema.validator.ValidatorContext; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.RelationshipFieldSpec; import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.metadata.utils.EntityKeyUtils; import java.lang.reflect.InvocationTargetException; import java.net.URISyntaxException; import java.util.List; diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/RecordTemplateValidator.java b/metadata-utils/src/main/java/com/linkedin/metadata/utils/RecordTemplateValidator.java similarity index 97% rename from metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/RecordTemplateValidator.java rename to metadata-utils/src/main/java/com/linkedin/metadata/utils/RecordTemplateValidator.java index 12e39f0349..fb2e0d553d 100644 --- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/RecordTemplateValidator.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/utils/RecordTemplateValidator.java @@ -1,4 +1,4 @@ -package com.linkedin.metadata.entity.validation; +package com.linkedin.metadata.utils; import com.linkedin.common.urn.UrnValidator; import com.linkedin.data.schema.validation.CoercionMode; diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/utils/UrnValidationUtil.java b/metadata-utils/src/main/java/com/linkedin/metadata/utils/UrnValidationUtil.java new file mode 100644 index 0000000000..f5bbf3e093 --- /dev/null +++ b/metadata-utils/src/main/java/com/linkedin/metadata/utils/UrnValidationUtil.java @@ -0,0 +1,259 @@ +package com.linkedin.metadata.utils; + +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.DataList; +import 
com.linkedin.data.DataMap; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.UrnValidationFieldSpec; +import com.linkedin.metadata.models.annotation.UrnValidationAnnotation; +import com.linkedin.metadata.models.registry.EntityRegistry; +import java.net.URISyntaxException; +import java.net.URLDecoder; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nonnull; +import lombok.Value; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class UrnValidationUtil { + public static final int URN_NUM_BYTES_LIMIT = 512; + // Related to BrowsePathv2 + public static final String URN_DELIMITER_SEPARATOR = "␟"; + // https://datahubproject.io/docs/what/urn/#restrictions + public static final Set ILLEGAL_URN_COMPONENT_CHARACTERS = Set.of("(", ")"); + public static final Set ILLEGAL_URN_TUPLE_CHARACTERS = Set.of(","); + + private UrnValidationUtil() {} + + public static void validateUrn( + @Nonnull EntityRegistry entityRegistry, @Nonnull final Urn urn, boolean strict) { + EntityRegistryUrnValidator validator = new EntityRegistryUrnValidator(entityRegistry); + validator.setCurrentEntitySpec(entityRegistry.getEntitySpec(urn.getEntityType())); + RecordTemplateValidator.validate( + EntityApiUtils.buildKeyAspect(entityRegistry, urn), + validationResult -> { + throw new IllegalArgumentException( + "Invalid urn: " + urn + "\n Cause: " + validationResult.getMessages()); + }, + validator); + + if (urn.toString().trim().length() != urn.toString().length()) { + throw new IllegalArgumentException( + "Error: cannot provide an URN with leading or trailing whitespace"); + } + if 
(!Constants.SCHEMA_FIELD_ENTITY_NAME.equals(urn.getEntityType()) + && URLEncoder.encode(urn.toString()).length() > URN_NUM_BYTES_LIMIT) { + throw new IllegalArgumentException( + "Error: cannot provide an URN longer than " + + Integer.toString(URN_NUM_BYTES_LIMIT) + + " bytes (when URL encoded)"); + } + + if (urn.toString().contains(URN_DELIMITER_SEPARATOR)) { + throw new IllegalArgumentException( + "Error: URN cannot contain " + URN_DELIMITER_SEPARATOR + " character"); + } + + int totalParts = urn.getEntityKey().getParts().size(); + List illegalComponents = + urn.getEntityKey().getParts().stream() + .flatMap(part -> processUrnPartRecursively(part, totalParts)) + .collect(Collectors.toList()); + + if (!illegalComponents.isEmpty()) { + String message = + String.format( + "Illegal `%s` characters detected in URN %s component(s): %s", + ILLEGAL_URN_COMPONENT_CHARACTERS, urn, illegalComponents); + + if (strict) { + throw new IllegalArgumentException(message); + } else { + log.error(message); + } + } + + try { + Urn.createFromString(urn.toString()); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + } + + /** Recursively process URN parts with URL decoding */ + private static Stream processUrnPartRecursively(String urnPart, int totalParts) { + String decodedPart = + URLDecoder.decode(URLEncodingFixer.fixURLEncoding(urnPart), StandardCharsets.UTF_8); + if (decodedPart.startsWith("urn:li:")) { + // Recursively process nested URN after decoding + int nestedParts = UrnUtils.getUrn(decodedPart).getEntityKey().getParts().size(); + return UrnUtils.getUrn(decodedPart).getEntityKey().getParts().stream() + .flatMap(part -> processUrnPartRecursively(part, nestedParts)); + } + if (totalParts > 1) { + if (ILLEGAL_URN_TUPLE_CHARACTERS.stream().anyMatch(c -> urnPart.contains(c))) { + return Stream.of(urnPart); + } + } + if (ILLEGAL_URN_COMPONENT_CHARACTERS.stream().anyMatch(c -> urnPart.contains(c))) { + return Stream.of(urnPart); + } + + return 
Stream.empty(); + } + + /** + * Traverses a DataMap and finds all fields with UrnValidation annotations + * + * @param item The item to traverse + * @param aspectSpec The AspectSpec containing UrnValidation field specifications + * @return Set of UrnValidationEntry containing field paths, values and annotations + */ + @Nonnull + public static Set findUrnValidationFields( + @Nonnull T item, @Nonnull AspectSpec aspectSpec) { + + Set result = new HashSet<>(); + Map urnValidationSpecs = + aspectSpec.getUrnValidationFieldSpecMap(); + + if (item.getRecordTemplate() != null && item.getRecordTemplate().data() != null) { + // Traverse the DataMap recursively + traverseDataMap(item.getRecordTemplate().data(), "", urnValidationSpecs, result); + } + + return result; + } + + /** + * Traverses multiple DataMaps and finds all fields with UrnValidation annotations + * + * @param items Collection of items to traverse + * @param aspectSpec The AspectSpec containing UrnValidation field specifications + * @return Map of items to set of UrnValidationEntry containing field paths, values and + * annotations + */ + public static Map> findUrnValidationFields( + @Nonnull Collection items, @Nonnull AspectSpec aspectSpec) { + + Map> result = new HashMap<>(); + + for (T item : items) { + if (item != null) { + result.put(item, findUrnValidationFields(item, aspectSpec)); + } + } + + return result; + } + + private static void traverseDataMap( + DataMap dataMap, + String currentPath, + Map urnValidationSpecs, + Set result) { + + for (String key : dataMap.keySet()) { + // Standardize path construction to always start with "/" + String fieldPath; + if (currentPath.isEmpty()) { + fieldPath = "/" + key; + } else { + fieldPath = currentPath + "/" + key; + } + Object value = dataMap.get(key); + + // Check if current field has UrnValidation annotation + UrnValidationFieldSpec spec = urnValidationSpecs.get(fieldPath); + if (spec != null) { + if (value instanceof String) { + result.add( + new 
UrnValidationEntry(fieldPath, (String) value, spec.getUrnValidationAnnotation())); + } else if (value instanceof DataList) { + DataList list = (DataList) value; + for (Object item : list) { + if (item instanceof String) { + result.add( + new UrnValidationEntry( + fieldPath, (String) item, spec.getUrnValidationAnnotation())); + } + } + } + } + + // Recursively traverse nested DataMaps + if (value instanceof DataMap) { + traverseDataMap((DataMap) value, fieldPath, urnValidationSpecs, result); + } + } + } + + /** Container class for URN validation field information */ + @Value + public static class UrnValidationEntry { + String fieldPath; + String urn; + UrnValidationAnnotation annotation; + } + + /** + * Fixes malformed URL encoding by escaping unescaped % characters while preserving valid + * percent-encoded sequences. + */ + private static class URLEncodingFixer { + /** + * @param input The potentially malformed URL-encoded string + * @return A string with proper URL encoding that can be safely decoded + */ + public static String fixURLEncoding(String input) { + if (input == null) { + return null; + } + + StringBuilder result = new StringBuilder(input.length() * 2); + int i = 0; + + while (i < input.length()) { + char currentChar = input.charAt(i); + + if (currentChar == '%') { + if (i + 2 < input.length()) { + // Check if the next two characters form a valid hex pair + String hexPair = input.substring(i + 1, i + 3); + if (isValidHexPair(hexPair)) { + // This is a valid percent-encoded sequence, keep it as is + result.append(currentChar); + } else { + // Invalid sequence, escape the % character + result.append("%25"); + } + } else { + // % at the end of string, escape it + result.append("%25"); + } + } else { + result.append(currentChar); + } + i++; + } + + return result.toString(); + } + + private static boolean isValidHexPair(String pair) { + return pair.matches("[0-9A-Fa-f]{2}"); + } + } +} diff --git 
a/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/validation/ValidationApiUtilsTest.java b/metadata-utils/src/test/java/com/linkedin/metadata/utils/UrnValidationUtilTest.java similarity index 75% rename from metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/validation/ValidationApiUtilsTest.java rename to metadata-utils/src/test/java/com/linkedin/metadata/utils/UrnValidationUtilTest.java index 2ab6a50945..da3832b858 100644 --- a/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/validation/ValidationApiUtilsTest.java +++ b/metadata-utils/src/test/java/com/linkedin/metadata/utils/UrnValidationUtilTest.java @@ -1,4 +1,4 @@ -package com.linkedin.metadata.entity.validation; +package com.linkedin.metadata.utils; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; @@ -7,44 +7,43 @@ import io.datahubproject.test.metadata.context.TestOperationContexts; import java.net.URISyntaxException; import org.testng.annotations.Test; -public class ValidationApiUtilsTest { +public class UrnValidationUtilTest { private static final EntityRegistry entityRegistry = TestOperationContexts.defaultEntityRegistry(); @Test public void testValidateDatasetUrn() { Urn validUrn = UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hdfs,/path/to/data,PROD)"); - ValidationApiUtils.validateUrn(entityRegistry, validUrn, true); + UrnValidationUtil.validateUrn(entityRegistry, validUrn, true); // If no exception is thrown, test passes } @Test public void testSimpleUrnColon() { - ValidationApiUtils.validateUrn( - entityRegistry, UrnUtils.getUrn("urn:li:corpuser:foo:bar"), true); - ValidationApiUtils.validateUrn( + UrnValidationUtil.validateUrn(entityRegistry, UrnUtils.getUrn("urn:li:corpuser:foo:bar"), true); + UrnValidationUtil.validateUrn( entityRegistry, UrnUtils.getUrn("urn:li:dataPlatform:abc:def"), true); - ValidationApiUtils.validateUrn( + UrnValidationUtil.validateUrn( entityRegistry, 
UrnUtils.getUrn("urn:li:corpuser:foo:bar@example.com"), true); // If no exception is thrown, test passes } @Test public void testSimpleUrnComma() { - ValidationApiUtils.validateUrn(entityRegistry, UrnUtils.getUrn("urn:li:corpuser:,"), true); + UrnValidationUtil.validateUrn(entityRegistry, UrnUtils.getUrn("urn:li:corpuser:,"), true); // If no exception is thrown, test passes } @Test(expectedExceptions = IllegalArgumentException.class) public void testTupleUrnComma() { - ValidationApiUtils.validateUrn( + UrnValidationUtil.validateUrn( entityRegistry, UrnUtils.getUrn("urn:li:dashboard:(looker,dashboards,thelook)"), true); } @Test(expectedExceptions = IllegalArgumentException.class) public void testFabricTypeCasing() { // prod != PROD - ValidationApiUtils.validateUrn( + UrnValidationUtil.validateUrn( entityRegistry, UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:abc:def,table_name,prod)"), true); @@ -55,33 +54,33 @@ public class ValidationApiUtilsTest { Urn validUrn = Urn.createFromString( "urn:li:dataset:(urn:li:dataPlatform:s3,urn:li:dataset:%28urn:li:dataPlatform:s3%2Ctest-datalake-concepts/prog_maintenance%2CPROD%29,PROD)"); - ValidationApiUtils.validateUrn(entityRegistry, validUrn, true); + UrnValidationUtil.validateUrn(entityRegistry, validUrn, true); // If no exception is thrown, test passes } @Test(expectedExceptions = IllegalArgumentException.class) public void testFabricTypeParen() { Urn invalidUrn = UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hdfs,/path/to/data,())"); - ValidationApiUtils.validateUrn(entityRegistry, invalidUrn, true); + UrnValidationUtil.validateUrn(entityRegistry, invalidUrn, true); } @Test(expectedExceptions = IllegalArgumentException.class) public void testUrnWithTrailingWhitespace() { Urn invalidUrn = UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hdfs,/path/to/data,PROD) "); - ValidationApiUtils.validateUrn(entityRegistry, invalidUrn, true); + UrnValidationUtil.validateUrn(entityRegistry, invalidUrn, true); } 
@Test(expectedExceptions = IllegalArgumentException.class) public void testUrnWithIllegalDelimiter() { Urn invalidUrn = UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hdfs␟path,PROD)"); - ValidationApiUtils.validateUrn(entityRegistry, invalidUrn, true); + UrnValidationUtil.validateUrn(entityRegistry, invalidUrn, true); } @Test(expectedExceptions = IllegalArgumentException.class) public void testComplexUrnWithParens1() { Urn invalidUrn = UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hdfs,(illegal),PROD)"); - ValidationApiUtils.validateUrn(entityRegistry, invalidUrn, true); + UrnValidationUtil.validateUrn(entityRegistry, invalidUrn, true); } @Test(expectedExceptions = IllegalArgumentException.class) @@ -89,13 +88,13 @@ public class ValidationApiUtilsTest { Urn invalidUrn = UrnUtils.getUrn( "urn:li:dataJob:(urn:li:dataFlow:(mssql,1/2/3/4.c_n on %28LOCAL%29,PROD),1/2/3/4.c_n on (LOCAL))"); - ValidationApiUtils.validateUrn(entityRegistry, invalidUrn, true); + UrnValidationUtil.validateUrn(entityRegistry, invalidUrn, true); } @Test(expectedExceptions = IllegalArgumentException.class) public void testSimpleUrnWithParens() { Urn invalidUrn = UrnUtils.getUrn("urn:li:corpuser:(foo)123"); - ValidationApiUtils.validateUrn(entityRegistry, invalidUrn, true); + UrnValidationUtil.validateUrn(entityRegistry, invalidUrn, true); } @Test(expectedExceptions = IllegalArgumentException.class) @@ -108,7 +107,7 @@ public class ValidationApiUtilsTest { longPath.append(",PROD)"); Urn invalidUrn = UrnUtils.getUrn(longPath.toString()); - ValidationApiUtils.validateUrn(entityRegistry, invalidUrn, true); + UrnValidationUtil.validateUrn(entityRegistry, invalidUrn, true); } @Test @@ -117,20 +116,20 @@ public class ValidationApiUtilsTest { UrnUtils.getUrn( "urn:li:dataset:(urn:li:dataPlatform:bigquery,myproject.dataset.table,PROD)"); - ValidationApiUtils.validateUrn(entityRegistry, validUrn, true); + UrnValidationUtil.validateUrn(entityRegistry, validUrn, true); // If no exception is 
thrown, test passes } @Test(expectedExceptions = NullPointerException.class) public void testUrnNull() { - ValidationApiUtils.validateUrn(entityRegistry, null, true); + UrnValidationUtil.validateUrn(entityRegistry, null, true); } @Test public void testValidPartialUrlEncode() { Urn validUrn = UrnUtils.getUrn("urn:li:assertion:123=-%28__% weekly__%29"); - ValidationApiUtils.validateUrn(entityRegistry, validUrn, true); + UrnValidationUtil.validateUrn(entityRegistry, validUrn, true); // If no exception is thrown, test passes } @@ -140,7 +139,7 @@ public class ValidationApiUtilsTest { UrnUtils.getUrn( "urn:li:dataset:(urn:li:dataPlatform:s3,urn:li:dataset:%28urn:li:dataPlatform:s3%2Ctest-datalake-concepts%prog_maintenance%2CPROD%29,PROD)"); - ValidationApiUtils.validateUrn(entityRegistry, validUrn, true); + UrnValidationUtil.validateUrn(entityRegistry, validUrn, true); // If no exception is thrown, test passes } @@ -149,14 +148,14 @@ public class ValidationApiUtilsTest { Urn validUrn = UrnUtils.getUrn("urn:li:dashboard:(looker,dashboards.thelook::cohort_data_tool)"); - ValidationApiUtils.validateUrn(entityRegistry, validUrn, true); + UrnValidationUtil.validateUrn(entityRegistry, validUrn, true); // If no exception is thrown, test passes } @Test public void testNoTupleComma() { Urn invalidUrn = UrnUtils.getUrn("urn:li:corpuser:,"); - ValidationApiUtils.validateUrn(entityRegistry, invalidUrn, true); + UrnValidationUtil.validateUrn(entityRegistry, invalidUrn, true); // If no exception is thrown, test passes } } diff --git a/mock-entity-registry/src/main/java/mock/MockAspectSpec.java b/mock-entity-registry/src/main/java/mock/MockAspectSpec.java index 8be6f83832..85bd2f130f 100644 --- a/mock-entity-registry/src/main/java/mock/MockAspectSpec.java +++ b/mock-entity-registry/src/main/java/mock/MockAspectSpec.java @@ -9,6 +9,7 @@ import com.linkedin.metadata.models.SearchableFieldSpec; import com.linkedin.metadata.models.SearchableRefFieldSpec; import 
com.linkedin.metadata.models.TimeseriesFieldCollectionSpec; import com.linkedin.metadata.models.TimeseriesFieldSpec; +import com.linkedin.metadata.models.UrnValidationFieldSpec; import com.linkedin.metadata.models.annotation.AspectAnnotation; import java.util.List; import javax.annotation.Nonnull; @@ -22,6 +23,7 @@ public class MockAspectSpec extends AspectSpec { @Nonnull List timeseriesFieldSpecs, @Nonnull List timeseriesFieldCollectionSpecs, @Nonnull final List searchableRefFieldSpecs, + @Nonnull final List urnValidationFieldSpecs, RecordDataSchema schema, Class aspectClass) { super( @@ -32,6 +34,7 @@ public class MockAspectSpec extends AspectSpec { timeseriesFieldSpecs, timeseriesFieldCollectionSpecs, searchableRefFieldSpecs, + urnValidationFieldSpecs, schema, aspectClass); } diff --git a/mock-entity-registry/src/main/java/mock/MockEntitySpec.java b/mock-entity-registry/src/main/java/mock/MockEntitySpec.java index f34faea89a..bfcfe4b9bd 100644 --- a/mock-entity-registry/src/main/java/mock/MockEntitySpec.java +++ b/mock-entity-registry/src/main/java/mock/MockEntitySpec.java @@ -90,6 +90,7 @@ public class MockEntitySpec implements EntitySpec { Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList(), type.schema(), (Class) type.getClass().asSubclass(RecordTemplate.class)); } diff --git a/smoke-test/tests/openapi/v3/structured_properties.json b/smoke-test/tests/openapi/v3/structured_properties.json index b000c5da0a..4ebbf92f95 100644 --- a/smoke-test/tests/openapi/v3/structured_properties.json +++ b/smoke-test/tests/openapi/v3/structured_properties.json @@ -20,6 +20,48 @@ "method": "delete" } }, + { + "request": { + "url": "/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.nonExistentValueType", + "description": "Remove test structured property for nonExistentValueType", + "method": "delete" + } + }, + { + "request": { + "url": 
"/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.invalidEntityTypeValueType", + "description": "Remove test structured property for invalidEntityTypeValueType", + "method": "delete" + } + }, + { + "request": { + "url": "/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.invalidUrnValueType", + "description": "Remove test structured property for invalidUrnValueType", + "method": "delete" + } + }, + { + "request": { + "url": "/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.nonExistentEntityTypes", + "description": "Remove test structured property for nonExistentEntityTypes", + "method": "delete" + } + }, + { + "request": { + "url": "/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.invalidEntityTypeEntityTypes", + "description": "Remove test structured property for invalidEntityTypeEntityTypes", + "method": "delete" + } + }, + { + "request": { + "url": "/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.invalidUrnEntityTypes", + "description": "Remove test structured property for invalidUrnEntityTypes", + "method": "delete" + } + }, { "request": { "url": "/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.v3.retentionTime/propertyDefinition", @@ -327,5 +369,185 @@ } } } + }, + { + "request": { + "url": "/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.nonExistentValueType/propertyDefinition", + "description": "Create nonExistent ValueType structured property definition", + "params": { + "createIfNotExists": "false" + }, + "json": { + "value": { + "qualifiedName": "io.acryl.nonExistentValueType", + "valueType": "urn:li:dataType:datahub.DOEST_NOT_EXIST", + "description": "Invalid value type", + "displayName": "Invalid Value Type", + "entityTypes": [ + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.dataFlow" + ] + } + } + }, + "response": { + 
"status_codes": [400], + "exclude_regex_paths": [ + "root\\['message'\\]" + ], + "json": { + "error": "Validation Error" + } + } + }, + { + "request": { + "url": "/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.invalidEntityTypeValueType/propertyDefinition", + "description": "Create invalidEntity ValueType structured property definition", + "params": { + "createIfNotExists": "false" + }, + "json": { + "value": { + "qualifiedName": "io.acryl.invalidEntityTypeValueType", + "valueType": "urn:li:container:datahub.string", + "description": "Invalid value type", + "displayName": "Invalid Value Type", + "entityTypes": [ + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.dataFlow" + ] + } + } + }, + "response": { + "status_codes": [400], + "exclude_regex_paths": [ + "root\\['message'\\]" + ], + "json": { + "error": "Validation Error" + } + } + }, + { + "request": { + "url": "/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.invalidUrnValueType/propertyDefinition", + "description": "Create invalidUrn ValueType structured property definition", + "params": { + "createIfNotExists": "false" + }, + "json": { + "value": { + "qualifiedName": "io.acryl.invalidUrnValueType", + "valueType": "urn:li:dataType:(datahub.string", + "description": "Invalid value type", + "displayName": "Invalid Value Type", + "entityTypes": [ + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.dataFlow" + ] + } + } + }, + "response": { + "status_codes": [400], + "exclude_regex_paths": [ + "root\\['message'\\]" + ], + "json": { + "error": "Validation Error" + } + } + }, + { + "request": { + "url": "/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.nonExistentEntityTypes/propertyDefinition", + "description": "Create nonExistent EntityTypes structured property definition", + "params": { + "createIfNotExists": "false" + }, + "json": { + "value": { + "qualifiedName": 
"io.acryl.nonExistentEntityTypes", + "description": "Invalid EntityTypes", + "displayName": "Invalid EntityTypes", + "valueType": "urn:li:dataType:datahub.string", + "entityTypes": [ + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.DOEST_NOT_EXIST" + ] + } + } + }, + "response": { + "status_codes": [400], + "exclude_regex_paths": [ + "root\\['message'\\]" + ], + "json": { + "error": "Validation Error" + } + } + }, + { + "request": { + "url": "/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.invalidEntityTypeEntityTypes/propertyDefinition", + "description": "Create invalidEntity EntityTypes structured property definition", + "params": { + "createIfNotExists": "false" + }, + "json": { + "value": { + "qualifiedName": "io.acryl.invalidEntityTypeEntityTypes", + "valueType": "urn:li:dataType:datahub.string", + "description": "Invalid EntityTypes", + "displayName": "Invalid EntityTypes", + "entityTypes": [ + "urn:li:entityType:datahub.dataset", + "urn:li:container:datahub.dataFlow" + ] + } + } + }, + "response": { + "status_codes": [400], + "exclude_regex_paths": [ + "root\\['message'\\]" + ], + "json": { + "error": "Validation Error" + } + } + }, + { + "request": { + "url": "/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.invalidUrnEntityTypes/propertyDefinition", + "description": "Create invalidUrn EntityTypes structured property definition", + "params": { + "createIfNotExists": "false" + }, + "json": { + "value": { + "qualifiedName": "io.acryl.invalidUrnEntityTypes", + "valueType": "urn:li:dataType:datahub.string", + "description": "Invalid EntityTypes", + "displayName": "Invalid EntityTypes", + "entityTypes": [ + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:(datahub.dataFlow" + ] + } + } + }, + "response": { + "status_codes": [400], + "exclude_regex_paths": [ + "root\\['message'\\]" + ], + "json": { + "error": "Validation Error" + } + } } ] \ No newline at end of file