package com.linkedin.metadata.models; import com.linkedin.data.schema.ArrayDataSchema; import com.linkedin.data.schema.DataSchema; import com.linkedin.data.schema.RecordDataSchema; import com.linkedin.data.schema.TyperefDataSchema; import com.linkedin.data.schema.UnionDataSchema; import com.linkedin.data.schema.annotation.DataSchemaRichContextTraverser; import com.linkedin.data.schema.annotation.PegasusSchemaAnnotationHandlerImpl; import com.linkedin.data.schema.annotation.SchemaAnnotationHandler; import com.linkedin.data.schema.annotation.SchemaAnnotationProcessor; import com.linkedin.metadata.models.annotation.AspectAnnotation; import com.linkedin.metadata.models.annotation.EntityAnnotation; import com.linkedin.metadata.models.annotation.RelationshipAnnotation; import com.linkedin.metadata.models.annotation.SearchableAnnotation; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.stream.Collectors; import javax.annotation.Nonnull; import lombok.extern.slf4j.Slf4j; @Slf4j public class EntitySpecBuilder { private static final String URN_FIELD_NAME = "urn"; private static final String ASPECTS_FIELD_NAME = "aspects"; public static SchemaAnnotationHandler _searchHandler = new PegasusSchemaAnnotationHandlerImpl(SearchableAnnotation.ANNOTATION_NAME); public static SchemaAnnotationHandler _relationshipHandler = new PegasusSchemaAnnotationHandlerImpl(RelationshipAnnotation.ANNOTATION_NAME); private final AnnotationExtractionMode _extractionMode; private final Set _entityNames = new HashSet<>(); private final Set _relationshipFieldSpecs = new HashSet<>(); public enum AnnotationExtractionMode { /** * Extract all annotations types, the default. */ DEFAULT, /** * Skip annotations on aspect record fields, only * parse entity + aspect annotations. */ IGNORE_ASPECT_FIELDS } public EntitySpecBuilder() { this(AnnotationExtractionMode.DEFAULT); } public EntitySpecBuilder(final AnnotationExtractionMode extractionMode) { _extractionMode = extractionMode; } public List buildEntitySpecs(@Nonnull final DataSchema snapshotSchema) { final UnionDataSchema snapshotUnionSchema = (UnionDataSchema) snapshotSchema.getDereferencedDataSchema(); final List unionMembers = snapshotUnionSchema.getMembers(); final List entitySpecs = new ArrayList<>(); for (final UnionDataSchema.Member member : unionMembers) { final EntitySpec entitySpec = buildEntitySpec(member.getType()); if (entitySpec != null) { entitySpecs.add(entitySpec); } } // Now validate that all relationships point to valid entities. for (final RelationshipFieldSpec spec : _relationshipFieldSpecs) { if (!_entityNames.containsAll(spec.getValidDestinationTypes().stream().map(String::toLowerCase).collect( Collectors.toList()))) { failValidation( String.format("Found invalid relationship with name %s at path %s. Invalid entityType(s) provided.", spec.getRelationshipName(), spec.getPath().toString())); } } return entitySpecs; } public EntitySpec buildEntitySpec(@Nonnull final DataSchema entitySnapshotSchema) { // 0. Validate the Snapshot definition final RecordDataSchema entitySnapshotRecordSchema = validateSnapshot(entitySnapshotSchema); // 1. Parse information about the entity from the "entity" annotation. final Object entityAnnotationObj = entitySnapshotRecordSchema.getProperties().get(EntityAnnotation.ANNOTATION_NAME); if (entityAnnotationObj != null) { EntityAnnotation entityAnnotation = EntityAnnotation.fromSchemaProperty(entityAnnotationObj, entitySnapshotRecordSchema.getFullName()); final ArrayDataSchema aspectArraySchema = (ArrayDataSchema) entitySnapshotRecordSchema.getField(ASPECTS_FIELD_NAME).getType().getDereferencedDataSchema(); final UnionDataSchema aspectUnionSchema = (UnionDataSchema) aspectArraySchema.getItems().getDereferencedDataSchema(); final List unionMembers = aspectUnionSchema.getMembers(); final List aspectSpecs = new ArrayList<>(); for (final UnionDataSchema.Member member : unionMembers) { final AspectSpec spec = buildAspectSpec(member.getType()); aspectSpecs.add(spec); } final EntitySpec entitySpec = new EntitySpec( aspectSpecs, entityAnnotation, entitySnapshotRecordSchema, (TyperefDataSchema) aspectArraySchema.getItems()); validateEntitySpec(entitySpec); return entitySpec; } failValidation(String.format("Could not build entity spec for entity with name %s. Missing @%s annotation.", entitySnapshotRecordSchema.getName(), EntityAnnotation.ANNOTATION_NAME)); return null; } AspectSpec buildAspectSpec(@Nonnull final DataSchema aspectDataSchema) { final RecordDataSchema aspectRecordSchema = validateAspect(aspectDataSchema); final Object aspectAnnotationObj = aspectRecordSchema.getProperties().get(AspectAnnotation.ANNOTATION_NAME); if (aspectAnnotationObj != null) { final AspectAnnotation aspectAnnotation = AspectAnnotation.fromSchemaProperty(aspectAnnotationObj, aspectRecordSchema.getFullName()); if (AnnotationExtractionMode.IGNORE_ASPECT_FIELDS.equals(_extractionMode)) { // Short Circuit. return new AspectSpec( aspectAnnotation, Collections.emptyList(), Collections.emptyList(), aspectRecordSchema); } final SchemaAnnotationProcessor.SchemaAnnotationProcessResult processedSearchResult = SchemaAnnotationProcessor.process(Collections.singletonList(_searchHandler), aspectRecordSchema, new SchemaAnnotationProcessor.AnnotationProcessOption()); final SchemaAnnotationProcessor.SchemaAnnotationProcessResult processedRelationshipResult = SchemaAnnotationProcessor.process(Collections.singletonList(_relationshipHandler), aspectRecordSchema, new SchemaAnnotationProcessor.AnnotationProcessOption()); // Extract Searchable Field Specs final SearchableFieldSpecExtractor searchableFieldSpecExtractor = new SearchableFieldSpecExtractor(); final DataSchemaRichContextTraverser searchableFieldSpecTraverser = new DataSchemaRichContextTraverser(searchableFieldSpecExtractor); searchableFieldSpecTraverser.traverse(processedSearchResult.getResultSchema()); // Extract Relationship Field Specs final RelationshipFieldSpecExtractor relationshipFieldSpecExtractor = new RelationshipFieldSpecExtractor(); final DataSchemaRichContextTraverser relationshipFieldSpecTraverser = new DataSchemaRichContextTraverser(relationshipFieldSpecExtractor); relationshipFieldSpecTraverser.traverse(processedRelationshipResult.getResultSchema()); // Capture the list of entity names from relationships extracted. _relationshipFieldSpecs.addAll(relationshipFieldSpecExtractor.getSpecs()); return new AspectSpec( aspectAnnotation, searchableFieldSpecExtractor.getSpecs(), relationshipFieldSpecExtractor.getSpecs(), aspectRecordSchema); } failValidation(String.format("Could not build aspect spec for aspect with name %s. Missing @Aspect annotation.", aspectRecordSchema.getName())); return null; } private void validateEntitySpec(EntitySpec entitySpec) { if (entitySpec.getKeyAspectSpec() == null) { failValidation(String.format("Did not find required Key Aspect with name %s in aspects for Entity %s in list of aspects.", entitySpec.getKeyAspectName(), entitySpec.getName())); } validateKeyAspect(entitySpec.getKeyAspectSpec()); // Validate aspect specs Set aspectNames = new HashSet<>(); for (final AspectSpec aspectSpec : entitySpec.getAspectSpecs()) { if (aspectNames.contains(aspectSpec.getName())) { failValidation(String.format("Could not build entity spec for entity with name %s." + " Found multiple Aspects with the same name %s", entitySpec.getName(), aspectSpec.getName())); } aspectNames.add(aspectSpec.getName()); } // Validate entity name if (_entityNames.contains(entitySpec.getName().toLowerCase())) { // Duplicate entity found. failValidation(String.format("Could not build entity spec for entity with name %s." + " Found multiple Entity Snapshots with the same name.", entitySpec.getName())); } _entityNames.add(entitySpec.getName().toLowerCase()); } private RecordDataSchema validateSnapshot(@Nonnull final DataSchema entitySnapshotSchema) { // 0. Validate that schema is a Record if (entitySnapshotSchema.getType() != DataSchema.Type.RECORD) { failValidation(String.format("Failed to validate entity snapshot schema of type %s. Schema must be of record type.", entitySnapshotSchema.getType().toString())); } final RecordDataSchema entitySnapshotRecordSchema = (RecordDataSchema) entitySnapshotSchema; // 1. Validate Urn field if (entitySnapshotRecordSchema.getField(URN_FIELD_NAME) == null || entitySnapshotRecordSchema.getField(URN_FIELD_NAME).getType().getDereferencedType() != DataSchema.Type.STRING) { failValidation(String.format("Failed to validate entity snapshot schema with name %s. Invalid urn field.", entitySnapshotRecordSchema.getName())); } // 2. Validate Aspect Array if (entitySnapshotRecordSchema.getField(ASPECTS_FIELD_NAME) == null || entitySnapshotRecordSchema.getField(ASPECTS_FIELD_NAME).getType().getDereferencedType() != DataSchema.Type.ARRAY) { failValidation(String.format("Failed to validate entity snapshot schema with name %s. Invalid aspects field found. " + "'aspects' should be an array of union type.", entitySnapshotRecordSchema.getName())); } // 3. Validate Aspect Union final ArrayDataSchema aspectArray = (ArrayDataSchema) entitySnapshotRecordSchema.getField(ASPECTS_FIELD_NAME) .getType() .getDereferencedDataSchema(); if (aspectArray.getItems().getType() != DataSchema.Type.TYPEREF || aspectArray.getItems().getDereferencedType() != DataSchema.Type.UNION) { failValidation(String.format("Failed to validate entity snapshot schema with name %s. Invalid aspects field field. " + "'aspects' should be an array of union type.", entitySnapshotRecordSchema.getName())); } return entitySnapshotRecordSchema; } private RecordDataSchema validateAspect(@Nonnull final DataSchema aspectSchema) { // Validate that schema is a Record if (aspectSchema.getType() != DataSchema.Type.RECORD) { failValidation(String.format("Failed to validate aspect schema of type %s. Schema must be of record type.", aspectSchema.getType().toString())); } return (RecordDataSchema) aspectSchema; } private void validateKeyAspect(@Nonnull final AspectSpec keyAspect) { // Validate that schema is a Record RecordDataSchema schema = keyAspect.getPegasusSchema(); // Validate that each field is a string or enum. for (RecordDataSchema.Field field : schema.getFields()) { if (!DataSchema.Type.STRING.equals(field.getType().getDereferencedType()) && !DataSchema.Type.ENUM.equals(field.getType().getDereferencedType()) ) { failValidation( String.format("Failed to validate key aspect nameed %s. Key " + "aspects must only contain fields of STRING or ENUM type. Found %s.", keyAspect.getName(), field.getType().toString())); } } } private void failValidation(@Nonnull final String message) { throw new ModelValidationException(message); } }