From c2928a1a6d3b3918c03659ce647ed491bdd9b885 Mon Sep 17 00:00:00 2001 From: RyanHolstien Date: Tue, 8 Jul 2025 10:12:23 -0500 Subject: [PATCH] fix(mysql): modifies hashing and equals behavior for primary keys to match mysql behavior (#13984) --- .../metadata/entity/ebean/EbeanAspectDao.java | 5 +-- .../metadata/entity/ebean/EbeanAspectV2.java | 31 ++++++++++++++-- .../entity/EbeanEntityServiceTest.java | 35 +++++++++++++++++++ 3 files changed, 66 insertions(+), 5 deletions(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java index 766c31b183..13b3a1cc6e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java @@ -303,14 +303,15 @@ public class EbeanAspectDao implements AspectDao, AspectMigrationsDao { int position = 0; + List keyList = new ArrayList<>(keys); final int totalPageCount = QueryUtils.getTotalPageCount(keys.size(), keysCount); final List finalResult = - batchGetSelectString(new ArrayList<>(keys), keysCount, position, forUpdate); + batchGetSelectString(keyList, keysCount, position, forUpdate); while (QueryUtils.hasMore(position, keysCount, totalPageCount)) { position += keysCount; final List oneStatementResult = - batchGetSelectString(new ArrayList<>(keys), keysCount, position, forUpdate); + batchGetSelectString(keyList, keysCount, position, forUpdate); finalResult.addAll(oneStatementResult); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectV2.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectV2.java index 82d6483533..af9356f42c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectV2.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectV2.java @@ -12,9 +12,9 @@ import jakarta.persistence.Lob; import jakarta.persistence.Table; import java.io.Serializable; import java.sql.Timestamp; +import java.util.Objects; import javax.annotation.Nonnull; import lombok.AllArgsConstructor; -import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.NoArgsConstructor; import lombok.Setter; @@ -43,13 +43,17 @@ public class EbeanAspectV2 extends Model { /** Key for an aspect in the table. */ @Embeddable @Getter - @AllArgsConstructor @NoArgsConstructor - @EqualsAndHashCode public static class PrimaryKey implements Serializable { private static final long serialVersionUID = 1L; + public PrimaryKey(@Nonnull String urn, @Nonnull String aspect, long version) { + this.urn = urn.stripTrailing(); + this.aspect = aspect.stripTrailing(); + this.version = version; + } + @Nonnull @Index @Column(name = URN_COLUMN, length = 500, nullable = false) @@ -71,6 +75,27 @@ public class EbeanAspectV2 extends Model { public EntityAspectIdentifier toAspectIdentifier() { return new EntityAspectIdentifier(getUrn(), getAspect(), getVersion()); } + + // Custom Equals and Hash code that trims to handle MySQL PAD SPACE: + // https://dev.mysql.com/doc/refman/8.4/en/charset-binary-collations.html#charset-binary-collations-trailing-space-comparisons + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + PrimaryKey that = (PrimaryKey) o; + return version == that.version + && Objects.equals(urn.stripTrailing(), that.urn.stripTrailing()) + && Objects.equals(aspect.stripTrailing(), that.aspect.stripTrailing()); + } + + @Override + public int hashCode() { + return Objects.hash(urn.stripTrailing(), aspect.stripTrailing(), version); + } } @Nonnull @EmbeddedId @Index protected PrimaryKey key; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java index cec87c730b..eb220807b1 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java @@ -70,6 +70,7 @@ import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.IntStream; import org.apache.commons.lang3.tuple.Triple; +import org.testcontainers.shaded.com.google.common.collect.ImmutableSet; import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -482,6 +483,40 @@ public class EbeanEntityServiceTest "Expected version 0 with systemMeta version 3 accounting for the the collision"); } + // NOTE: This is not currently super useful because H2 always treats spaces as significant + @Test + public void testSystemMetadataDuplicateKeyWhitespace() throws Exception { + Urn entityUrn = UrnUtils.getUrn("urn:li:corpuser:duplicateKeyTest"); + SystemMetadata systemMetadata = AspectGenerationUtils.createSystemMetadata(); + ChangeItemImpl item = + ChangeItemImpl.builder() + .urn(entityUrn) + .aspectName(STATUS_ASPECT_NAME) + .recordTemplate(new Status().setRemoved(true)) + .systemMetadata(systemMetadata) + .auditStamp(TEST_AUDIT_STAMP) + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)); + _entityServiceImpl.ingestAspects( + opContext, + AspectsBatchImpl.builder() + .retrieverContext(opContext.getRetrieverContext()) + .items(List.of(item)) + .build(opContext), + false, + true); + + Urn entityUrnWhitespace = UrnUtils.getUrn(entityUrn + " "); + Map> envelopedAspects = + _entityServiceImpl.getLatestEnvelopedAspects( + opContext, + ImmutableSet.of(entityUrn, entityUrnWhitespace), + ImmutableSet.of(STATUS_ASPECT_NAME), + false); + + assertEquals(envelopedAspects.get(entityUrn).size(), 1); + assertEquals(envelopedAspects.get(entityUrnWhitespace).size(), 0); + } + @Test public void dataGeneratorThreadingTest() { DataGenerator dataGenerator = new DataGenerator(opContext, _entityServiceImpl);