From 378ace104c4e8029f4034d61590ccd26a03f29e9 Mon Sep 17 00:00:00 2001 From: Pere Miquel Brull Date: Mon, 25 Aug 2025 17:45:39 +0200 Subject: [PATCH] MINOR - tag_usage performance (#22950) * MINOR WIP - tag_usage performance * Update EntityRepository.java remove unnecessary call * update query and indexes * Merge 1.9.3 migration queries from main branch - Add performance indexes for tag_usage and entity_relationship tables - Add entity relationship update for dataContract relations - Merge queries from both main and tag-usage-perf branches Co-authored-by: Pere Miquel Brull --------- Co-authored-by: claude[bot] <209825114+claude[bot]@users.noreply.github.com> Co-authored-by: Pere Miquel Brull --- .../native/1.9.3/mysql/schemaChanges.sql | 26 +++++++++++ .../native/1.9.3/postgres/schemaChanges.sql | 26 +++++++++++ .../service/jdbi3/CollectionDAO.java | 44 ++++++++----------- .../service/jdbi3/EntityRepository.java | 2 - 4 files changed, 70 insertions(+), 28 deletions(-) create mode 100644 bootstrap/sql/migrations/native/1.9.3/mysql/schemaChanges.sql create mode 100644 bootstrap/sql/migrations/native/1.9.3/postgres/schemaChanges.sql diff --git a/bootstrap/sql/migrations/native/1.9.3/mysql/schemaChanges.sql b/bootstrap/sql/migrations/native/1.9.3/mysql/schemaChanges.sql new file mode 100644 index 00000000000..7bae18311cf --- /dev/null +++ b/bootstrap/sql/migrations/native/1.9.3/mysql/schemaChanges.sql @@ -0,0 +1,26 @@ +ALTER TABLE tag_usage ADD INDEX idx_tag_usage_target_fqn_hash (targetFQNHash); +ALTER TABLE tag_usage ADD INDEX idx_tag_usage_tag_fqn_hash (tagFQNHash); +ALTER TABLE tag_usage ADD INDEX idx_tag_usage_source_target (source, targetFQNHash); +ALTER TABLE tag_usage ADD INDEX idx_tag_usage_target_source (targetFQNHash, source, tagFQN); +ALTER TABLE entity_relationship ADD INDEX idx_entity_relationship_from_relation (fromId, relation); +ALTER TABLE entity_relationship ADD INDEX idx_entity_relationship_to_relation (toId, relation); +ALTER TABLE entity_relationship ADD INDEX idx_entity_relationship_from_type_relation (fromId, fromEntity, relation); +ALTER TABLE entity_relationship ADD INDEX idx_entity_relationship_to_type_relation (toId, toEntity, relation); + +ALTER TABLE table_entity ADD INDEX idx_table_entity_deleted (deleted); +ALTER TABLE database_entity ADD INDEX idx_database_entity_deleted (deleted); +ALTER TABLE database_schema_entity ADD INDEX idx_database_schema_entity_deleted (deleted); +ALTER TABLE pipeline_entity ADD INDEX idx_pipeline_entity_deleted (deleted); +ALTER TABLE chart_entity ADD INDEX idx_chart_entity_deleted (deleted); +ALTER TABLE dashboard_entity ADD INDEX idx_dashboard_entity_deleted (deleted); +ALTER TABLE topic_entity ADD INDEX idx_topic_entity_deleted (deleted); +ALTER TABLE ml_model_entity ADD INDEX idx_ml_model_entity_deleted (deleted); +ALTER TABLE glossary_entity ADD INDEX idx_glossary_entity_deleted (deleted); +ALTER TABLE glossary_term_entity ADD INDEX idx_glossary_term_entity_deleted (deleted); +ALTER TABLE user_entity ADD INDEX idx_user_entity_deleted (deleted); +ALTER TABLE team_entity ADD INDEX idx_team_entity_deleted (deleted); + +-- Update the relation between table and dataContract to 0 (CONTAINS) +UPDATE entity_relationship +SET relation = 0 +WHERE fromEntity = 'table' AND toEntity = 'dataContract' AND relation = 10; diff --git a/bootstrap/sql/migrations/native/1.9.3/postgres/schemaChanges.sql b/bootstrap/sql/migrations/native/1.9.3/postgres/schemaChanges.sql new file mode 100644 index 00000000000..5fc9aca156f --- /dev/null +++ b/bootstrap/sql/migrations/native/1.9.3/postgres/schemaChanges.sql @@ -0,0 +1,26 @@ +CREATE INDEX IF NOT EXISTS idx_tag_usage_target_fqn_hash ON tag_usage(targetFQNHash); +CREATE INDEX IF NOT EXISTS idx_tag_usage_tag_fqn_hash ON tag_usage(tagFQNHash); +CREATE INDEX IF NOT EXISTS idx_tag_usage_source_target ON tag_usage(source, targetFQNHash); +CREATE INDEX IF NOT EXISTS idx_tag_usage_target_source ON tag_usage(targetFQNHash, source, tagFQN); +CREATE INDEX IF NOT EXISTS idx_entity_relationship_from_relation ON entity_relationship(fromId, relation); +CREATE INDEX IF NOT EXISTS idx_entity_relationship_to_relation ON entity_relationship(toId, relation); +CREATE INDEX IF NOT EXISTS idx_entity_relationship_from_type_relation ON entity_relationship(fromId, fromEntity, relation); +CREATE INDEX IF NOT EXISTS idx_entity_relationship_to_type_relation ON entity_relationship(toId, toEntity, relation); + +CREATE INDEX IF NOT EXISTS idx_table_entity_deleted ON table_entity(deleted); +CREATE INDEX IF NOT EXISTS idx_database_entity_deleted ON database_entity(deleted); +CREATE INDEX IF NOT EXISTS idx_database_schema_entity_deleted ON database_schema_entity(deleted); +CREATE INDEX IF NOT EXISTS idx_pipeline_entity_deleted ON pipeline_entity(deleted); +CREATE INDEX IF NOT EXISTS idx_chart_entity_deleted ON chart_entity(deleted); +CREATE INDEX IF NOT EXISTS idx_dashboard_entity_deleted ON dashboard_entity(deleted); +CREATE INDEX IF NOT EXISTS idx_topic_entity_deleted ON topic_entity(deleted); +CREATE INDEX IF NOT EXISTS idx_ml_model_entity_deleted ON ml_model_entity(deleted); +CREATE INDEX IF NOT EXISTS idx_glossary_entity_deleted ON glossary_entity(deleted); +CREATE INDEX IF NOT EXISTS idx_glossary_term_entity_deleted ON glossary_term_entity(deleted); +CREATE INDEX IF NOT EXISTS idx_user_entity_deleted ON user_entity(deleted); +CREATE INDEX IF NOT EXISTS idx_team_entity_deleted ON team_entity(deleted); + +-- Update the relation between table and dataContract to 0 (CONTAINS) +UPDATE entity_relationship +SET relation = 0 +WHERE fromEntity = 'table' AND toEntity = 'dataContract' AND relation = 10; diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/CollectionDAO.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/CollectionDAO.java index 5f0a68dbd46..0e18b54edcd 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/CollectionDAO.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/CollectionDAO.java @@ -4035,35 +4035,27 @@ public interface CollectionDAO { @ConnectionAwareSqlQuery( value = - "SELECT source, tagFQN, labelType, targetFQNHash, state, json " - + "FROM (" - + " SELECT tu.source, tu.tagFQN, tu.labelType, tu.targetFQNHash, tu.state, gterm.json " - + " FROM glossary_term_entity AS gterm " - + " JOIN tag_usage AS tu ON gterm.fqnHash = tu.tagFQNHash " - + " WHERE tu.source = 1 " - + " UNION ALL " - + " SELECT tu.source, tu.tagFQN, tu.labelType, tu.targetFQNHash, tu.state, ta.json " - + " FROM tag AS ta " - + " JOIN tag_usage AS tu ON ta.fqnHash = tu.tagFQNHash " - + " WHERE tu.source = 0 " - + ") AS combined_data " - + "WHERE combined_data.targetFQNHash LIKE :targetFQNHash", + "SELECT tu.source, tu.tagFQN, tu.labelType, tu.targetFQNHash, tu.state, " + + "CASE " + + " WHEN tu.source = 1 THEN gterm.json " + + " WHEN tu.source = 0 THEN ta.json " + + "END as json " + + "FROM tag_usage tu " + + "LEFT JOIN glossary_term_entity gterm ON tu.source = 1 AND gterm.fqnHash = tu.tagFQNHash " + + "LEFT JOIN tag ta ON tu.source = 0 AND ta.fqnHash = tu.tagFQNHash " + + "WHERE tu.targetFQNHash LIKE :targetFQNHash", connectionType = MYSQL) @ConnectionAwareSqlQuery( value = - "SELECT source, tagFQN, labelType, targetFQNHash, state, json " - + "FROM (" - + " SELECT tu.source, tu.tagFQN, tu.labelType, tu.targetFQNHash, tu.state, gterm.json " - + " FROM glossary_term_entity AS gterm " - + " JOIN tag_usage AS tu ON gterm.fqnHash = tu.tagFQNHash " - + " WHERE tu.source = 1 " - + " UNION ALL " - + " SELECT tu.source, tu.tagFQN, tu.labelType, tu.targetFQNHash, tu.state, ta.json " - + " FROM tag AS ta " - + " JOIN tag_usage AS tu ON ta.fqnHash = tu.tagFQNHash " - + " WHERE tu.source = 0 " - + ") AS combined_data " - + "WHERE combined_data.targetFQNHash LIKE :targetFQNHash", + "SELECT tu.source, tu.tagFQN, tu.labelType, tu.targetFQNHash, tu.state, " + + "CASE " + + " WHEN tu.source = 1 THEN gterm.json " + + " WHEN tu.source = 0 THEN ta.json " + + "END as json " + + "FROM tag_usage tu " + + "LEFT JOIN glossary_term_entity gterm ON tu.source = 1 AND gterm.fqnHash = tu.tagFQNHash " + + "LEFT JOIN tag ta ON tu.source = 0 AND ta.fqnHash = tu.tagFQNHash " + + "WHERE tu.targetFQNHash LIKE :targetFQNHash", connectionType = POSTGRES) @RegisterRowMapper(TagLabelRowMapperWithTargetFqnHash.class) List> getTagsInternalByPrefix( diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java index 6cec00c1437..6687f66390c 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java @@ -1403,8 +1403,6 @@ public abstract class EntityRepository { entityUpdater.update(); } if (entityUpdater.fieldsChanged()) { - // Refresh the entity fields from the database after the update - setFieldsInternal(updated, patchFields); setInheritedFields(updated, patchFields); // Restore inherited fields after a change } updated.setChangeDescription(entityUpdater.getIncrementalChangeDescription());