MINOR - tag_usage performance (#22950)

* MINOR WIP - tag_usage performance

* Update EntityRepository.java

remove unnecessary call

* update query and indexes

* Merge 1.9.3 migration queries from main branch

- Add performance indexes for tag_usage and entity_relationship tables
- Add entity relationship update for dataContract relations
- Merge queries from both main and tag-usage-perf branches

Co-authored-by: Pere Miquel Brull <pmbrull@users.noreply.github.com>

---------

Co-authored-by: claude[bot] <209825114+claude[bot]@users.noreply.github.com>
Co-authored-by: Pere Miquel Brull <pmbrull@users.noreply.github.com>
This commit is contained in:
Pere Miquel Brull 2025-08-25 17:45:39 +02:00
parent 5fefbe3ca1
commit 378ace104c
4 changed files with 70 additions and 28 deletions

View File

@ -0,0 +1,26 @@
ALTER TABLE tag_usage ADD INDEX idx_tag_usage_target_fqn_hash (targetFQNHash);
ALTER TABLE tag_usage ADD INDEX idx_tag_usage_tag_fqn_hash (tagFQNHash);
ALTER TABLE tag_usage ADD INDEX idx_tag_usage_source_target (source, targetFQNHash);
ALTER TABLE tag_usage ADD INDEX idx_tag_usage_target_source (targetFQNHash, source, tagFQN);
ALTER TABLE entity_relationship ADD INDEX idx_entity_relationship_from_relation (fromId, relation);
ALTER TABLE entity_relationship ADD INDEX idx_entity_relationship_to_relation (toId, relation);
ALTER TABLE entity_relationship ADD INDEX idx_entity_relationship_from_type_relation (fromId, fromEntity, relation);
ALTER TABLE entity_relationship ADD INDEX idx_entity_relationship_to_type_relation (toId, toEntity, relation);
ALTER TABLE table_entity ADD INDEX idx_table_entity_deleted (deleted);
ALTER TABLE database_entity ADD INDEX idx_database_entity_deleted (deleted);
ALTER TABLE database_schema_entity ADD INDEX idx_database_schema_entity_deleted (deleted);
ALTER TABLE pipeline_entity ADD INDEX idx_pipeline_entity_deleted (deleted);
ALTER TABLE chart_entity ADD INDEX idx_chart_entity_deleted (deleted);
ALTER TABLE dashboard_entity ADD INDEX idx_dashboard_entity_deleted (deleted);
ALTER TABLE topic_entity ADD INDEX idx_topic_entity_deleted (deleted);
ALTER TABLE ml_model_entity ADD INDEX idx_ml_model_entity_deleted (deleted);
ALTER TABLE glossary_entity ADD INDEX idx_glossary_entity_deleted (deleted);
ALTER TABLE glossary_term_entity ADD INDEX idx_glossary_term_entity_deleted (deleted);
ALTER TABLE user_entity ADD INDEX idx_user_entity_deleted (deleted);
ALTER TABLE team_entity ADD INDEX idx_team_entity_deleted (deleted);
-- Update the relation between table and dataContract to 0 (CONTAINS)
UPDATE entity_relationship
SET relation = 0
WHERE fromEntity = 'table' AND toEntity = 'dataContract' AND relation = 10;

View File

@ -0,0 +1,26 @@
CREATE INDEX IF NOT EXISTS idx_tag_usage_target_fqn_hash ON tag_usage(targetFQNHash);
CREATE INDEX IF NOT EXISTS idx_tag_usage_tag_fqn_hash ON tag_usage(tagFQNHash);
CREATE INDEX IF NOT EXISTS idx_tag_usage_source_target ON tag_usage(source, targetFQNHash);
CREATE INDEX IF NOT EXISTS idx_tag_usage_target_source ON tag_usage(targetFQNHash, source, tagFQN);
CREATE INDEX IF NOT EXISTS idx_entity_relationship_from_relation ON entity_relationship(fromId, relation);
CREATE INDEX IF NOT EXISTS idx_entity_relationship_to_relation ON entity_relationship(toId, relation);
CREATE INDEX IF NOT EXISTS idx_entity_relationship_from_type_relation ON entity_relationship(fromId, fromEntity, relation);
CREATE INDEX IF NOT EXISTS idx_entity_relationship_to_type_relation ON entity_relationship(toId, toEntity, relation);
CREATE INDEX IF NOT EXISTS idx_table_entity_deleted ON table_entity(deleted);
CREATE INDEX IF NOT EXISTS idx_database_entity_deleted ON database_entity(deleted);
CREATE INDEX IF NOT EXISTS idx_database_schema_entity_deleted ON database_schema_entity(deleted);
CREATE INDEX IF NOT EXISTS idx_pipeline_entity_deleted ON pipeline_entity(deleted);
CREATE INDEX IF NOT EXISTS idx_chart_entity_deleted ON chart_entity(deleted);
CREATE INDEX IF NOT EXISTS idx_dashboard_entity_deleted ON dashboard_entity(deleted);
CREATE INDEX IF NOT EXISTS idx_topic_entity_deleted ON topic_entity(deleted);
CREATE INDEX IF NOT EXISTS idx_ml_model_entity_deleted ON ml_model_entity(deleted);
CREATE INDEX IF NOT EXISTS idx_glossary_entity_deleted ON glossary_entity(deleted);
CREATE INDEX IF NOT EXISTS idx_glossary_term_entity_deleted ON glossary_term_entity(deleted);
CREATE INDEX IF NOT EXISTS idx_user_entity_deleted ON user_entity(deleted);
CREATE INDEX IF NOT EXISTS idx_team_entity_deleted ON team_entity(deleted);
-- Update the relation between table and dataContract to 0 (CONTAINS)
UPDATE entity_relationship
SET relation = 0
WHERE fromEntity = 'table' AND toEntity = 'dataContract' AND relation = 10;

View File

@ -4035,35 +4035,27 @@ public interface CollectionDAO {
@ConnectionAwareSqlQuery(
value =
"SELECT source, tagFQN, labelType, targetFQNHash, state, json "
+ "FROM ("
+ " SELECT tu.source, tu.tagFQN, tu.labelType, tu.targetFQNHash, tu.state, gterm.json "
+ " FROM glossary_term_entity AS gterm "
+ " JOIN tag_usage AS tu ON gterm.fqnHash = tu.tagFQNHash "
+ " WHERE tu.source = 1 "
+ " UNION ALL "
+ " SELECT tu.source, tu.tagFQN, tu.labelType, tu.targetFQNHash, tu.state, ta.json "
+ " FROM tag AS ta "
+ " JOIN tag_usage AS tu ON ta.fqnHash = tu.tagFQNHash "
+ " WHERE tu.source = 0 "
+ ") AS combined_data "
+ "WHERE combined_data.targetFQNHash LIKE :targetFQNHash",
"SELECT tu.source, tu.tagFQN, tu.labelType, tu.targetFQNHash, tu.state, "
+ "CASE "
+ " WHEN tu.source = 1 THEN gterm.json "
+ " WHEN tu.source = 0 THEN ta.json "
+ "END as json "
+ "FROM tag_usage tu "
+ "LEFT JOIN glossary_term_entity gterm ON tu.source = 1 AND gterm.fqnHash = tu.tagFQNHash "
+ "LEFT JOIN tag ta ON tu.source = 0 AND ta.fqnHash = tu.tagFQNHash "
+ "WHERE tu.targetFQNHash LIKE :targetFQNHash",
connectionType = MYSQL)
@ConnectionAwareSqlQuery(
value =
"SELECT source, tagFQN, labelType, targetFQNHash, state, json "
+ "FROM ("
+ " SELECT tu.source, tu.tagFQN, tu.labelType, tu.targetFQNHash, tu.state, gterm.json "
+ " FROM glossary_term_entity AS gterm "
+ " JOIN tag_usage AS tu ON gterm.fqnHash = tu.tagFQNHash "
+ " WHERE tu.source = 1 "
+ " UNION ALL "
+ " SELECT tu.source, tu.tagFQN, tu.labelType, tu.targetFQNHash, tu.state, ta.json "
+ " FROM tag AS ta "
+ " JOIN tag_usage AS tu ON ta.fqnHash = tu.tagFQNHash "
+ " WHERE tu.source = 0 "
+ ") AS combined_data "
+ "WHERE combined_data.targetFQNHash LIKE :targetFQNHash",
"SELECT tu.source, tu.tagFQN, tu.labelType, tu.targetFQNHash, tu.state, "
+ "CASE "
+ " WHEN tu.source = 1 THEN gterm.json "
+ " WHEN tu.source = 0 THEN ta.json "
+ "END as json "
+ "FROM tag_usage tu "
+ "LEFT JOIN glossary_term_entity gterm ON tu.source = 1 AND gterm.fqnHash = tu.tagFQNHash "
+ "LEFT JOIN tag ta ON tu.source = 0 AND ta.fqnHash = tu.tagFQNHash "
+ "WHERE tu.targetFQNHash LIKE :targetFQNHash",
connectionType = POSTGRES)
@RegisterRowMapper(TagLabelRowMapperWithTargetFqnHash.class)
List<Pair<String, TagLabel>> getTagsInternalByPrefix(

View File

@ -1403,8 +1403,6 @@ public abstract class EntityRepository<T extends EntityInterface> {
entityUpdater.update();
}
if (entityUpdater.fieldsChanged()) {
// Refresh the entity fields from the database after the update
setFieldsInternal(updated, patchFields);
setInheritedFields(updated, patchFields); // Restore inherited fields after a change
}
updated.setChangeDescription(entityUpdater.getIncrementalChangeDescription());