Sriharsha Chintalapani a846d3ad84
Improve Performance, Add Redis as optional cache (#23054)
* MINOR - cache settings YAML

* MINOR - cache settings YAML

* Remove Redis; batch fetch all relations in one query

* Update generated TypeScript types

* Add advanced configs

* Fix tests

* Fix tests

* release 1.9.5

* fix include

* Fix Indexing strategy, add HikariCP configs

* add HikariCP configs to test config

* Add AWS Aurora related configs

* remove vacuum and relax defaults

* fix includes

* Use index

* Add Latency breakdowns on server side

* Update generated TypeScript types

* Add Latency breakdowns on server side

* Propagate fields properly

* Add Async Search calls

* Add Jetty Metrics

* disable gzip

* AWS JDBC Driver

* add pctile

* Add method to endpoint pctile

* handle patch properly in metrics

* tests

* update metrics

* bump flyway

* fix jetty metric handler

* default to postgres

* default to postgres

* ConnectionType with amazon

* Update connection

* Update connection

* Add Redis Cache support for all entities, CacheWarmupApp

* Fix aurora driver settings

* Fix aurora driver settings

* Fix aurora driver settings

* Fix aurora driver settings

* revert config

* Handle ReadOnly

* update config

* Revert "update config"

This reverts commit 9f5751c356de474b8b79797cbbd9d7650a1bb349.

* Revert "Handle ReadOnly"

This reverts commit e0c906365109ae014e253cdba9e31df935208abb.

* Revert "revert config"

This reverts commit e79c3d2d84051320d4864ddf9638dcc6cdef1980.

* Revert "Fix aurora driver settings"

This reverts commit 463e6ebf4b4e8209a7dbb3bf7818544fa68dca5f.

* Revert "Fix aurora driver settings"

This reverts commit 515d22b0e0d3b979cb2af7b87a977b6d4290e38f.

* Revert "Fix aurora driver settings"

This reverts commit 0a1226e9e1e5fc6e42499560373e26004b410e5e.

* Revert "Fix aurora driver settings"

This reverts commit d959976b1cadaace1a09da0ca3d78df490c5a689.

* Add Redis Cache support for all entities, CacheWarmupApp

* Update generated TypeScript types

* Redis SSL

* redis auth

* Fix cache warmup and lookup if cache fails

* Fix cache of relations

* try search cache

* fix search cache

* fix cache response

* Revert "fix cache response"

This reverts commit 14602dc8c59c5ee6b9b21e20f6d1e3a01ea865a0.

* Revert "fix search cache"

This reverts commit 8eaa76bd7ef9d635e6a5b6d3cba4262057f9d4cd.

* Revert "try search cache"

This reverts commit 0582a1dc03a4353efb3d635957fd4df5dd626f00.

* clean commits

* clean drops

* clean

* clean

* clean

* remove hosts array for ES

* Update generated TypeScript types

* remove hosts array for ES

* format

* remove hosts array for ES

* Remove Embeddings for Table Index

* metrics improvements

* MINOR - Report status for tests that blow up

* Revert "MINOR - Report status for tests that blow up"

This reverts commit e831ac04e6e79aa04e6ccff259d1c5fc852c6ba3.

* Fix tests

* Address comments

* remove unused code

* fix postgres schema migration

* fix tests and improve caching strategy

* fix tests, making search sync

* Update generated TypeScript types

* Fix Failures due to merge conflicts

* Fix Tag Failures

* Fix Retryable Exception

---------

Co-authored-by: Pere Miquel Brull <peremiquelbrull@gmail.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: mohitdeuex <mohit.y@deuexsolutions.com>
Co-authored-by: Mohit Yadav <105265192+mohityadav766@users.noreply.github.com>
2025-10-28 06:29:31 +05:30

188 lines
7.7 KiB
SQL

-- Performance optimization for tag_usage prefix queries.
-- Adds stored, lower-cased copies of targetFQNHash and tagFQN; the
-- text_pattern_ops indexes created later in this script are built on them.
-- Both columns are added by ONE ALTER TABLE: adding a STORED generated column
-- rewrites the table, and combining the subcommands lets PostgreSQL do that
-- in a single pass instead of two.
ALTER TABLE tag_usage
    ADD COLUMN IF NOT EXISTS targetfqnhash_lower text
        GENERATED ALWAYS AS (lower(targetFQNHash)) STORED,
    ADD COLUMN IF NOT EXISTS tagfqn_lower text
        GENERATED ALWAYS AS (lower(tagFQN)) STORED;
-- Prefix lookups on the lower-cased target hash, scoped by source.
-- Partial index: only rows with state = 1 (per the original author, "only active tags").
-- NOTE(review): CONCURRENTLY cannot run inside a transaction block, and a failed
-- concurrent build leaves an INVALID index that IF NOT EXISTS will then skip.
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_tag_usage_target_prefix_covering
    ON tag_usage (source, targetfqnhash_lower text_pattern_ops)
    INCLUDE (tagFQN, labelType, state)
    WHERE state = 1;

-- Exact-match lookups on targetFQNHash (not partial: state is a key column here).
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_tag_usage_target_exact
    ON tag_usage (source, targetFQNHash, state)
    INCLUDE (tagFQN, labelType);

-- Prefix lookups on the lower-cased tagFQN, if needed; rows with state = 1 only.
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_tag_usage_tagfqn_prefix_covering
    ON tag_usage (source, tagfqn_lower text_pattern_ops)
    INCLUDE (targetFQNHash, labelType, state)
    WHERE state = 1;

-- Supports JOINs against the classification and tag tables via tagFQNHash;
-- rows with state = 1 only.
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_tag_usage_join_source
    ON tag_usage (tagFQNHash, source)
    INCLUDE (targetFQNHash, tagFQN, labelType, state)
    WHERE state = 1;
-- Trigram support; only required when %contains% style searches are needed.
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- GIN trigram index so substring matches (LIKE '%foo%') on targetFQNHash can
-- use an index; limited to rows with state = 1.
CREATE INDEX CONCURRENTLY IF NOT EXISTS gin_tag_usage_targetfqn_trgm
    ON tag_usage
    USING GIN (targetFQNHash gin_trgm_ops)
    WHERE state = 1;
-- Per-table storage and autovacuum tuning for tag_usage
-- (the original author notes this table has a high update frequency).
ALTER TABLE tag_usage SET (
    fillfactor                      = 90,   -- keep 10% of each page free for HOT updates
    autovacuum_vacuum_threshold     = 50,   -- minimum rows before vacuum
    autovacuum_vacuum_scale_factor  = 0.05, -- vacuum at 5% dead rows (default 20%)
    autovacuum_analyze_threshold    = 50,   -- minimum rows before analyze
    autovacuum_analyze_scale_factor = 0.02  -- analyze at 2% changed rows (default 10%)
);
-- Per-column planner statistics targets for the frequently queried tag_usage
-- columns: 1000 for the target hash and its lower-cased copy, 500 for tagFQN
-- and its lower-cased copy, 100 for source.
ALTER TABLE tag_usage
    ALTER COLUMN targetFQNHash       SET STATISTICS 1000,
    ALTER COLUMN targetfqnhash_lower SET STATISTICS 1000,
    ALTER COLUMN tagFQN              SET STATISTICS 500,
    ALTER COLUMN tagfqn_lower        SET STATISTICS 500,
    ALTER COLUMN source              SET STATISTICS 100;
-- Supports bulk term-count queries. Per the original note, bulkGetTermCounts
-- filters with: WHERE classificationHash IN (...) AND deleted = FALSE
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_tag_classification_deleted
    ON tag (classificationHash, deleted);
-- Partial indexes on entity_relationship restricted to live rows
-- (WHERE deleted = FALSE), so lookups that filter on deleted scan less data.
--
-- INCLUDE lists contain only columns that are NOT already index keys: key
-- columns are always available to index-only scans, so repeating them in
-- INCLUDE only makes the index larger.

-- Lookups from the "from" side; the "to" side is carried as payload.
CREATE INDEX IF NOT EXISTS idx_entity_relationship_from_deleted
    ON entity_relationship (fromId, fromEntity, relation)
    INCLUDE (toId, toEntity)
    WHERE deleted = FALSE;

-- Lookups from the "to" side; the "from" side is carried as payload.
CREATE INDEX IF NOT EXISTS idx_entity_relationship_to_deleted
    ON entity_relationship (toId, toEntity, relation)
    INCLUDE (fromId, fromEntity)
    WHERE deleted = FALSE;

-- For queries that additionally filter on fromEntity/toEntity.
-- Every column the original INCLUDE listed (fromEntity, toEntity) is already
-- a key column, so no INCLUDE clause is needed.
-- NOTE(review): the name says "from" but the leading key is toId — confirm
-- this matches the intended query shape.
CREATE INDEX IF NOT EXISTS idx_entity_relationship_from_typed
    ON entity_relationship (toId, toEntity, relation, fromEntity)
    WHERE deleted = FALSE;

-- Bidirectional lookups (used in UNION queries).
CREATE INDEX IF NOT EXISTS idx_entity_relationship_bidirectional
    ON entity_relationship (fromId, toId, relation)
    WHERE deleted = FALSE;
-- Append the "Data Product Domain Validation" rule to the entitySemantics
-- array of the existing entityRulesSettings row.
-- Rows without an entitySemantics key are skipped, as are rows that already
-- contain a rule with this name, so re-running does not add a duplicate.
UPDATE openmetadata_settings AS s
SET json = jsonb_set(
        s.json,
        '{entitySemantics}',
        -- array || object appends the object as a new array element
        (s.json -> 'entitySemantics') || jsonb_build_object(
            'name', 'Data Product Domain Validation',
            'description', 'Validates that Data Products assigned to an entity match the entity''s domains.',
            'rule', '{"validateDataProductDomainMatch":[{"var":"dataProducts"},{"var":"domains"}]}',
            'enabled', true,
            'provider', 'system'
        ),
        true
    )
WHERE s.configtype = 'entityRulesSettings'
  AND s.json -> 'entitySemantics' IS NOT NULL
  AND NOT EXISTS (
        SELECT 1
        FROM jsonb_array_elements(s.json -> 'entitySemantics') AS existing_rule
        WHERE existing_rule ->> 'name' = 'Data Product Domain Validation'
      );
-- Expose json->>'customUnitOfMeasurement' as a stored generated column on
-- metric_entity so it can be indexed and filtered directly.
-- IF NOT EXISTS keeps this step re-runnable, matching the other ADD COLUMN /
-- CREATE INDEX statements in this script.
ALTER TABLE metric_entity
    ADD COLUMN IF NOT EXISTS customUnitOfMeasurement VARCHAR(256)
        GENERATED ALWAYS AS ((json->>'customUnitOfMeasurement')::VARCHAR(256)) STORED;

-- Index on the generated column.
CREATE INDEX IF NOT EXISTS idx_metric_custom_unit
    ON metric_entity (customUnitOfMeasurement);
-- Remove the stored searchSettings row so updated search settings are fetched.
-- NOTE(review): presumably the application re-creates the defaults when the
-- row is missing — confirm against the settings loader.
DELETE FROM openmetadata_settings
WHERE configType = 'searchSettings';
-- notification_template_entity: JSONB-backed entity table following the
-- OpenMetadata pattern. The json document is the source of truth; id, name,
-- updatedAt, updatedBy, deleted and provider are stored generated columns
-- extracted from it. fqnHash is written directly and must be unique.
CREATE TABLE IF NOT EXISTS notification_template_entity (
    id        VARCHAR(36)  GENERATED ALWAYS AS (json ->> 'id') STORED NOT NULL,
    name      VARCHAR(256) GENERATED ALWAYS AS (json ->> 'name') STORED NOT NULL,
    fqnHash   VARCHAR(768) NOT NULL,
    json      JSONB        NOT NULL,
    updatedAt BIGINT       GENERATED ALWAYS AS ((json ->> 'updatedAt')::bigint) STORED NOT NULL,
    updatedBy VARCHAR(256) GENERATED ALWAYS AS (json ->> 'updatedBy') STORED NOT NULL,
    deleted   BOOLEAN      GENERATED ALWAYS AS ((json ->> 'deleted')::boolean) STORED,
    provider  VARCHAR(32)  GENERATED ALWAYS AS (json ->> 'provider') STORED,
    PRIMARY KEY (id),
    UNIQUE (fqnHash)
);

-- Secondary indexes for lookups by name and by provider.
CREATE INDEX IF NOT EXISTS idx_notification_template_name
    ON notification_template_entity (name);
CREATE INDEX IF NOT EXISTS idx_notification_template_provider
    ON notification_template_entity (provider);
-- Speed up table listing queries by materialising the schema-level prefix of
-- fqnhash. databaseSchemaHash is the first three '.'-separated segments of
-- fqnhash re-joined with '.'; rtrim strips the trailing dots that appear
-- when fqnhash has fewer than three segments.
-- NOTE(review): presumably the segments are service.database.schema — confirm.
ALTER TABLE table_entity
    ADD COLUMN IF NOT EXISTS databaseSchemaHash VARCHAR(768)
        GENERATED ALWAYS AS (
            rtrim(
                split_part(fqnhash, '.', 1) || '.' ||
                split_part(fqnhash, '.', 2) || '.' ||
                split_part(fqnhash, '.', 3),
                '.'
            )
        ) STORED;

-- Listing index keyed on deleted, then schema prefix, then name and id.
CREATE INDEX IF NOT EXISTS idx_table_entity_schema_listing
    ON table_entity (deleted, databaseSchemaHash, name, id);
-- Speed up stored procedure listing queries by materialising the schema-level
-- prefix of fqnhash. databaseSchemaHash is the first three '.'-separated
-- segments of fqnhash re-joined with '.'; rtrim strips the trailing dots
-- that appear when fqnhash has fewer than three segments.
ALTER TABLE stored_procedure_entity
    ADD COLUMN IF NOT EXISTS databaseSchemaHash VARCHAR(768)
        GENERATED ALWAYS AS (
            rtrim(
                split_part(fqnhash, '.', 1) || '.' ||
                split_part(fqnhash, '.', 2) || '.' ||
                split_part(fqnhash, '.', 3),
                '.'
            )
        ) STORED;

-- Drop the older (deleted, name, id) style index, if present, before creating
-- the schema-aware listing index below.
DROP INDEX IF EXISTS idx_stored_procedure_entity_deleted_name_id;

CREATE INDEX IF NOT EXISTS idx_stored_procedure_schema_listing
    ON stored_procedure_entity (deleted, databaseSchemaHash, name, id);
-- Recognizer feedback storage.
-- Holds user feedback on auto-applied tags, intended to improve recognition
-- accuracy. The json document is the source of truth; every other column is
-- a stored generated column extracted from it.
CREATE TABLE IF NOT EXISTS recognizer_feedback_entity (
    id           VARCHAR(36)  GENERATED ALWAYS AS (json ->> 'id') STORED NOT NULL,
    entityLink   VARCHAR(512) GENERATED ALWAYS AS (json ->> 'entityLink') STORED NOT NULL,
    tagFQN       VARCHAR(256) GENERATED ALWAYS AS (json ->> 'tagFQN') STORED NOT NULL,
    feedbackType VARCHAR(50)  GENERATED ALWAYS AS (json ->> 'feedbackType') STORED NOT NULL,
    status       VARCHAR(20)  GENERATED ALWAYS AS (json ->> 'status') STORED,
    createdBy    VARCHAR(256) GENERATED ALWAYS AS (json ->> 'createdBy') STORED NOT NULL,
    createdAt    BIGINT       GENERATED ALWAYS AS ((json ->> 'createdAt')::bigint) STORED NOT NULL,
    json         JSONB        NOT NULL,
    PRIMARY KEY (id)
);

-- Secondary indexes: by entity link, by tag, by status, and by creation time.
CREATE INDEX IF NOT EXISTS idx_feedback_entity
    ON recognizer_feedback_entity (entityLink);
CREATE INDEX IF NOT EXISTS idx_feedback_tag
    ON recognizer_feedback_entity (tagFQN);
CREATE INDEX IF NOT EXISTS idx_feedback_status
    ON recognizer_feedback_entity (status);
CREATE INDEX IF NOT EXISTS idx_feedback_created
    ON recognizer_feedback_entity (createdAt);
-- Add a nullable free-text reason column to tag_usage.
-- IF NOT EXISTS keeps this step re-runnable, matching the other ADD COLUMN
-- statements in this script.
ALTER TABLE tag_usage
    ADD COLUMN IF NOT EXISTS reason TEXT;