2025-10-27 17:59:31 -07:00
|
|
|
-- Performance optimization for tag_usage prefix queries
|
|
|
|
|
ALTER TABLE tag_usage
|
|
|
|
|
ADD COLUMN IF NOT EXISTS targetfqnhash_lower text
|
|
|
|
|
GENERATED ALWAYS AS (lower(targetFQNHash)) STORED;
|
|
|
|
|
|
|
|
|
|
ALTER TABLE tag_usage
|
|
|
|
|
ADD COLUMN IF NOT EXISTS tagfqn_lower text
|
|
|
|
|
GENERATED ALWAYS AS (lower(tagFQN)) STORED;
|
|
|
|
|
|
|
|
|
|
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_tag_usage_target_prefix_covering
|
|
|
|
|
ON tag_usage (source, targetfqnhash_lower text_pattern_ops)
|
|
|
|
|
INCLUDE (tagFQN, labelType, state)
|
|
|
|
|
WHERE state = 1; -- Only active tags
|
|
|
|
|
|
|
|
|
|
-- For exact match queries on targetFQNHash
|
|
|
|
|
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_tag_usage_target_exact
|
|
|
|
|
ON tag_usage (source, targetFQNHash, state)
|
|
|
|
|
INCLUDE (tagFQN, labelType);
|
|
|
|
|
|
|
|
|
|
-- For tagFQN prefix searches if needed
|
|
|
|
|
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_tag_usage_tagfqn_prefix_covering
|
|
|
|
|
ON tag_usage (source, tagfqn_lower text_pattern_ops)
|
|
|
|
|
INCLUDE (targetFQNHash, labelType, state)
|
|
|
|
|
WHERE state = 1;
|
|
|
|
|
|
|
|
|
|
-- For JOIN operations with classification and tag tables
|
|
|
|
|
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_tag_usage_join_source
|
|
|
|
|
ON tag_usage (tagFQNHash, source)
|
|
|
|
|
INCLUDE (targetFQNHash, tagFQN, labelType, state)
|
|
|
|
|
WHERE state = 1;
|
|
|
|
|
|
|
|
|
|
-- Only create if you need %contains% searches
|
|
|
|
|
CREATE EXTENSION IF NOT EXISTS pg_trgm;
|
|
|
|
|
|
|
|
|
|
-- GIN index for substring matches (LIKE '%foo%')
|
|
|
|
|
CREATE INDEX CONCURRENTLY IF NOT EXISTS gin_tag_usage_targetfqn_trgm
|
|
|
|
|
ON tag_usage USING GIN (targetFQNHash gin_trgm_ops)
|
|
|
|
|
WHERE state = 1;
|
|
|
|
|
|
|
|
|
|
-- Optimize autovacuum for tag_usage (high update frequency)
|
|
|
|
|
ALTER TABLE tag_usage SET (
|
|
|
|
|
autovacuum_vacuum_scale_factor = 0.05, -- Vacuum at 5% dead rows (default 20%)
|
|
|
|
|
autovacuum_analyze_scale_factor = 0.02, -- Analyze at 2% changed rows (default 10%)
|
|
|
|
|
autovacuum_vacuum_threshold = 50, -- Minimum rows before vacuum
|
|
|
|
|
autovacuum_analyze_threshold = 50, -- Minimum rows before analyze
|
|
|
|
|
fillfactor = 90 -- Leave 10% free space for HOT updates
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
-- Increase statistics target for frequently queried columns
|
|
|
|
|
ALTER TABLE tag_usage ALTER COLUMN targetFQNHash SET STATISTICS 1000;
|
|
|
|
|
ALTER TABLE tag_usage ALTER COLUMN targetfqnhash_lower SET STATISTICS 1000;
|
|
|
|
|
ALTER TABLE tag_usage ALTER COLUMN tagFQN SET STATISTICS 500;
|
|
|
|
|
ALTER TABLE tag_usage ALTER COLUMN tagfqn_lower SET STATISTICS 500;
|
|
|
|
|
ALTER TABLE tag_usage ALTER COLUMN source SET STATISTICS 100;
|
|
|
|
|
|
|
|
|
|
-- Add index for efficient bulk term count queries
|
|
|
|
|
-- The bulkGetTermCounts query uses: WHERE classificationHash IN (...) AND deleted = FALSE
|
|
|
|
|
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_tag_classification_deleted
|
|
|
|
|
ON tag (classificationHash, deleted);
|
|
|
|
|
|
|
|
|
|
-- Create new indexes with deleted column for efficient filtering
|
|
|
|
|
-- Using partial indexes (WHERE deleted = FALSE) for even better performance
|
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_entity_relationship_from_deleted
|
|
|
|
|
ON entity_relationship(fromId, fromEntity, relation)
|
|
|
|
|
INCLUDE (toId, toEntity, relation)
|
|
|
|
|
WHERE deleted = FALSE;
|
|
|
|
|
|
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_entity_relationship_to_deleted
|
|
|
|
|
ON entity_relationship(toId, toEntity, relation)
|
|
|
|
|
INCLUDE (fromId, fromEntity, relation)
|
|
|
|
|
WHERE deleted = FALSE;
|
|
|
|
|
|
|
|
|
|
-- Also add indexes for the specific queries that include fromEntity/toEntity filters
|
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_entity_relationship_from_typed
|
|
|
|
|
ON entity_relationship(toId, toEntity, relation, fromEntity)
|
|
|
|
|
INCLUDE (fromEntity, toEntity)
|
|
|
|
|
WHERE deleted = FALSE;
|
|
|
|
|
|
|
|
|
|
-- Index for bidirectional lookups (used in UNION queries)
|
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_entity_relationship_bidirectional
|
|
|
|
|
ON entity_relationship(fromId, toId, relation)
|
|
|
|
|
WHERE deleted = FALSE;
|
|
|
|
|
|
2025-08-29 17:28:29 +02:00
|
|
|
-- Add "Data Product Domain Validation" rule to existing entityRulesSettings configuration
|
|
|
|
|
UPDATE openmetadata_settings
|
|
|
|
|
SET json = jsonb_set(
|
|
|
|
|
json,
|
|
|
|
|
'{entitySemantics}',
|
|
|
|
|
(json->'entitySemantics') || jsonb_build_object(
|
|
|
|
|
'name', 'Data Product Domain Validation',
|
|
|
|
|
'description', 'Validates that Data Products assigned to an entity match the entity''s domains.',
|
|
|
|
|
'rule', '{"validateDataProductDomainMatch":[{"var":"dataProducts"},{"var":"domains"}]}',
|
|
|
|
|
'enabled', true,
|
|
|
|
|
'provider', 'system'
|
|
|
|
|
)::jsonb,
|
|
|
|
|
true
|
|
|
|
|
)
|
|
|
|
|
WHERE configtype = 'entityRulesSettings'
|
|
|
|
|
AND json->'entitySemantics' IS NOT NULL
|
|
|
|
|
AND NOT EXISTS (
|
|
|
|
|
SELECT 1
|
|
|
|
|
FROM jsonb_array_elements(json->'entitySemantics') AS rule
|
|
|
|
|
WHERE rule->>'name' = 'Data Product Domain Validation'
|
2025-09-11 15:14:38 +05:30
|
|
|
);
|
|
|
|
|
|
|
|
|
|
-- Add generated column for customUnitOfMeasurement
|
|
|
|
|
ALTER TABLE metric_entity
|
|
|
|
|
ADD COLUMN customUnitOfMeasurement VARCHAR(256)
|
|
|
|
|
GENERATED ALWAYS AS ((json->>'customUnitOfMeasurement')::VARCHAR(256)) STORED;
|
|
|
|
|
-- Add index on the column
|
2025-09-16 15:16:06 +05:30
|
|
|
CREATE INDEX idx_metric_custom_unit ON metric_entity(customUnitOfMeasurement);
|
|
|
|
|
|
|
|
|
|
-- Fetch updated searchSettings
|
2025-09-16 15:21:11 +02:00
|
|
|
DELETE FROM openmetadata_settings WHERE configType = 'searchSettings';
|
|
|
|
|
|
|
|
|
|
-- Create notification_template_entity table following OpenMetadata patterns
|
|
|
|
|
CREATE TABLE IF NOT EXISTS notification_template_entity (
|
|
|
|
|
id VARCHAR(36) GENERATED ALWAYS AS (json ->> 'id') STORED NOT NULL,
|
|
|
|
|
name VARCHAR(256) GENERATED ALWAYS AS (json ->> 'name') STORED NOT NULL,
|
|
|
|
|
fqnHash VARCHAR(768) NOT NULL,
|
|
|
|
|
json JSONB NOT NULL,
|
|
|
|
|
updatedAt BIGINT GENERATED ALWAYS AS ((json ->> 'updatedAt')::bigint) STORED NOT NULL,
|
|
|
|
|
updatedBy VARCHAR(256) GENERATED ALWAYS AS (json ->> 'updatedBy') STORED NOT NULL,
|
|
|
|
|
deleted BOOLEAN GENERATED ALWAYS AS ((json ->> 'deleted')::boolean) STORED,
|
|
|
|
|
provider VARCHAR(32) GENERATED ALWAYS AS (json ->> 'provider') STORED,
|
|
|
|
|
|
|
|
|
|
PRIMARY KEY (id),
|
|
|
|
|
UNIQUE (fqnHash)
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_notification_template_name ON notification_template_entity(name);
|
2025-09-28 13:17:23 -07:00
|
|
|
CREATE INDEX IF NOT EXISTS idx_notification_template_provider ON notification_template_entity(provider);
|
|
|
|
|
|
|
|
|
|
-- Optimize table listing queries by indexing the schema hash prefix
|
|
|
|
|
ALTER TABLE table_entity
|
2025-09-29 17:07:09 +05:30
|
|
|
ADD COLUMN IF NOT EXISTS databaseSchemaHash VARCHAR(768)
|
2025-09-28 13:17:23 -07:00
|
|
|
GENERATED ALWAYS AS (
|
2025-09-29 17:07:09 +05:30
|
|
|
rtrim(
|
|
|
|
|
split_part(fqnhash, '.', 1) || '.' ||
|
|
|
|
|
split_part(fqnhash, '.', 2) || '.' ||
|
|
|
|
|
split_part(fqnhash, '.', 3),
|
|
|
|
|
'.'
|
|
|
|
|
)
|
2025-09-28 13:17:23 -07:00
|
|
|
) STORED;
|
|
|
|
|
|
2025-09-29 17:07:09 +05:30
|
|
|
CREATE INDEX IF NOT EXISTS idx_table_entity_schema_listing
|
2025-09-28 13:17:23 -07:00
|
|
|
ON table_entity (deleted, databaseSchemaHash, name, id);
|
|
|
|
|
|
|
|
|
|
-- Optimize stored procedure listing queries by indexing the schema hash prefix
|
|
|
|
|
ALTER TABLE stored_procedure_entity
|
2025-09-29 17:07:09 +05:30
|
|
|
ADD COLUMN IF NOT EXISTS databaseSchemaHash VARCHAR(768)
|
2025-09-28 13:17:23 -07:00
|
|
|
GENERATED ALWAYS AS (
|
2025-09-29 17:07:09 +05:30
|
|
|
rtrim(
|
|
|
|
|
split_part(fqnhash, '.', 1) || '.' ||
|
|
|
|
|
split_part(fqnhash, '.', 2) || '.' ||
|
|
|
|
|
split_part(fqnhash, '.', 3),
|
|
|
|
|
'.'
|
|
|
|
|
)
|
2025-09-28 13:17:23 -07:00
|
|
|
) STORED;
|
|
|
|
|
|
|
|
|
|
DROP INDEX IF EXISTS idx_stored_procedure_entity_deleted_name_id;
|
|
|
|
|
|
2025-09-29 17:07:09 +05:30
|
|
|
CREATE INDEX IF NOT EXISTS idx_stored_procedure_schema_listing
|
2025-09-28 13:17:23 -07:00
|
|
|
ON stored_procedure_entity (deleted, databaseSchemaHash, name, id);
|
2025-09-29 22:17:18 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
-- Recognizer Feedback Storage
|
|
|
|
|
-- Store user feedback on auto-applied tags to improve recognition accuracy
|
|
|
|
|
CREATE TABLE IF NOT EXISTS recognizer_feedback_entity (
|
|
|
|
|
id VARCHAR(36) GENERATED ALWAYS AS (json ->> 'id') STORED NOT NULL,
|
|
|
|
|
entityLink VARCHAR(512) GENERATED ALWAYS AS (json ->> 'entityLink') STORED NOT NULL,
|
|
|
|
|
tagFQN VARCHAR(256) GENERATED ALWAYS AS (json ->> 'tagFQN') STORED NOT NULL,
|
|
|
|
|
feedbackType VARCHAR(50) GENERATED ALWAYS AS (json ->> 'feedbackType') STORED NOT NULL,
|
|
|
|
|
status VARCHAR(20) GENERATED ALWAYS AS (json ->> 'status') STORED,
|
|
|
|
|
createdBy VARCHAR(256) GENERATED ALWAYS AS (json ->> 'createdBy') STORED NOT NULL,
|
|
|
|
|
createdAt BIGINT GENERATED ALWAYS AS ((json ->> 'createdAt')::bigint) STORED NOT NULL,
|
|
|
|
|
json JSONB NOT NULL,
|
|
|
|
|
PRIMARY KEY (id)
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_feedback_entity ON recognizer_feedback_entity(entityLink);
|
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_feedback_tag ON recognizer_feedback_entity(tagFQN);
|
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_feedback_status ON recognizer_feedback_entity(status);
|
2025-10-01 14:11:14 +02:00
|
|
|
CREATE INDEX IF NOT EXISTS idx_feedback_created ON recognizer_feedback_entity(createdAt);
|
|
|
|
|
|
|
|
|
|
ALTER TABLE tag_usage
|
|
|
|
|
ADD COLUMN reason TEXT;
|