mirror of
				https://github.com/open-metadata/OpenMetadata.git
				synced 2025-10-26 00:04:52 +00:00 
			
		
		
		
	 b3e0470882
			
		
	
	
		b3e0470882
		
			
		
	
	
	
	
		
			
			* - Migration Fixes * - Migration Fixes * - Typo fix * - Make warn to error * - Update Migration to show Query Status * - Fix Query in POstgres --------- Co-authored-by: Pere Miquel Brull <peremiquelbrull@gmail.com>
		
			
				
	
	
		
			291 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			SQL
		
	
	
	
	
	
			
		
		
	
	
			291 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			SQL
		
	
	
	
	
	
| -- column deleted not needed for entities that don't support soft delete
 | |
| ALTER TABLE query_entity DROP COLUMN deleted;
 | |
| ALTER TABLE event_subscription_entity DROP COLUMN deleted;
 | |
| 
 | |
| -- create domain entity table
 | |
| CREATE TABLE IF NOT EXISTS domain_entity (
 | |
|     id VARCHAR(36) GENERATED ALWAYS AS (json ->> 'id') STORED NOT NULL,
 | |
|     name VARCHAR(256) GENERATED ALWAYS AS (json ->> 'name') STORED NOT NULL,
 | |
|     fqnHash VARCHAR(256) NOT NULL,
 | |
|     json JSONB NOT NULL,
 | |
|     updatedAt BIGINT GENERATED ALWAYS AS ((json ->> 'updatedAt')::bigint) STORED NOT NULL,
 | |
|     updatedBy VARCHAR(256) GENERATED ALWAYS AS (json ->> 'updatedBy') STORED NOT NULL,
 | |
|     PRIMARY KEY (id),
 | |
|     UNIQUE (fqnHash)
 | |
|     );
 | |
| 
 | |
| -- create data product entity table
 | |
| CREATE TABLE IF NOT EXISTS data_product_entity (
 | |
|     id VARCHAR(36) GENERATED ALWAYS AS (json ->> 'id') STORED NOT NULL,
 | |
|     name VARCHAR(256) GENERATED ALWAYS AS (json ->> 'name') STORED NOT NULL,
 | |
|     fqnHash VARCHAR(256) NOT NULL,
 | |
|     json JSONB NOT NULL,
 | |
|     updatedAt BIGINT GENERATED ALWAYS AS ((json ->> 'updatedAt')::bigint) STORED NOT NULL,
 | |
|     updatedBy VARCHAR(256) GENERATED ALWAYS AS (json ->> 'updatedBy') STORED NOT NULL,
 | |
|     PRIMARY KEY (id),
 | |
|     UNIQUE (fqnHash)
 | |
|     );
 | |
| 
 | |
| -- create search service entity
 | |
| CREATE TABLE IF NOT EXISTS search_service_entity (
 | |
|     id VARCHAR(36) GENERATED ALWAYS AS (json ->> 'id') STORED NOT NULL,
 | |
|     nameHash VARCHAR(256)  NOT NULL,
 | |
|     name VARCHAR(256) GENERATED ALWAYS AS (json ->> 'name') STORED NOT NULL,
 | |
|     serviceType VARCHAR(256) GENERATED ALWAYS AS (json ->> 'serviceType') STORED NOT NULL,
 | |
|     json JSONB NOT NULL,
 | |
|     updatedAt BIGINT GENERATED ALWAYS AS ((json ->> 'updatedAt')::bigint) STORED NOT NULL,
 | |
|     updatedBy VARCHAR(256) GENERATED ALWAYS AS (json ->> 'updatedBy') STORED NOT NULL,
 | |
|     deleted BOOLEAN GENERATED ALWAYS AS ((json ->> 'deleted')::boolean) STORED,
 | |
|     PRIMARY KEY (id),
 | |
|     UNIQUE (nameHash)
 | |
|     );
 | |
| 
 | |
| -- create search index entity
 | |
| CREATE TABLE IF NOT EXISTS search_index_entity (
 | |
|     id VARCHAR(36) GENERATED ALWAYS AS (json ->> 'id') STORED NOT NULL,
 | |
|     name VARCHAR(256) GENERATED ALWAYS AS (json ->> 'name') STORED NOT NULL,
 | |
|     fqnHash VARCHAR(256) NOT NULL,
 | |
|     json JSONB NOT NULL,
 | |
|     updatedAt BIGINT GENERATED ALWAYS AS ((json ->> 'updatedAt')::bigint) STORED NOT NULL,
 | |
|     updatedBy VARCHAR(256) GENERATED ALWAYS AS (json ->> 'updatedBy') STORED NOT NULL,
 | |
|     deleted BOOLEAN GENERATED ALWAYS AS ((json ->> 'deleted')::boolean) STORED,
 | |
|     PRIMARY KEY (id),
 | |
|     UNIQUE (fqnHash)
 | |
|     );
 | |
| 
 | |
| -- We were hardcoding retries to 0. Since we are now using the IngestionPipeline to set them, keep existing ones to 0.
 | |
| UPDATE ingestion_pipeline_entity
 | |
| SET json = jsonb_set(json::jsonb, '{airflowConfig,retries}', '0', true);
 | |
| 
 | |
| -- create stored procedure entity
 | |
| CREATE TABLE IF NOT EXISTS stored_procedure_entity (
 | |
|     id VARCHAR(36) GENERATED ALWAYS AS (json ->> 'id') STORED NOT NULL,
 | |
|     name VARCHAR(256) GENERATED ALWAYS AS (json ->> 'name') STORED NOT NULL,
 | |
|     fqnHash VARCHAR(256) NOT NULL,
 | |
|     json JSONB NOT NULL,
 | |
|     updatedAt BIGINT GENERATED ALWAYS AS ((json ->> 'updatedAt')::bigint) STORED NOT NULL,
 | |
|     updatedBy VARCHAR(256) GENERATED ALWAYS AS (json ->> 'updatedBy') STORED NOT NULL,
 | |
|     deleted BOOLEAN GENERATED ALWAYS AS ((json ->> 'deleted')::boolean) STORED,
 | |
|     PRIMARY KEY (id),
 | |
|     UNIQUE (fqnHash)
 | |
|     );
 | |
| 
 | |
| -- Add index on fromId and fromEntity columns
 | |
| CREATE INDEX IF NOT EXISTS from_entity_type_index ON entity_relationship (fromId, fromEntity);
 | |
| 
 | |
| -- Add index on toId and toEntity columns
 | |
| CREATE INDEX IF NOT EXISTS to_entity_type_index ON entity_relationship (toId, toEntity);
 | |
| 
 | |
| ALTER TABLE tag DROP CONSTRAINT IF EXISTS tag_fqnhash_key;
 | |
| 
 | |
| ALTER TABLE tag ADD CONSTRAINT unique_fqnHash UNIQUE (fqnHash);
 | |
| 
 | |
| ALTER TABLE tag ADD CONSTRAINT tag_pk PRIMARY KEY (id);
 | |
| 
 | |
| 
 | |
| -- rename viewParsingTimeoutLimit for queryParsingTimeoutLimit
 | |
| UPDATE ingestion_pipeline_entity
 | |
| SET json = jsonb_set(
 | |
|   json::jsonb #- '{sourceConfig,config,viewParsingTimeoutLimit}',
 | |
|   '{sourceConfig,config,queryParsingTimeoutLimit}',
 | |
|   (json #> '{sourceConfig,config,viewParsingTimeoutLimit}')::jsonb,
 | |
|   true
 | |
| )
 | |
| WHERE json #>> '{pipelineType}' = 'metadata';
 | |
| 
 | |
| -- Rename sandboxDomain for instanceDomain
 | |
| UPDATE dbservice_entity
 | |
| SET json = jsonb_set(
 | |
|   json::jsonb #- '{connection,config,sandboxDomain}',
 | |
|   '{connection,config,instanceDomain}',
 | |
|   (json #> '{connection,config,sandboxDomain}')::jsonb,
 | |
|   true
 | |
| )
 | |
| WHERE serviceType = 'DomoDatabase';
 | |
| 
 | |
| UPDATE dashboard_service_entity
 | |
| SET json = jsonb_set(
 | |
|   json::jsonb #- '{connection,config,sandboxDomain}',
 | |
|   '{connection,config,instanceDomain}',
 | |
|   (json #> '{connection,config,sandboxDomain}')::jsonb,
 | |
|   true
 | |
| )
 | |
| WHERE serviceType = 'DomoDashboard';
 | |
| 
 | |
| UPDATE pipeline_service_entity
 | |
| SET json = jsonb_set(
 | |
|   json::jsonb #- '{connection,config,sandboxDomain}',
 | |
|   '{connection,config,instanceDomain}',
 | |
|   (json #> '{connection,config,sandboxDomain}')::jsonb,
 | |
|   true
 | |
| )
 | |
| WHERE serviceType = 'DomoPipeline';
 | |
| 
 | |
| -- Query Entity supports service, which requires FQN for name
 | |
| ALTER TABLE query_entity RENAME COLUMN nameHash TO fqnHash;
 | |
| 
 | |
| CREATE INDEX IF NOT EXISTS idx_name_query_entity ON query_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_bot_entity ON bot_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_chart_entity ON chart_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_classification_entity ON classification (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_storage_container_entity ON storage_container_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_dashboard_data_model_entity ON dashboard_data_model_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_dashboard_entity ON dashboard_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_dashboard_service_entity ON dashboard_service_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_dashboard_insight_chart ON data_insight_chart (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_database_entity ON database_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_database_schema_entity ON database_schema_entity  (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_db_service_entity ON dbservice_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_event_subscription_entity ON event_subscription_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_glossary_entity ON glossary_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_glossary_term_entity ON glossary_term_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_ingestion_pipeline_entity ON ingestion_pipeline_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_kpi_entity ON kpi_entity  (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_messaging_service_name_entity ON messaging_service_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_metadata_service_name_entity  ON metadata_service_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_metric_name_entity ON metric_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_ml_model_name_entity ON ml_model_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_ml_model_service_name_entity ON mlmodel_service_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_pipeline_name_entity ON pipeline_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_pipeline_service_name_entity ON pipeline_service_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_policy_entity ON policy_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_query_entity ON query_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_report_entity ON report_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_role_entity ON role_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_storage_service_entity ON storage_service_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_table_entity ON table_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_tag_entity ON tag (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_team_entity ON team_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_test_case ON test_case (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_test_connection_definition ON test_connection_definition (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_test_definition ON test_definition (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_test_suite ON test_suite (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_topic_entity ON topic_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_type_entity ON type_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_user_entity ON user_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_web_analytic_event ON web_analytic_event (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_automations_workflow ON automations_workflow (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_domain_entity ON domain_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_data_product_entity ON data_product_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_data_search_service_entity ON search_service_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_data_search_index_entity ON search_index_entity (name);
 | |
| CREATE INDEX IF NOT EXISTS idx_name_data_stored_procedure_entity ON stored_procedure_entity (name);
 | |
| 
 | |
| CREATE TABLE IF NOT EXISTS persona_entity (
 | |
|   id VARCHAR(36) GENERATED ALWAYS AS (json ->> 'id') STORED NOT NULL,
 | |
|   name VARCHAR(256) GENERATED ALWAYS AS (json ->> 'name') STORED NOT NULL,
 | |
|   nameHash VARCHAR(256) NOT NULL,
 | |
|   json JSONB NOT NULL,
 | |
|   updatedAt BIGINT GENERATED ALWAYS AS ((json ->> 'updatedAt')::bigint) STORED NOT NULL,
 | |
|   updatedBy VARCHAR(256) GENERATED ALWAYS AS (json ->> 'updatedBy') STORED NOT NULL,
 | |
|   PRIMARY KEY (id),
 | |
|   UNIQUE (nameHash)
 | |
| );
 | |
| CREATE INDEX IF NOT EXISTS persona_name_index ON persona_entity USING btree (name);
 | |
| 
 | |
| CREATE TABLE IF NOT EXISTS doc_store (
 | |
|   id VARCHAR(36) GENERATED ALWAYS AS (json ->> 'id') STORED NOT NULL,
 | |
|   name VARCHAR(256) GENERATED ALWAYS AS (json ->> 'name') STORED NOT NULL,
 | |
|   entityType VARCHAR(256) GENERATED ALWAYS AS (json ->> 'entityType') STORED NOT NULL,
 | |
|   fqnHash VARCHAR(256) NOT NULL,
 | |
|   json JSONB NOT NULL,
 | |
|   updatedAt BIGINT GENERATED ALWAYS AS ((json ->> 'updatedAt')::bigint) STORED NOT NULL,
 | |
|   updatedBy VARCHAR(256) GENERATED ALWAYS AS (json ->> 'updatedBy') STORED NOT NULL,
 | |
|   PRIMARY KEY (id),
 | |
|   UNIQUE (fqnHash)
 | |
| );
 | |
| CREATE INDEX IF NOT EXISTS page_name_index ON doc_store USING btree (name);
 | |
| -- Remove Mark All Deleted Field
 | |
| UPDATE ingestion_pipeline_entity
 | |
| SET json = json::jsonb #- '{sourceConfig,config,markAllDeletedTables}'
 | |
| WHERE json #>> '{pipelineType}' = 'metadata';
 | |
| 
 | |
| 
 | |
| -- update entityReportData from pascale to camel case
 | |
| UPDATE report_data_time_series
 | |
| SET json = jsonb_set(
 | |
|   json::jsonb #- '{reportDataType}',
 | |
|   '{reportDataType}',
 | |
|   '"entityReportData"',
 | |
|   true
 | |
| ),
 | |
| entityFQNHash = MD5('entityReportData')
 | |
| WHERE json #>> '{reportDataType}' = 'EntityReportData';
 | |
| 
 | |
| -- update webAnalyticEntityViewReportData from pascale to camel case
 | |
| UPDATE report_data_time_series
 | |
| SET json = jsonb_set(
 | |
|   json::jsonb #- '{reportDataType}',
 | |
|   '{reportDataType}',
 | |
|   '"webAnalyticEntityViewReportData"',
 | |
|   true
 | |
| ),
 | |
| entityFQNHash = MD5('webAnalyticEntityViewReportData')
 | |
| WHERE json #>> '{reportDataType}' = 'WebAnalyticEntityViewReportData';
 | |
| 
 | |
| -- update webAnalyticUserActivityReportData from pascale to camel case
 | |
| UPDATE report_data_time_series
 | |
| SET json = jsonb_set(
 | |
|   json::jsonb #- '{reportDataType}',
 | |
|   '{reportDataType}',
 | |
|   '"webAnalyticUserActivityReportData"',
 | |
|   true
 | |
| ),
 | |
| entityFQNHash = MD5('webAnalyticUserActivityReportData')
 | |
| WHERE json #>> '{reportDataType}' = 'WebAnalyticUserActivityReportData';
 | |
| 
 | |
| CREATE TABLE IF NOT EXISTS installed_apps (
 | |
|     id VARCHAR(36) GENERATED ALWAYS AS (json ->> 'id') STORED NOT NULL,
 | |
|     nameHash VARCHAR(256) NOT NULL,
 | |
|     name VARCHAR(256) GENERATED ALWAYS AS (json ->> 'name') STORED NOT NULL,
 | |
|     json JSONB NOT NULL,
 | |
|     updatedAt BIGINT GENERATED ALWAYS AS ((json ->> 'updatedAt')::bigint) STORED NOT NULL,
 | |
|     updatedBy VARCHAR(256) GENERATED ALWAYS AS (json ->> 'updatedBy') STORED NOT NULL,
 | |
|     deleted BOOLEAN GENERATED ALWAYS AS ((json ->> 'deleted')::boolean) STORED,
 | |
|     PRIMARY KEY (id),
 | |
|     UNIQUE (nameHash)
 | |
|     );
 | |
| 
 | |
| CREATE TABLE IF NOT EXISTS apps_marketplace (
 | |
|     id VARCHAR(36) GENERATED ALWAYS AS (json ->> 'id') STORED NOT NULL,
 | |
|     nameHash VARCHAR(256) NOT NULL,
 | |
|     name VARCHAR(256) GENERATED ALWAYS AS (json ->> 'name') STORED NOT NULL,
 | |
|     json JSONB NOT NULL,
 | |
|     updatedAt BIGINT GENERATED ALWAYS AS ((json ->> 'updatedAt')::bigint) STORED NOT NULL,
 | |
|     updatedBy VARCHAR(256) GENERATED ALWAYS AS (json ->> 'updatedBy') STORED NOT NULL,
 | |
|     deleted BOOLEAN GENERATED ALWAYS AS ((json ->> 'deleted')::boolean) STORED,
 | |
|     PRIMARY KEY (id),
 | |
|     UNIQUE (nameHash)
 | |
|     );
 | |
|    
 | |
| CREATE TABLE IF NOT EXISTS apps_extension_time_series (
 | |
|     appId VARCHAR(36) GENERATED ALWAYS AS (json ->> 'appId') STORED NOT NULL,      
 | |
|     json JSONB NOT NULL,
 | |
|     timestamp BIGINT GENERATED ALWAYS AS ((json ->> 'timestamp')::bigint) STORED NOT NULL
 | |
| );  
 | |
| 
 | |
| 
 | |
| -- Adding back the PK queries from 1.1.5 to keep the correct VARCHAR length
 | |
| -- We don't have an ID, so we'll create a temp SERIAL number and use it for deletion
 | |
| ALTER TABLE entity_extension_time_series ADD COLUMN temp SERIAL;
 | |
| WITH CTE AS (
 | |
|   SELECT temp, ROW_NUMBER() OVER (PARTITION BY entityFQNHash, extension, timestamp ORDER BY entityFQNHash) RN FROM entity_extension_time_series)
 | |
| DELETE FROM entity_extension_time_series WHERE temp in (SELECT temp FROM CTE WHERE RN > 1);
 | |
| ALTER TABLE entity_extension_time_series DROP COLUMN temp;
 | |
| 
 | |
| ALTER TABLE entity_extension_time_series ALTER COLUMN entityFQNHash TYPE VARCHAR(768), ALTER COLUMN jsonSchema TYPE VARCHAR(256) , ALTER COLUMN extension TYPE VARCHAR(256);
 | |
| ALTER TABLE entity_extension_time_series DROP CONSTRAINT IF EXISTS entity_extension_time_series_constraint;
 | |
| ALTER TABLE entity_extension_time_series ADD CONSTRAINT entity_extension_time_series_constraint UNIQUE (entityFQNHash, extension, timestamp);
 | |
| 
 | |
| -- Airflow pipeline status set to millis
 | |
| UPDATE entity_extension_time_series ts
 | |
| SET json = jsonb_set(
 | |
| 	ts.json,
 | |
| 	'{timestamp}',
 | |
| 	to_jsonb(cast(ts.json #> '{timestamp}' as int8) *1000)
 | |
| )
 | |
| FROM pipeline_entity p
 | |
| WHERE ts.entityFQNHash  = p.fqnHash
 | |
|   and ts.extension = 'pipeline.pipelineStatus'
 | |
|   AND p.json #>> '{serviceType}' in ('Airflow', 'GluePipeline', 'Airbyte', 'Dagster', 'DomoPipeline')
 | |
| ; |