datahub/data-model/DDL/ETL_DDL/lineage_metadata.sql

93 lines
6.1 KiB
MySQL
Raw Normal View History

2015-11-19 14:39:21 -08:00
--
-- Copyright 2015 LinkedIn Corp. All rights reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
--
-- created statements for lineage related tables
-- Table `stg_job_execution_data_lineage`.
-- NOTE(review): the `stg_` prefix suggests this is the staging area that feeds
-- `job_execution_data_lineage` -- confirm against the ETL job that loads it.
-- It has the same column names and types as `job_execution_data_lineage`, but
-- declares no keys or partitioning, and every column except
-- `source_target_type` and `srl_no` is nullable.
CREATE TABLE IF NOT EXISTS `stg_job_execution_data_lineage` (
  -- application / flow / job execution identifiers
  `app_id`                 SMALLINT(5) UNSIGNED DEFAULT NULL,
  `flow_exec_id`           BIGINT(20) UNSIGNED  DEFAULT NULL,
  `job_exec_id`            BIGINT(20) UNSIGNED  DEFAULT NULL,
  `job_exec_uuid`          VARCHAR(100)         DEFAULT NULL,
  `job_name`               VARCHAR(255)         DEFAULT NULL,
  `job_start_unixtime`     BIGINT(20)           DEFAULT NULL,
  `job_finished_unixtime`  BIGINT(20)           DEFAULT NULL,

  -- object (dataset) touched by the job
  `db_id`                  SMALLINT(5) UNSIGNED DEFAULT NULL,
  `abstracted_object_name` VARCHAR(255)         DEFAULT NULL,
  `full_object_name`       VARCHAR(255)         DEFAULT NULL,
  `partition_start`        VARCHAR(50)          DEFAULT NULL,
  `partition_end`          VARCHAR(50)          DEFAULT NULL,
  `partition_type`         VARCHAR(20)          DEFAULT NULL,
  `layout_id`              SMALLINT(5) UNSIGNED DEFAULT NULL,
  `storage_type`           VARCHAR(16)          DEFAULT NULL,

  -- role of the object in the job and ordering of the lineage records
  `source_target_type`     ENUM('source', 'target', 'lookup', 'temp') NOT NULL,
  `srl_no`                 SMALLINT(5) UNSIGNED NOT NULL DEFAULT '1' COMMENT 'the sorted number of this record in all records of this job related operation',
  `source_srl_no`          SMALLINT(5) UNSIGNED DEFAULT NULL COMMENT 'the related record of this record',
  `operation`              VARCHAR(64)          DEFAULT NULL,

  -- row counters
  `record_count`           BIGINT(20) UNSIGNED  DEFAULT NULL,
  `insert_count`           BIGINT(20) UNSIGNED  DEFAULT NULL,
  `delete_count`           BIGINT(20) UNSIGNED  DEFAULT NULL,
  `update_count`           BIGINT(20) UNSIGNED  DEFAULT NULL,

  -- bookkeeping
  `flow_path`              VARCHAR(1024)        DEFAULT NULL,
  `created_date`           INT UNSIGNED,
  `wh_etl_exec_id`         INT(11)              DEFAULT NULL
)
  ENGINE = InnoDB
  DEFAULT CHARSET = utf8;
-- Table `job_execution_data_lineage` (table comment: 'Lineage table').
-- One row per (app_id, job_exec_id, srl_no); hash-partitioned on `app_id`
-- into 8 partitions.
--
-- `job_exec_id` is declared NOT NULL because it is part of the primary key.
-- MySQL primary-key columns are always NOT NULL, so the previous
-- `DEFAULT NULL` declaration was contradictory: depending on the server
-- version it is either silently overridden or rejected when the table is
-- created.
--
-- NOTE(review): this table uses latin1 while `stg_job_execution_data_lineage`
-- uses utf8. If rows are copied from the staging table, non-latin1 characters
-- may not survive -- confirm. The charset is left unchanged here because a
-- multi-byte charset could push the `flow_path`(300) prefix index past the
-- InnoDB index key length limit on some row formats.
CREATE TABLE IF NOT EXISTS `job_execution_data_lineage` (
  -- application / flow / job execution identifiers
  `app_id`                 SMALLINT(5) UNSIGNED NOT NULL,
  `flow_exec_id`           BIGINT(20) UNSIGNED  DEFAULT NULL,
  `job_exec_id`            BIGINT(20) UNSIGNED  NOT NULL
  COMMENT 'in azkaban this is a smart key combined execution id and sort id of the job',
  `job_exec_uuid`          VARCHAR(100)         DEFAULT NULL
  COMMENT 'some scheduler do not have this value, e.g. Azkaban',
  `job_name`               VARCHAR(255)         DEFAULT NULL,
  `job_start_unixtime`     BIGINT(20)           DEFAULT NULL,
  `job_finished_unixtime`  BIGINT(20)           DEFAULT NULL,

  -- object (dataset) touched by the job
  `db_id`                  SMALLINT(5) UNSIGNED DEFAULT NULL,
  `abstracted_object_name` VARCHAR(255)         NOT NULL,
  `full_object_name`       VARCHAR(255)         DEFAULT NULL,
  `partition_start`        VARCHAR(50)          DEFAULT NULL,
  `partition_end`          VARCHAR(50)          DEFAULT NULL,
  `partition_type`         VARCHAR(20)          DEFAULT NULL,
  `layout_id`              SMALLINT(5) UNSIGNED DEFAULT NULL
  COMMENT 'layout of the dataset',
  `storage_type`           VARCHAR(16)          DEFAULT NULL,

  -- role of the object in the job and ordering of the lineage records
  `source_target_type`     ENUM('source', 'target', 'lookup', 'temp') NOT NULL,
  `srl_no`                 SMALLINT(5) UNSIGNED NOT NULL DEFAULT '1'
  COMMENT 'the sorted number of this record in all records of this job related operation',
  `source_srl_no`          SMALLINT(5) UNSIGNED DEFAULT NULL
  COMMENT 'the related record of this record',
  `operation`              VARCHAR(64)          DEFAULT NULL,

  -- row counters
  `record_count`           BIGINT(20) UNSIGNED  DEFAULT NULL,
  `insert_count`           BIGINT(20) UNSIGNED  DEFAULT NULL,
  `delete_count`           BIGINT(20) UNSIGNED  DEFAULT NULL,
  `update_count`           BIGINT(20) UNSIGNED  DEFAULT NULL,

  -- bookkeeping
  `flow_path`              VARCHAR(1024)        DEFAULT NULL,
  `created_date`           INT UNSIGNED,
  `wh_etl_exec_id`         INT(11)              DEFAULT NULL,

  -- The partitioning column (`app_id`) has to appear in the primary key of a
  -- partitioned MySQL table, which is why it leads the key.
  PRIMARY KEY (`app_id`, `job_exec_id`, `srl_no`),
  -- Only the first 300 characters of `flow_path` are indexed.
  KEY `idx_flow_path` (`app_id`, `flow_path`(300)),
  KEY `idx_job_attempt_data_lineage__object_name` (`abstracted_object_name`, `source_target_type`) USING BTREE
)
  ENGINE = InnoDB
  DEFAULT CHARSET = latin1
  COMMENT = 'Lineage table'
  PARTITION BY HASH (app_id)
  PARTITIONS 8;