84 lines
3.4 KiB
MySQL
Raw Normal View History

2015-11-19 14:39:21 -08:00
--
-- Copyright 2015 LinkedIn Corp. All rights reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
--
-- File-name patterns used to abstract from file level to directory level.
-- One row per pattern: an AUTO_INCREMENT key plus the pattern text.
-- The storage engine and character set are declared explicitly, the same way
-- the other pattern tables in this file declare them, so this table does not
-- silently pick up whatever the server or schema default happens to be.
CREATE TABLE filename_pattern
(
  filename_pattern_id INT(11)      NOT NULL AUTO_INCREMENT,  -- key, generated by the server
  regex               VARCHAR(100),                          -- pattern text; nullable, max 100 characters
  PRIMARY KEY (filename_pattern_id)
)
  ENGINE = InnoDB
  DEFAULT CHARSET = utf8;
-- Partition layout patterns, used to abstract from partition level to dataset level.
-- Apart from the key, every column is optional and defaults to NULL.
CREATE TABLE `dataset_partition_layout_pattern` (
  `layout_id`               INT(11)      NOT NULL AUTO_INCREMENT,  -- key, generated by the server
  `regex`                   VARCHAR(50)  DEFAULT NULL,
  `mask`                    VARCHAR(50)  DEFAULT NULL,
  -- NOTE(review): the three *_index columns presumably point at parts of a
  -- `regex` match (path prefix, partition, second partition) - confirm with the consumer.
  `leading_path_index`      SMALLINT(6)  DEFAULT NULL,
  `partition_index`         SMALLINT(6)  DEFAULT NULL,
  `second_partition_index`  SMALLINT(6)  DEFAULT NULL,
  `sort_id`                 INT(11)      DEFAULT NULL,
  `comments`                VARCHAR(200) DEFAULT NULL,  -- free-text notes
  `partition_pattern_group` VARCHAR(50)  DEFAULT NULL,
  PRIMARY KEY (`layout_id`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8;
-- Log lineage patterns, used to extract lineage from logs.
-- `regex` and `dataset_index` are mandatory; every other non-key column is
-- optional and defaults to NULL.
CREATE TABLE `log_lineage_pattern` (
  `pattern_id`          INT(11)      NOT NULL AUTO_INCREMENT,  -- key, generated by the server
  `pattern_type`        VARCHAR(20)  DEFAULT NULL COMMENT 'type of job that have this log pattern',
  `regex`               VARCHAR(200) NOT NULL,
  `database_type`       VARCHAR(20)  DEFAULT NULL COMMENT 'database type input by user, e.g. hdfs, voldermont...',
  `database_name_index` INT(11)      DEFAULT NULL,
  `dataset_index`       INT(11)      NOT NULL     COMMENT 'the group id of dataset part in the regex',
  `operation_type`      VARCHAR(20)  DEFAULT NULL COMMENT 'read/write, input by user',

  -- NOTE(review): by analogy with `dataset_index`, the *_count_index and
  -- *_byte_index columns below presumably hold regex group ids as well - confirm.
  `record_count_index`  INT(20)      DEFAULT NULL COMMENT 'all operations count',
  `record_byte_index`   INT(20)      DEFAULT NULL,
  `insert_count_index`  INT(20)      DEFAULT NULL,
  `insert_byte_index`   INT(20)      DEFAULT NULL,
  `delete_count_index`  INT(20)      DEFAULT NULL,
  `delete_byte_index`   INT(20)      DEFAULT NULL,
  `update_count_index`  INT(20)      DEFAULT NULL,
  `update_byte_index`   INT(20)      DEFAULT NULL,

  `comments`            VARCHAR(200) DEFAULT NULL,  -- free-text notes
  `source_target_type`  ENUM('source', 'target') DEFAULT NULL,
  PRIMARY KEY (`pattern_id`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8;
-- Patterns used to discover the hadoop id inside a log.
-- `regex` and `reference_job_id_index` are mandatory; `is_active` defaults to 0.
CREATE TABLE `log_reference_job_id_pattern` (
  `pattern_id`             INT(11)      NOT NULL AUTO_INCREMENT,  -- key, generated by the server
  `pattern_type`           VARCHAR(20)  DEFAULT NULL COMMENT 'type of job that have this log pattern',
  `regex`                  VARCHAR(200) NOT NULL,
  -- NOTE(review): presumably the regex group that captures the job id - confirm.
  `reference_job_id_index` INT(11)      NOT NULL,
  `is_active`              TINYINT(1)   DEFAULT '0',
  `comments`               VARCHAR(200) DEFAULT NULL,  -- free-text notes
  PRIMARY KEY (`pattern_id`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8;