mirror of
https://github.com/datahub-project/datahub.git
synced 2025-07-06 16:49:03 +00:00
84 lines
3.4 KiB
MySQL
84 lines
3.4 KiB
MySQL
![]() |
--
|
||
|
-- Copyright 2015 LinkedIn Corp. All rights reserved.
|
||
|
--
|
||
|
-- Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
-- you may not use this file except in compliance with the License.
|
||
|
-- You may obtain a copy of the License at
|
||
|
--
|
||
|
-- http://www.apache.org/licenses/LICENSE-2.0
|
||
|
--
|
||
|
-- Unless required by applicable law or agreed to in writing, software
|
||
|
-- distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
--
|
||
|
|
||
|
|
||
|
-- File name patterns used to abstract from file level to directory level.
-- The storage engine and character set are declared explicitly so that this
-- table is created the same way as the other pattern tables in this file,
-- instead of silently depending on the server's defaults.
CREATE TABLE `filename_pattern` (
  `filename_pattern_id` INT(11)      NOT NULL AUTO_INCREMENT, -- auto-generated primary key
  `regex`               VARCHAR(100) DEFAULT NULL,            -- nullable, exactly as in the original definition
  PRIMARY KEY (`filename_pattern_id`)
)
  ENGINE = InnoDB
  DEFAULT CHARSET = utf8;
|
||
|
|
||
|
-- Partition layout patterns, used to abstract from partition level to dataset level.
-- Only `layout_id` is mandatory; every other column is nullable and defaults to NULL.
-- NOTE(review): the *_index columns presumably hold positions related to `regex`
-- matching -- confirm against the code that reads this table.
CREATE TABLE `dataset_partition_layout_pattern` (
  `layout_id`               INT(11)      NOT NULL AUTO_INCREMENT, -- auto-generated primary key
  `regex`                   VARCHAR(50)  DEFAULT NULL,
  `mask`                    VARCHAR(50)  DEFAULT NULL,
  `leading_path_index`      SMALLINT(6)  DEFAULT NULL,
  `partition_index`         SMALLINT(6)  DEFAULT NULL,
  `second_partition_index`  SMALLINT(6)  DEFAULT NULL,
  `sort_id`                 INT(11)      DEFAULT NULL,
  `comments`                VARCHAR(200) DEFAULT NULL,            -- free-text notes
  `partition_pattern_group` VARCHAR(50)  DEFAULT NULL,
  PRIMARY KEY (`layout_id`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8;
|
||
|
|
||
|
-- Log lineage patterns: regular expressions used to extract lineage from logs.
-- `regex` and `dataset_index` are mandatory; all other non-key columns are nullable.
--
-- All integer *_index columns use INT(11). The parenthesised number is only a
-- display width (it does not change storage size or value range), so the former
-- INT(20) declarations were misleading and inconsistent with the INT(11) index
-- columns of this same table.
CREATE TABLE `log_lineage_pattern` (
  `pattern_id`          INT(11)      NOT NULL AUTO_INCREMENT, -- auto-generated primary key
  `pattern_type`        VARCHAR(20)  DEFAULT NULL
  COMMENT 'type of job that have this log pattern',
  `regex`               VARCHAR(200) NOT NULL,
  `database_type`       VARCHAR(20)  DEFAULT NULL
  COMMENT 'database type input by user, e.g. hdfs, voldemort...',
  `database_name_index` INT(11)      DEFAULT NULL,
  `dataset_index`       INT(11)      NOT NULL
  COMMENT 'the group id of dataset part in the regex',
  `operation_type`      VARCHAR(20)  DEFAULT NULL
  COMMENT 'read/write, input by user',
  `record_count_index`  INT(11)      DEFAULT NULL
  COMMENT 'all operations count',
  `record_byte_index`   INT(11)      DEFAULT NULL,
  `insert_count_index`  INT(11)      DEFAULT NULL,
  `insert_byte_index`   INT(11)      DEFAULT NULL,
  `delete_count_index`  INT(11)      DEFAULT NULL,
  `delete_byte_index`   INT(11)      DEFAULT NULL,
  `update_count_index`  INT(11)      DEFAULT NULL,
  `update_byte_index`   INT(11)      DEFAULT NULL,
  `comments`            VARCHAR(200) DEFAULT NULL,            -- free-text notes
  `source_target_type`  ENUM('source', 'target') DEFAULT NULL,
  PRIMARY KEY (`pattern_id`)
)
  ENGINE = InnoDB
  DEFAULT CHARSET = utf8;
|
||
|
|
||
|
-- Patterns used to discover the hadoop id inside a log.
-- `regex` and `reference_job_id_index` are mandatory; `is_active` falls back to 0
-- when no value is supplied.
CREATE TABLE `log_reference_job_id_pattern` (
  `pattern_id`             INT(11)      NOT NULL AUTO_INCREMENT, -- auto-generated primary key
  `pattern_type`           VARCHAR(20)  DEFAULT NULL COMMENT 'type of job that have this log pattern',
  `regex`                  VARCHAR(200) NOT NULL,
  `reference_job_id_index` INT(11)      NOT NULL,
  `is_active`              TINYINT(1)   DEFAULT '0',
  `comments`               VARCHAR(200) DEFAULT NULL,            -- free-text notes
  PRIMARY KEY (`pattern_id`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8;
|
||
|
|
||
|
|