2016-07-25 14:44:02 -07:00
|
|
|
--
|
|
|
|
-- Copyright 2015 LinkedIn Corp. All rights reserved.
|
|
|
|
--
|
|
|
|
-- Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
-- you may not use this file except in compliance with the License.
|
|
|
|
-- You may obtain a copy of the License at
|
|
|
|
--
|
|
|
|
-- http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
--
|
|
|
|
-- Unless required by applicable law or agreed to in writing, software
|
|
|
|
-- distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
--
|
|
|
|
|
|
|
|
|
2016-07-28 16:38:03 -07:00
|
|
|
-- creation statement for Kafka event related tables
|
|
|
|
-- Gobblin:
|
|
|
|
-- + GobblinTrackingEvent: compaction
|
|
|
|
-- + GobblinTrackingEvent_Distcp_Ng: distcp
|
|
|
|
-- + GobblinTrackingEvent_Lumos: rdbms/nosql
|
|
|
|
-- Hive Metastore
|
|
|
|
-- + MetastoreTableAudit
|
|
|
|
-- + MetastorePartitionAudit
|
|
|
|
-- Mapping {Kafka topic => stg table} is loaded in
|
|
|
|
-- backend-service/app/actors/KafkaConsumerMaster.java
|
|
|
|
-- Avro schemas of the Kafka event are available in
|
|
|
|
-- data-model/avro
|
2016-07-25 14:44:02 -07:00
|
|
|
|
|
|
|
-- Staging table for Gobblin tracking events of the "compaction" kind
-- (GobblinTrackingEvent). Rows are keyed by dataset, cluster and the time
-- the event was logged.
CREATE TABLE `stg_kafka_gobblin_compaction` (
  -- identity of the compacted data
  `cluster`           VARCHAR(20)  NOT NULL,
  `dataset`           VARCHAR(100) NOT NULL,
  `partition_type`    VARCHAR(20)  DEFAULT NULL,
  `partition_name`    VARCHAR(50)  DEFAULT NULL,
  -- compaction metrics
  `record_count`      BIGINT(20)   DEFAULT NULL,
  `late_record_count` BIGINT(20)   DEFAULT NULL,
  `dedupe_status`     VARCHAR(20)  DEFAULT NULL,
  -- job that emitted the event
  `job_context`       VARCHAR(50)  DEFAULT NULL,
  `project_name`      VARCHAR(50)  DEFAULT NULL,
  `flow_name`         VARCHAR(100) DEFAULT NULL,
  `job_name`          VARCHAR(100) DEFAULT NULL,
  `flow_exec_id`      INT(11)      DEFAULT NULL,
  -- NOTE(review): time unit (seconds vs. milliseconds) is not visible here — confirm with the producer
  `log_event_time`    BIGINT(20)   NOT NULL,
  PRIMARY KEY (`dataset`, `cluster`, `log_event_time`)
)
  ENGINE = InnoDB
  DEFAULT CHARACTER SET = latin1;
|
|
|
|
|
|
|
|
|
|
|
|
-- Staging table for Gobblin tracking events of the "lumos" kind
-- (GobblinTrackingEvent_Lumos, i.e. rdbms/nosql sources). Rows are keyed by
-- dataset, cluster and the time the event was logged.
CREATE TABLE `stg_kafka_gobblin_lumos` (
  -- identity and location of the landed data
  `cluster`               VARCHAR(20)  NOT NULL,
  `dataset`               VARCHAR(100) NOT NULL,
  `location`              VARCHAR(200) NOT NULL,
  `partition_type`        VARCHAR(20)  DEFAULT NULL,
  `partition_name`        VARCHAR(50)  DEFAULT NULL,
  `subpartition_type`     VARCHAR(20)  DEFAULT NULL,
  `subpartition_name`     VARCHAR(50)  DEFAULT NULL,
  -- data watermarks and volume
  -- NOTE(review): the "epoch3" suffix presumably means epoch milliseconds — confirm
  `max_data_date_epoch3`  BIGINT(20)   DEFAULT NULL,
  `max_data_key`          BIGINT(20)   DEFAULT NULL,
  `record_count`          BIGINT(20)   DEFAULT NULL,
  -- where the data was pulled from
  `source_datacenter`     VARCHAR(10)  DEFAULT NULL,
  `source_deployment_env` VARCHAR(10)  DEFAULT NULL,
  `source_database`       VARCHAR(50)  DEFAULT NULL,
  `source_table`          VARCHAR(50)  DEFAULT NULL,
  -- job that emitted the event
  `job_context`           VARCHAR(50)  DEFAULT NULL,
  `project_name`          VARCHAR(100) DEFAULT NULL,
  `flow_name`             VARCHAR(100) DEFAULT NULL,
  `job_name`              VARCHAR(100) DEFAULT NULL,
  `flow_exec_id`          INT(11)      DEFAULT NULL,
  `log_event_time`        BIGINT(20)   NOT NULL,
  PRIMARY KEY (`dataset`, `cluster`, `log_event_time`)
)
  ENGINE = InnoDB
  DEFAULT CHARACTER SET = latin1;
|
|
|
|
|
|
|
|
|
|
|
|
-- Staging table for Gobblin tracking events of the "distcp_ng" kind
-- (GobblinTrackingEvent_Distcp_Ng). Rows are keyed by dataset, cluster,
-- partition name and the time the event was logged.
CREATE TABLE `stg_kafka_gobblin_distcp` (
  -- identity of the copied data
  `cluster`           VARCHAR(20)  NOT NULL,
  `dataset`           VARCHAR(100) NOT NULL,
  `partition_type`    VARCHAR(20)  DEFAULT NULL,
  -- Part of the PRIMARY KEY, and MySQL requires every primary-key column to
  -- be NOT NULL, so the previous `DEFAULT NULL` declaration contradicted the
  -- key. The empty-string default keeps inserts that omit the partition valid.
  `partition_name`    VARCHAR(50)  NOT NULL DEFAULT '',
  -- NOTE(review): `upsteam_timestamp` looks like a misspelling of
  -- "upstream_timestamp"; the name is kept because renaming the column would
  -- break existing readers and writers.
  `upsteam_timestamp` BIGINT(20)   DEFAULT NULL,
  `origin_timestamp`  BIGINT(20)   DEFAULT NULL,
  -- copy endpoints
  `source_path`       VARCHAR(200) DEFAULT NULL,
  `target_path`       VARCHAR(200) DEFAULT NULL,
  -- job that emitted the event
  `job_context`       VARCHAR(50)  DEFAULT NULL,
  `project_name`      VARCHAR(100) DEFAULT NULL,
  `flow_name`         VARCHAR(100) DEFAULT NULL,
  `job_name`          VARCHAR(100) DEFAULT NULL,
  `flow_exec_id`      INT(11)      DEFAULT NULL,
  `log_event_time`    BIGINT(20)   NOT NULL,
  PRIMARY KEY (`dataset`, `cluster`, `partition_name`, `log_event_time`)
)
  ENGINE = InnoDB
  DEFAULT CHARACTER SET = latin1;
|
|
|
|
|
|
|
|
|
|
|
|
-- Staging table for Hive Metastore audit events; holds both
-- MetastoreTableAudit and MetastorePartitionAudit records.
CREATE TABLE `stg_kafka_metastore_audit` (
  -- event envelope
  `server`               VARCHAR(20)  NOT NULL,
  `instance`             VARCHAR(20)  NOT NULL,
  `app_name`             VARCHAR(50)  NOT NULL,
  `event_name`           VARCHAR(50)  NOT NULL,
  `event_type`           VARCHAR(30)  NOT NULL,
  `log_event_time`       BIGINT(20)   NOT NULL,
  -- metastore that produced the event
  `metastore_thrift_uri` VARCHAR(200) DEFAULT NULL,
  `metastore_version`    VARCHAR(20)  DEFAULT NULL,
  -- flags stored as short text; presumably 'true'/'false' — confirm with the producer
  `is_successful`        VARCHAR(5)   DEFAULT NULL,
  `is_data_deleted`      VARCHAR(5)   DEFAULT NULL,
  -- audited object
  `db_name`              VARCHAR(100) NOT NULL,
  `table_name`           VARCHAR(100) NOT NULL,
  `time_partition`       VARCHAR(50)  NOT NULL,
  `location`             VARCHAR(200) DEFAULT NULL,
  `owner`                VARCHAR(100) DEFAULT NULL,
  `create_time`          BIGINT(20)   DEFAULT NULL,
  `last_access_time`     BIGINT(20)   DEFAULT NULL,
  -- before/after payloads; these two columns use utf8 instead of the
  -- table-level latin1 default
  `old_info`             MEDIUMTEXT CHARACTER SET utf8 DEFAULT NULL,
  `new_info`             MEDIUMTEXT CHARACTER SET utf8 DEFAULT NULL,
  PRIMARY KEY (`db_name`, `table_name`, `time_partition`, `instance`, `log_event_time`, `event_type`)
)
  ENGINE = InnoDB
  DEFAULT CHARACTER SET = latin1;
|