From 5c76f473135d30f64f64ac7dd9fa68f5fe293e5b Mon Sep 17 00:00:00 2001 From: Na Zhang Date: Mon, 26 Sep 2016 15:06:30 -0700 Subject: [PATCH 1/2] Remove hard-coded data center and cluster name from Hive instance load --- metadata-etl/src/main/resources/jython/HiveLoad.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/metadata-etl/src/main/resources/jython/HiveLoad.py b/metadata-etl/src/main/resources/jython/HiveLoad.py index 0bf5fc7ab3..b7aaec6b48 100644 --- a/metadata-etl/src/main/resources/jython/HiveLoad.py +++ b/metadata-etl/src/main/resources/jython/HiveLoad.py @@ -298,6 +298,8 @@ class HiveLoad: set sdi.dataset_id = d.id where sdi.abstract_dataset_urn = d.urn and sdi.db_id = {db_id}; + + # nzhang fix issue hive_instance.*.csv has hard-coded datacenter INSERT INTO dict_dataset_instance ( dataset_id, db_id, @@ -316,8 +318,8 @@ class HiveLoad: created_time, wh_etl_exec_id ) - select s.dataset_id, s.db_id, s.deployment_tier, s.data_center, - s.server_cluster, s.slice, s.status_id, s.native_name, s.logical_name, s.version, + select s.dataset_id, s.db_id, s.deployment_tier, c.data_center, c.cluster, + s.slice, s.status_id, s.native_name, s.logical_name, s.version, case when s.version regexp '[0-9]+\.[0-9]+\.[0-9]+' then cast(substring_index(s.version, '.', 1) as unsigned) * 100000000 + cast(substring_index(substring_index(s.version, '.', 2), '.', -1) as unsigned) * 10000 + @@ -326,6 +328,7 @@ class HiveLoad: end version_sort_id, s.schema_text, s.ddl_text, s.instance_created_time, s.created_time, s.wh_etl_exec_id from stg_dict_dataset_instance s join dict_dataset d on s.dataset_id = d.id + join cfg_database c on c.db_id = {db_id} where s.db_id = {db_id} on duplicate key update deployment_tier=s.deployment_tier, data_center=s.data_center, server_cluster=s.server_cluster, slice=s.slice, @@ -335,6 +338,7 @@ class HiveLoad: ; """.format(source_file=self.input_instance_file, db_id=self.db_id, wh_etl_exec_id=self.wh_etl_exec_id) + # didn't load into final table for now for 
state in load_cmd.split(";"): From dbaf053e76b8de1999edcab27afab280cab10872 Mon Sep 17 00:00:00 2001 From: Na Zhang Date: Wed, 19 Oct 2016 14:10:29 -0700 Subject: [PATCH 2/2] Add local test properties template for teradata and scm owners ETL --- .../src/main/resources/local_test.properties.template | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/metadata-etl/src/main/resources/local_test.properties.template b/metadata-etl/src/main/resources/local_test.properties.template index 7cca730769..6386050b93 100644 --- a/metadata-etl/src/main/resources/local_test.properties.template +++ b/metadata-etl/src/main/resources/local_test.properties.template @@ -30,6 +30,7 @@ teradata.metadata= teradata.sample.skip.list= teradata.sample_output= teradata.schema_output= +teradata.collect.sample.data.days= # HDFS properties hdfs.schema_location= @@ -154,3 +155,7 @@ svn.url.prefix= git.project.metadata= product.repo.metadata= product.repo.owner= + +# Database scm owner +database.scm.repo= +base.url.key=