From 9fb5b09bd2fa5760c5ebc72f11a7e880bc532c81 Mon Sep 17 00:00:00 2001 From: jbai Date: Wed, 20 Jul 2016 19:07:16 -0700 Subject: [PATCH] update dependency property name and fix the duplicate key issue when updating the cfg_object_name_map table --- .../src/main/resources/jython/HiveLoad.py | 20 ++++++------------- .../main/resources/jython/HiveTransform.py | 4 ++-- .../schemas/HiveDependencyInstanceRecord.java | 8 ++++---- 3 files changed, 12 insertions(+), 20 deletions(-) diff --git a/metadata-etl/src/main/resources/jython/HiveLoad.py b/metadata-etl/src/main/resources/jython/HiveLoad.py index 5352bba98d..afa217be10 100644 --- a/metadata-etl/src/main/resources/jython/HiveLoad.py +++ b/metadata-etl/src/main/resources/jython/HiveLoad.py @@ -310,9 +310,9 @@ class HiveLoad: logical_name, version, version_sort_id, - instance_created_time, schema_text, ddl_text, + instance_created_time, created_time, wh_etl_exec_id ) @@ -367,16 +367,18 @@ class HiveLoad: SET s.mapped_object_dataset_id = d.id WHERE s.mapped_object_urn = d.urn; -- create to be deleted table - CREATE TEMPORARY TABLE IF NOT EXISTS t_deleted_depend + DROP TEMPORARY table IF EXISTS t_deleted_depend; + + CREATE TEMPORARY TABLE t_deleted_depend AS ( - SELECT c.obj_name_map_id + SELECT DISTINCT c.obj_name_map_id FROM cfg_object_name_map c LEFT JOIN stg_cfg_object_name_map s ON c.object_dataset_id = s.object_dataset_id and CASE WHEN c.mapped_object_dataset_id is not null THEN c.mapped_object_dataset_id = s.mapped_object_dataset_id ELSE c.mapped_object_name = s.mapped_object_name END - WHERE s.object_name is null + WHERE s.object_name is not null and c.object_dataset_id is not null and c.map_phrase = 'depends on' and c.object_type in ('dalids', 'hive')); @@ -386,16 +388,6 @@ class HiveLoad: SELECT obj_name_map_id FROM t_deleted_depend ); - -- update exist depends - UPDATE cfg_object_name_map c, stg_cfg_object_name_map s - SET c.object_type = s.object_type, c.object_sub_type = s.object_sub_type, c.object_name = 
s.object_name, - c.map_phrase = s.map_phrase, c.is_identical_map = s.is_identical_map, - c.mapped_object_type = s.mapped_object_type, c.mapped_object_sub_type = s.mapped_object_sub_type, - c.mapped_object_name = s.mapped_object_name, c.description = s.description, - c.last_modified = s.last_modified - WHERE s.object_dataset_id is not null and s.object_dataset_id = c.object_dataset_id - and s.mapped_object_dataset_id is not null and s.mapped_object_dataset_id = c.mapped_object_dataset_id; - -- insert new depends INSERT INTO cfg_object_name_map ( diff --git a/metadata-etl/src/main/resources/jython/HiveTransform.py b/metadata-etl/src/main/resources/jython/HiveTransform.py index d36cba4b01..5bf4cb1328 100644 --- a/metadata-etl/src/main/resources/jython/HiveTransform.py +++ b/metadata-etl/src/main/resources/jython/HiveTransform.py @@ -127,7 +127,7 @@ class HiveTransform: if one_db_info['type'].lower() == 'dalids' else 'hive:///' + one_db_info['database'] + '/' + table['dataset_name'], 'depends on', - 'is used by', + 'Y', row_value[3], row_value[4], row_value[2], @@ -185,7 +185,7 @@ class HiveTransform: table['version'], table['create_time'], json.dumps(schema_json), - view_expanded_text, + json.dumps(view_expanded_text), dataset_urn) instance_file_writer.append(dataset_instance_record) diff --git a/wherehows-common/src/main/java/wherehows/common/schemas/HiveDependencyInstanceRecord.java b/wherehows-common/src/main/java/wherehows/common/schemas/HiveDependencyInstanceRecord.java index c062b070db..3105140b01 100644 --- a/wherehows-common/src/main/java/wherehows/common/schemas/HiveDependencyInstanceRecord.java +++ b/wherehows-common/src/main/java/wherehows/common/schemas/HiveDependencyInstanceRecord.java @@ -26,7 +26,7 @@ public class HiveDependencyInstanceRecord extends AbstractRecord { String objectSubType; String objectName; String mapPhrase; - String mapPhraseReversed; + String isIdentialMap; String mappedObjectType; String mappedObjectSubType; String mappedObjectName; 
@@ -35,7 +35,7 @@ public class HiveDependencyInstanceRecord extends AbstractRecord { String mappedObjectUrn; public HiveDependencyInstanceRecord(String objectType, String objectSubType, String objectName, String objectUrn, - String mapPhrase, String mapPhraseReversed, String mappedObjectType, + String mapPhrase, String isIdentialMap, String mappedObjectType, String mappedObjectSubType, String mappedObjectName, String mappedObjectUrn, String description) { this.objectType = objectType; @@ -43,7 +43,7 @@ public class HiveDependencyInstanceRecord extends AbstractRecord { this.objectName = objectName; this.objectUrn = objectUrn; this.mapPhrase = mapPhrase; - this.mapPhraseReversed = mapPhraseReversed; + this.isIdentialMap = isIdentialMap; this.mappedObjectType = mappedObjectType; this.mappedObjectSubType = mappedObjectSubType; this.mappedObjectName = mappedObjectName; @@ -59,7 +59,7 @@ public class HiveDependencyInstanceRecord extends AbstractRecord { allFields.add(objectName); allFields.add(objectUrn); allFields.add(mapPhrase); - allFields.add(mapPhraseReversed); + allFields.add(isIdentialMap); allFields.add(mappedObjectType); allFields.add(mappedObjectSubType); allFields.add(mappedObjectName);