diff --git a/metadata-etl/src/main/java/metadata/etl/Launcher.java b/metadata-etl/src/main/java/metadata/etl/Launcher.java
index 7a9efb415d4..e51cfcf4052 100644
--- a/metadata-etl/src/main/java/metadata/etl/Launcher.java
+++ b/metadata-etl/src/main/java/metadata/etl/Launcher.java
@@ -60,6 +60,7 @@ public class Launcher {
       props.load(propFile);
       etlJobNameString = props.getProperty(JOB_NAME_KEY);
       refId = Integer.valueOf(props.getProperty(REF_ID_KEY));
+      whEtlId = Integer.valueOf(props.getProperty(WH_ETL_EXEC_ID_KEY));
     } catch (IOException e) {
       //logger.error("property file '{}' not found" , property_file);
diff --git a/metadata-etl/src/main/resources/jython/HiveExtract.py b/metadata-etl/src/main/resources/jython/HiveExtract.py
index f7c750746c2..0fb331a1aed 100644
--- a/metadata-etl/src/main/resources/jython/HiveExtract.py
+++ b/metadata-etl/src/main/resources/jython/HiveExtract.py
@@ -129,7 +129,6 @@ class HiveExtract:
         end) field_delim
         from SERDE_PARAMS sp join TBLS t on sp.SERDE_ID = t.SD_ID
         where sp.PARAM_KEY regexp 'schema.literal|schema.url|field.delim'
-        and sp.PARAM_VALUE regexp """ + r" '^(,|{|\\\\|\\|)' " + """
         group by 1,2,3,4,5 ) et
         JOIN DBS d on et.DB_ID = d.DB_ID
         JOIN SDS s on et.SERDE_ID = s.SD_ID
diff --git a/metadata-etl/src/main/resources/jython/HiveTransform.py b/metadata-etl/src/main/resources/jython/HiveTransform.py
index 1cdbbff9224..88912b87843 100644
--- a/metadata-etl/src/main/resources/jython/HiveTransform.py
+++ b/metadata-etl/src/main/resources/jython/HiveTransform.py
@@ -76,17 +76,15 @@ class HiveTransform:
         if TableInfo.schema_literal in table and table[TableInfo.schema_literal] is not None:
           sort_id = 0
+          urn = "hive:///%s/%s" % (one_db_info['database'], table['name'])
           try:
             schema_data = json.loads(table[TableInfo.schema_literal])
+            schema_json = schema_data
+            acp = AvroColumnParser(schema_data, urn = urn)
+            result = acp.get_column_list_result()
+            field_detail_list += result
           except ValueError:
             self.logger.error("Schema json error for table : \n" + str(table))
-          schema_json = schema_data
-          # extract fields to field record
-          urn = "hive:///%s/%s" % (one_db_info['database'], table['name'])
-          acp = AvroColumnParser(schema_data, urn = urn)
-          result = acp.get_column_list_result()
-          field_detail_list += result
-

         elif TableInfo.field_list in table:
           # Convert to avro
           uri = "hive:///%s/%s" % (one_db_info['database'], table['name'])
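Note on the HiveTransform.py hunk: it is a correctness fix, not just a reshuffle. In the old layout, a ValueError from json.loads() was only logged, and execution fell through to the parser calls after the except block, where schema_data was either unbound (NameError) or stale from a previous table in the loop. Moving the AvroColumnParser work inside the try means a malformed schema literal is skipped cleanly. Below is a minimal sketch of the fixed flow; the AvroColumnParser stub and the extract_fields helper are illustrative stand-ins, not part of the patch (the real parser ships in metadata-etl's jython resources).

```python
import json
import logging

logger = logging.getLogger(__name__)


class AvroColumnParser(object):
    """Stand-in for the real parser; the real class flattens an Avro
    schema into field records keyed by the dataset urn."""
    def __init__(self, schema_data, urn=None):
        self.schema_data = schema_data
        self.urn = urn

    def get_column_list_result(self):
        return [(self.urn, f.get('name'))
                for f in self.schema_data.get('fields', [])]


def extract_fields(schema_literal, urn):
    field_detail_list = []
    try:
        schema_data = json.loads(schema_literal)  # may raise ValueError
        # Everything that depends on schema_data stays inside the try,
        # so a bad schema literal cannot leave it unbound downstream.
        acp = AvroColumnParser(schema_data, urn=urn)
        field_detail_list += acp.get_column_list_result()
    except ValueError:
        # Old layout: this branch only logged, then control fell through
        # to the parser calls below the except block and failed there.
        logger.error("Schema json error for table: %s", urn)
    return field_detail_list


# Usage: extract_fields('{"fields": [{"name": "id"}]}', "hive:///db/tbl")
# returns [("hive:///db/tbl", "id")]; a malformed literal returns [].
```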