mirror of
				https://github.com/datahub-project/datahub.git
				synced 2025-10-26 08:25:02 +00:00 
			
		
		
		
	Modify Oracle metadata ETL job: use json.dumps and remove unnecessary quotes
This commit is contained in:
		
							parent
							
								
									b4a718efd0
								
							
						
					
					
						commit
						dbbdb6e2fb
					
				| @ -199,7 +199,6 @@ class OracleExtract: | |||||||
|     ''' |     ''' | ||||||
|     schema_dict = {"fields": []} |     schema_dict = {"fields": []} | ||||||
|     table_record = {} |     table_record = {} | ||||||
|     field_record = {} |  | ||||||
|     table_idx = 0 |     table_idx = 0 | ||||||
|     field_idx = 0 |     field_idx = 0 | ||||||
| 
 | 
 | ||||||
| @ -211,14 +210,18 @@ class OracleExtract: | |||||||
|         # This is a new table. Let's push the previous table record into output_list |         # This is a new table. Let's push the previous table record into output_list | ||||||
|         if 'urn' in table_record: |         if 'urn' in table_record: | ||||||
|           schema_dict["num_fields"] = field_idx |           schema_dict["num_fields"] = field_idx | ||||||
|           table_record['columns'] = schema_dict |           table_record["columns"] = json.dumps(schema_dict) | ||||||
|           self.table_output_list.append(table_record) |           self.table_output_list.append(table_record) | ||||||
| 
 | 
 | ||||||
|  |         properties = { | ||||||
|  |           "indexes": self.table_dict[table_name_key].get("indexes"), | ||||||
|  |           "partition_column": self.table_dict[table_name_key].get("partition_column") | ||||||
|  |         } | ||||||
|         table_record = { |         table_record = { | ||||||
|           "name": row[1], |           "name": row[1], | ||||||
|           "columns": {}, |           "columns": None, | ||||||
|           "schema_type": "JSON", |           "schema_type": "JSON", | ||||||
|           "properties": self.table_dict[table_name_key], |           "properties": json.dumps(properties), | ||||||
|           "urn": table_urn, |           "urn": table_urn, | ||||||
|           "source": "Oracle", |           "source": "Oracle", | ||||||
|           "location_prefix": row[0], |           "location_prefix": row[0], | ||||||
| @ -249,7 +252,7 @@ class OracleExtract: | |||||||
| 
 | 
 | ||||||
|     # finish all remaining rows |     # finish all remaining rows | ||||||
|     schema_dict["num_fields"] = field_idx |     schema_dict["num_fields"] = field_idx | ||||||
|     table_record['columns'] = schema_dict |     table_record["columns"] = json.dumps(schema_dict) | ||||||
|     self.table_output_list.append(table_record) |     self.table_output_list.append(table_record) | ||||||
|     self.logger.info("%d Table records generated" % table_idx) |     self.logger.info("%d Table records generated" % table_idx) | ||||||
| 
 | 
 | ||||||
| @ -304,7 +307,17 @@ class OracleExtract: | |||||||
|       return None |       return None | ||||||
| 
 | 
 | ||||||
|   def trim_newline(self, line): |   def trim_newline(self, line): | ||||||
|     return None if line is None else line.replace('\n', ' ').replace('\r', ' ') |     return None if line is None else line.replace('\n', ' ').replace('\r', ' ').encode('ascii', 'ignore') | ||||||
|  | 
 | ||||||
|  |   def write_csv(self, csv_filename, csv_columns, data_list): | ||||||
|  |     csvfile = open(csv_filename, 'wb') | ||||||
|  |     os.chmod(csv_filename, 0644) | ||||||
|  |     writer = csv.DictWriter(csvfile, fieldnames=csv_columns, delimiter='\x1A', lineterminator='\n', | ||||||
|  |                             quoting=csv.QUOTE_NONE, quotechar='\1', escapechar='\0') | ||||||
|  |     writer.writeheader() | ||||||
|  |     for data in data_list: | ||||||
|  |       writer.writerow(data) | ||||||
|  |     csvfile.close() | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|   def run(self, database_name, table_name, table_output_file, field_output_file, sample_output_file, sample=False): |   def run(self, database_name, table_name, table_output_file, field_output_file, sample_output_file, sample=False): | ||||||
| @ -323,34 +336,25 @@ class OracleExtract: | |||||||
|       begin = datetime.datetime.now().strftime("%H:%M:%S") |       begin = datetime.datetime.now().strftime("%H:%M:%S") | ||||||
|       # table info |       # table info | ||||||
|       rows = self.get_table_info(None, None) |       rows = self.get_table_info(None, None) | ||||||
|  |       self.get_extra_table_info() | ||||||
|       self.format_table_metadata(rows) |       self.format_table_metadata(rows) | ||||||
|       end = datetime.datetime.now().strftime("%H:%M:%S") |       end = datetime.datetime.now().strftime("%H:%M:%S") | ||||||
|       self.logger.info("Collecting table info [%s -> %s]" % (str(begin), str(end))) |       self.logger.info("Collecting table info [%s -> %s]" % (str(begin), str(end))) | ||||||
| 
 | 
 | ||||||
|       csv_columns = ['name', 'columns', 'schema_type', 'properties', 'urn', 'source', 'location_prefix', |       csv_columns = ['name', 'columns', 'schema_type', 'properties', 'urn', 'source', 'location_prefix', | ||||||
|                      'parent_name', 'storage_type', 'dataset_type', 'is_partitioned'] |                      'parent_name', 'storage_type', 'dataset_type', 'is_partitioned'] | ||||||
|       csvfile = open(table_output_file, 'wb') |       self.write_csv(table_output_file, csv_columns, self.table_output_list) | ||||||
|       os.chmod(table_output_file, 0666) |  | ||||||
|       writer = csv.DictWriter(csvfile, fieldnames=csv_columns, delimiter='\x1A', lineterminator='\n') |  | ||||||
|       writer.writeheader() |  | ||||||
|       for data in self.table_output_list: |  | ||||||
|         writer.writerow(data) |  | ||||||
|       csvfile.close |  | ||||||
| 
 | 
 | ||||||
|       csv_columns = ['dataset_urn', 'sort_id', 'name', 'data_type', 'nullable', |       csv_columns = ['dataset_urn', 'sort_id', 'name', 'data_type', 'nullable', | ||||||
|                      'size', 'precision', 'scale', 'default_value', 'doc'] |                      'size', 'precision', 'scale', 'default_value', 'doc'] | ||||||
|       csvfile = open(field_output_file, 'wb') |       self.write_csv(field_output_file, csv_columns, self.field_output_list) | ||||||
|       os.chmod(field_output_file, 0666) |  | ||||||
|       writer = csv.DictWriter(csvfile, fieldnames=csv_columns, delimiter='\x1A', lineterminator='\n') |  | ||||||
|       writer.writeheader() |  | ||||||
|       for data in self.field_output_list: |  | ||||||
|         writer.writerow(data) |  | ||||||
|       csvfile.close |  | ||||||
| 
 | 
 | ||||||
|     if sample: |     if sample: | ||||||
|       csvfile = open(sample_output_file, 'wb') |       csvfile = open(sample_output_file, 'wb') | ||||||
|       os.chmod(sample_output_file, 0666) |       os.chmod(sample_output_file, 0666) | ||||||
|       writer = csv.DictWriter(csvfile, fieldnames=csv_columns, delimiter='\x1A', lineterminator='\n') |       writer = csv.DictWriter(csvfile, fieldnames=csv_columns, delimiter='\x1A', lineterminator='\n', | ||||||
|  |                               quoting=csv.QUOTE_NONE, quotechar='\1', escapechar='\0') | ||||||
|  |       self.logger.info("Writing to CSV file {}".format(sample_output_file)) | ||||||
| 
 | 
 | ||||||
|       # collect sample data |       # collect sample data | ||||||
|       for onedatabase in schema: |       for onedatabase in schema: | ||||||
|  | |||||||
| @ -15,7 +15,7 @@ | |||||||
| from com.ziclix.python.sql import zxJDBC | from com.ziclix.python.sql import zxJDBC | ||||||
| from wherehows.common import Constant | from wherehows.common import Constant | ||||||
| from org.slf4j import LoggerFactory | from org.slf4j import LoggerFactory | ||||||
| import datetime | import sys, os, datetime | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class OracleLoad: | class OracleLoad: | ||||||
| @ -39,10 +39,19 @@ class OracleLoad: | |||||||
|       lock_wait_time = args[Constant.INNODB_LOCK_WAIT_TIMEOUT] |       lock_wait_time = args[Constant.INNODB_LOCK_WAIT_TIMEOUT] | ||||||
|       self.conn_cursor.execute("SET innodb_lock_wait_timeout = %s;" % lock_wait_time) |       self.conn_cursor.execute("SET innodb_lock_wait_timeout = %s;" % lock_wait_time) | ||||||
| 
 | 
 | ||||||
|  |     self.logger.info("Load Oracle Metadata into {}, db_id {}, wh_exec_id {}" | ||||||
|  |                      .format(JDBC_URL, self.db_id, self.wh_etl_exec_id)) | ||||||
|  | 
 | ||||||
|  |     self.dict_dataset_table = 'dict_dataset' | ||||||
|  |     self.field_comments_table = 'field_comments' | ||||||
|  |     self.dict_field_table = 'dict_field_detail' | ||||||
|  |     self.dict_field_comment_table = 'dict_dataset_field_comment' | ||||||
|  |     self.dict_dataset_sample_table = 'dict_dataset_sample' | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
|   def load_tables(self): |   def load_tables(self): | ||||||
|     load_tables_cmd = ''' |     load_tables_cmd = ''' | ||||||
|     DELETE FROM stg_dict_dataset WHERE db_id = '{db_id}'; |     DELETE FROM stg_dict_dataset WHERE db_id = {db_id}; | ||||||
| 
 | 
 | ||||||
|     -- load into stg table |     -- load into stg table | ||||||
|     LOAD DATA LOCAL INFILE '{source_file}' |     LOAD DATA LOCAL INFILE '{source_file}' | ||||||
| @ -55,7 +64,7 @@ class OracleLoad: | |||||||
|     wh_etl_exec_id = {wh_etl_exec_id}; |     wh_etl_exec_id = {wh_etl_exec_id}; | ||||||
| 
 | 
 | ||||||
|     -- insert into final table |     -- insert into final table | ||||||
|     INSERT INTO dict_dataset |     INSERT INTO {dict_dataset} | ||||||
|     ( `name`, |     ( `name`, | ||||||
|       `schema`, |       `schema`, | ||||||
|       schema_type, |       schema_type, | ||||||
| @ -97,16 +106,18 @@ class OracleLoad: | |||||||
|         modified_time=UNIX_TIMESTAMP(now()), wh_etl_exec_id=s.wh_etl_exec_id |         modified_time=UNIX_TIMESTAMP(now()), wh_etl_exec_id=s.wh_etl_exec_id | ||||||
|     ; |     ; | ||||||
| 
 | 
 | ||||||
|     analyze table dict_dataset; |     analyze table {dict_dataset}; | ||||||
|     '''.format(source_file=self.input_table_file, db_id=self.db_id, wh_etl_exec_id=self.wh_etl_exec_id) |     '''.format(source_file=self.input_table_file, db_id=self.db_id, wh_etl_exec_id=self.wh_etl_exec_id, | ||||||
|  |                dict_dataset=self.dict_dataset_table) | ||||||
| 
 | 
 | ||||||
|     self.executeCommands(load_tables_cmd) |     self.executeCommands(load_tables_cmd) | ||||||
|     self.logger.info("finish loading oracle table metadata") |     self.logger.info("finish loading oracle table metadata from {} to {}" | ||||||
|  |                      .format(self.input_table_file, self.dict_dataset_table)) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|   def load_fields(self): |   def load_fields(self): | ||||||
|     load_fields_cmd = ''' |     load_fields_cmd = ''' | ||||||
|         DELETE FROM stg_dict_field_detail where db_id = '{db_id}'; |         DELETE FROM stg_dict_field_detail where db_id = {db_id}; | ||||||
| 
 | 
 | ||||||
|         LOAD DATA LOCAL INFILE '{source_file}' |         LOAD DATA LOCAL INFILE '{source_file}' | ||||||
|         INTO TABLE stg_dict_field_detail |         INTO TABLE stg_dict_field_detail | ||||||
| @ -132,13 +143,13 @@ class OracleLoad: | |||||||
|           and (char_length(trim(description)) = 0 |           and (char_length(trim(description)) = 0 | ||||||
|           or description in ('null', 'N/A', 'nothing', 'empty', 'none')); |           or description in ('null', 'N/A', 'nothing', 'empty', 'none')); | ||||||
| 
 | 
 | ||||||
|         insert into field_comments ( |         insert into {field_comments} ( | ||||||
|           user_id, comment, created, modified, comment_crc32_checksum |           user_id, comment, created, modified, comment_crc32_checksum | ||||||
|         ) |         ) | ||||||
|         select 0 user_id, description, now() created, now() modified, crc32(description) from |         select 0 user_id, description, now() created, now() modified, crc32(description) from | ||||||
|         ( |         ( | ||||||
|           select sf.description |           select sf.description | ||||||
|           from stg_dict_field_detail sf left join field_comments fc |           from stg_dict_field_detail sf left join {field_comments} fc | ||||||
|             on sf.description = fc.comment |             on sf.description = fc.comment | ||||||
|           where sf.description is not null |           where sf.description is not null | ||||||
|             and fc.id is null |             and fc.id is null | ||||||
| @ -146,40 +157,41 @@ class OracleLoad: | |||||||
|           group by 1 order by 1 |           group by 1 order by 1 | ||||||
|         ) d; |         ) d; | ||||||
| 
 | 
 | ||||||
|         analyze table field_comments; |         analyze table {field_comments}; | ||||||
| 
 | 
 | ||||||
|         -- delete old record if it does not exist in this load batch anymore (but have the dataset id) |         -- delete old record if it does not exist in this load batch anymore (but have the dataset id) | ||||||
|         create temporary table if not exists t_deleted_fields (primary key (field_id)) |         create temporary table if not exists t_deleted_fields (primary key (field_id)) | ||||||
|           select x.field_id |           select x.field_id | ||||||
|             from stg_dict_field_detail s |             from stg_dict_field_detail s | ||||||
|               join dict_dataset i |               join {dict_dataset} i | ||||||
|                 on s.urn = i.urn |                 on s.urn = i.urn | ||||||
|                 and s.db_id = {db_id} |                 and s.db_id = {db_id} | ||||||
|               right join dict_field_detail x |               right join {dict_field_detail} x | ||||||
|                 on i.id = x.dataset_id |                 on i.id = x.dataset_id | ||||||
|                 and s.field_name = x.field_name |                 and s.field_name = x.field_name | ||||||
|                 and s.parent_path = x.parent_path |                 and s.parent_path = x.parent_path | ||||||
|           where s.field_name is null |           where s.field_name is null | ||||||
|             and x.dataset_id in ( |             and x.dataset_id in ( | ||||||
|                        select d.id dataset_id |                        select d.id dataset_id | ||||||
|                        from stg_dict_field_detail k join dict_dataset d |                        from stg_dict_field_detail k join {dict_dataset} d | ||||||
|                          on k.urn = d.urn |                          on k.urn = d.urn | ||||||
|                         and k.db_id = {db_id} |                         and k.db_id = {db_id} | ||||||
|             ) |             ) | ||||||
|         ; -- run time : ~2min |         ; -- run time : ~2min | ||||||
| 
 | 
 | ||||||
|         delete from dict_field_detail where field_id in (select field_id from t_deleted_fields); |         delete from {dict_field_detail} where field_id in (select field_id from t_deleted_fields); | ||||||
| 
 | 
 | ||||||
|         -- update the old record if some thing changed |         -- update the old record if some thing changed | ||||||
|         update dict_field_detail t join |         update {dict_field_detail} t join | ||||||
|         ( |         ( | ||||||
|           select x.field_id, s.* |           select x.field_id, s.* | ||||||
|           from stg_dict_field_detail s join dict_dataset d |           from stg_dict_field_detail s | ||||||
|  |           join {dict_dataset} d | ||||||
|             on s.urn = d.urn |             on s.urn = d.urn | ||||||
|                join dict_field_detail x |           join {dict_field_detail} x | ||||||
|            on s.field_name = x.field_name |             on s.field_name = x.field_name | ||||||
|           and coalesce(s.parent_path, '*') = coalesce(x.parent_path, '*') |             and coalesce(s.parent_path, '*') = coalesce(x.parent_path, '*') | ||||||
|           and d.id = x.dataset_id |             and d.id = x.dataset_id | ||||||
|           where s.db_id = {db_id} |           where s.db_id = {db_id} | ||||||
|             and (x.sort_id <> s.sort_id |             and (x.sort_id <> s.sort_id | ||||||
|                 or x.parent_sort_id <> s.parent_sort_id |                 or x.parent_sort_id <> s.parent_sort_id | ||||||
| @ -207,24 +219,23 @@ class OracleLoad: | |||||||
|             t.modified = now() |             t.modified = now() | ||||||
|         ; |         ; | ||||||
| 
 | 
 | ||||||
|        insert into dict_field_detail ( |         insert into {dict_field_detail} ( | ||||||
|           dataset_id, fields_layout_id, sort_id, parent_sort_id, parent_path, |           dataset_id, fields_layout_id, sort_id, parent_sort_id, parent_path, | ||||||
|           field_name, namespace, data_type, data_size, is_nullable, default_value, |           field_name, namespace, data_type, data_size, is_nullable, default_value, modified | ||||||
|            modified |  | ||||||
|         ) |         ) | ||||||
|         select |         select | ||||||
|           d.id, 0, sf.sort_id, sf.parent_sort_id, sf.parent_path, |           d.id, 0, sf.sort_id, sf.parent_sort_id, sf.parent_path, | ||||||
|           sf.field_name, sf.namespace, sf.data_type, sf.data_size, sf.is_nullable, sf.default_value, now() |           sf.field_name, sf.namespace, sf.data_type, sf.data_size, sf.is_nullable, sf.default_value, now() | ||||||
|         from stg_dict_field_detail sf join dict_dataset d |         from stg_dict_field_detail sf join {dict_dataset} d | ||||||
|           on sf.urn = d.urn |           on sf.urn = d.urn | ||||||
|              left join dict_field_detail t |              left join {dict_field_detail} t | ||||||
|           on d.id = t.dataset_id |           on d.id = t.dataset_id | ||||||
|          and sf.field_name = t.field_name |           and sf.field_name = t.field_name | ||||||
|          and sf.parent_path = t.parent_path |           and sf.parent_path = t.parent_path | ||||||
|         where db_id = {db_id} and t.field_id is null |         where db_id = {db_id} and t.field_id is null | ||||||
|         ; |         ; | ||||||
| 
 | 
 | ||||||
|         analyze table dict_field_detail; |         analyze table {dict_field_detail}; | ||||||
| 
 | 
 | ||||||
|         -- delete old record in stagging |         -- delete old record in stagging | ||||||
|         delete from stg_dict_dataset_field_comment where db_id = {db_id}; |         delete from stg_dict_dataset_field_comment where db_id = {db_id}; | ||||||
| @ -232,36 +243,40 @@ class OracleLoad: | |||||||
|         -- insert |         -- insert | ||||||
|         insert into stg_dict_dataset_field_comment |         insert into stg_dict_dataset_field_comment | ||||||
|         select t.field_id field_id, fc.id comment_id,  d.id dataset_id, {db_id} |         select t.field_id field_id, fc.id comment_id,  d.id dataset_id, {db_id} | ||||||
|                 from stg_dict_field_detail sf join dict_dataset d |                 from stg_dict_field_detail sf join {dict_dataset} d | ||||||
|                   on sf.urn = d.urn |                   on sf.urn = d.urn | ||||||
|                       join field_comments fc |                       join {field_comments} fc | ||||||
|                   on sf.description = fc.comment |                   on sf.description = fc.comment | ||||||
|                       join dict_field_detail t |                       join {dict_field_detail} t | ||||||
|                   on d.id = t.dataset_id |                   on d.id = t.dataset_id | ||||||
|                  and sf.field_name = t.field_name |                  and sf.field_name = t.field_name | ||||||
|                  and sf.parent_path = t.parent_path |                  and sf.parent_path = t.parent_path | ||||||
|         where sf.db_id = {db_id}; |         where sf.db_id = {db_id}; | ||||||
| 
 | 
 | ||||||
|         -- have default comment, insert it set default to 0 |         -- have default comment, insert it set default to 0 | ||||||
|         insert ignore into dict_dataset_field_comment |         insert ignore into {dict_dataset_field_comment} | ||||||
|         select field_id, comment_id, dataset_id, 0 is_default from stg_dict_dataset_field_comment where field_id in ( |         select field_id, comment_id, dataset_id, 0 is_default from stg_dict_dataset_field_comment where field_id in ( | ||||||
|           select field_id from dict_dataset_field_comment |           select field_id from {dict_dataset_field_comment} | ||||||
|           where field_id in (select field_id from stg_dict_dataset_field_comment) |           where field_id in (select field_id from stg_dict_dataset_field_comment) | ||||||
|         and is_default = 1 ) and db_id = {db_id}; |         and is_default = 1 ) and db_id = {db_id}; | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|         -- doesn't have this comment before, insert into it and set as default |         -- doesn't have this comment before, insert into it and set as default | ||||||
|         insert ignore into dict_dataset_field_comment |         insert ignore into {dict_dataset_field_comment} | ||||||
|         select sd.field_id, sd.comment_id, sd.dataset_id, 1 from stg_dict_dataset_field_comment sd |         select sd.field_id, sd.comment_id, sd.dataset_id, 1 | ||||||
|         left join dict_dataset_field_comment d |         from stg_dict_dataset_field_comment sd | ||||||
|         on d.field_id = sd.field_id |         left join {dict_dataset_field_comment} d | ||||||
|          and d.comment_id = sd.comment_id |           on d.field_id = sd.field_id | ||||||
|  |           and d.comment_id = sd.comment_id | ||||||
|         where d.comment_id is null |         where d.comment_id is null | ||||||
|         and sd.db_id = {db_id}; |           and sd.db_id = {db_id}; | ||||||
|     '''.format(source_file=self.input_field_file, db_id=self.db_id, wh_etl_exec_id=self.wh_etl_exec_id) |     '''.format(source_file=self.input_field_file, db_id=self.db_id, wh_etl_exec_id=self.wh_etl_exec_id, | ||||||
|  |                dict_dataset=self.dict_dataset_table, dict_field_detail=self.dict_field_table, | ||||||
|  |                field_comments=self.field_comments_table, dict_dataset_field_comment=self.dict_field_comment_table) | ||||||
| 
 | 
 | ||||||
|     self.executeCommands(load_fields_cmd) |     self.executeCommands(load_fields_cmd) | ||||||
|     self.logger.info("finish loading oracle table fields") |     self.logger.info("finish loading oracle table fields from {} to {}" | ||||||
|  |                      .format(self.input_field_file, self.dict_field_table)) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|   def load_sample(self): |   def load_sample(self): | ||||||
| @ -277,12 +292,12 @@ class OracleLoad: | |||||||
| 
 | 
 | ||||||
|     -- update reference id in stagging table |     -- update reference id in stagging table | ||||||
|     UPDATE  stg_dict_dataset_sample s |     UPDATE  stg_dict_dataset_sample s | ||||||
|     LEFT JOIN dict_dataset d ON s.ref_urn = d.urn |     LEFT JOIN {dict_dataset} d ON s.ref_urn = d.urn | ||||||
|     SET s.ref_id = d.id |     SET s.ref_id = d.id | ||||||
|     WHERE s.db_id = {db_id}; |     WHERE s.db_id = {db_id}; | ||||||
| 
 | 
 | ||||||
|     -- first insert ref_id as 0 |     -- first insert ref_id as 0 | ||||||
|     INSERT INTO dict_dataset_sample |     INSERT INTO {dict_dataset_sample} | ||||||
|     ( `dataset_id`, |     ( `dataset_id`, | ||||||
|       `urn`, |       `urn`, | ||||||
|       `ref_id`, |       `ref_id`, | ||||||
| @ -290,20 +305,22 @@ class OracleLoad: | |||||||
|       created |       created | ||||||
|     ) |     ) | ||||||
|     select d.id as dataset_id, s.urn, s.ref_id, s.data, now() |     select d.id as dataset_id, s.urn, s.ref_id, s.data, now() | ||||||
|     from stg_dict_dataset_sample s left join dict_dataset d on d.urn = s.urn |     from stg_dict_dataset_sample s left join {dict_dataset} d on d.urn = s.urn | ||||||
|           where s.db_id = {db_id} |           where s.db_id = {db_id} | ||||||
|     on duplicate key update |     on duplicate key update | ||||||
|       `data`=s.data, modified=now(); |       `data`=s.data, modified=now(); | ||||||
| 
 | 
 | ||||||
|       -- update reference id in final table |       -- update reference id in final table | ||||||
|     UPDATE dict_dataset_sample d |     UPDATE {dict_dataset_sample} d | ||||||
|     RIGHT JOIN stg_dict_dataset_sample s ON d.urn = s.urn |     RIGHT JOIN stg_dict_dataset_sample s ON d.urn = s.urn | ||||||
|     SET d.ref_id = s.ref_id |     SET d.ref_id = s.ref_id | ||||||
|     WHERE s.db_id = {db_id} AND d.ref_id = 0; |     WHERE s.db_id = {db_id} AND d.ref_id = 0; | ||||||
|     '''.format(source_file=self.input_sample_file, db_id=self.db_id) |     '''.format(source_file=self.input_sample_file, db_id=self.db_id, | ||||||
|  |                dict_dataset=self.dict_dataset_table, dict_dataset_sample=self.dict_dataset_sample_table) | ||||||
| 
 | 
 | ||||||
|     self.executeCommands(load_sample_cmd) |     self.executeCommands(load_sample_cmd) | ||||||
|     self.logger.info("finish loading oracle sample data") |     self.logger.info("finish loading oracle sample data from {} to {}" | ||||||
|  |                      .format(self.input_sample_file, self.dict_dataset_sample_table)) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|   def executeCommands(self, commands): |   def executeCommands(self, commands): | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Yi Wang
						Yi Wang