diff --git a/data-model/DDL/ETL_DDL/dataset_info_metadata.sql b/data-model/DDL/ETL_DDL/dataset_info_metadata.sql index 7b5e3c21a0..ea04e37af6 100644 --- a/data-model/DDL/ETL_DDL/dataset_info_metadata.sql +++ b/data-model/DDL/ETL_DDL/dataset_info_metadata.sql @@ -110,8 +110,8 @@ CREATE TABLE `dataset_privacy_compliance` ( `dataset_urn` VARCHAR(200) NOT NULL, `compliance_purge_type` VARCHAR(30) DEFAULT NULL COMMENT 'AUTO_PURGE,CUSTOM_PURGE,LIMITED_RETENTION,PURGE_NOT_APPLICABLE', - `compliance_purge_entities` VARCHAR(200) DEFAULT NULL, - `modified_time` INT(10) UNSIGNED DEFAULT NULL + `compliance_purge_entities` VARCHAR(2000) DEFAULT NULL, + `modified_time` INT UNSIGNED DEFAULT NULL COMMENT 'the modified time in epoch', PRIMARY KEY (`dataset_id`), UNIQUE KEY `dataset_urn` (`dataset_urn`) @@ -130,7 +130,7 @@ CREATE TABLE `dataset_security` ( COMMENT 'JSON: specification of retention', `geographic_affinity` VARCHAR(200) DEFAULT NULL COMMENT 'JSON: must be stored in the geo region', - `modified_time` INT(10) UNSIGNED DEFAULT NULL + `modified_time` INT UNSIGNED DEFAULT NULL COMMENT 'the modified time in epoch', PRIMARY KEY (`dataset_id`), UNIQUE KEY `dataset_urn` (`dataset_urn`) diff --git a/metadata-etl/src/main/resources/jython/CodeSearchExtract.py b/metadata-etl/src/main/resources/jython/CodeSearchExtract.py index f0183f57ce..c1bde8d32b 100644 --- a/metadata-etl/src/main/resources/jython/CodeSearchExtract.py +++ b/metadata-etl/src/main/resources/jython/CodeSearchExtract.py @@ -123,7 +123,6 @@ class CodeSearchExtract: db['committers'] = self.get_svn_committers(schema_in_repo) committers_count +=1 self.logger.info("Committers for {} => {}".format(schema_in_repo,db['committers'])) - else: self.logger.info("Search request {}".format(prop_file)) @@ -146,10 +145,9 @@ class CodeSearchExtract: owner_count += 1 self.code_search_committer_writer.append(owner_record) except Exception as e: - self.logger.error(e) + self.logger.error(str(e)) self.logger.error("Exception happens with code {}".format(code)) - self.code_search_committer_writer.close() self.logger.info('Finish Fetching committers, total {} committers entries'.format(committers_count)) self.logger.info('Finish Fetching SVN owners, total {} records'.format(owner_count)) @@ -184,13 +182,13 @@ class CodeSearchExtract: if apvr not in committers: committers.append(apvr) - if len(committers) > 0: self.logger.debug(" {}, ' => ', {}".format(svn_repo_path,committers)) break return ','.join(committers) + if __name__ == "__main__": args = sys.argv[1] e = CodeSearchExtract() diff --git a/metadata-etl/src/main/resources/jython/DatasetTreeBuilder.py b/metadata-etl/src/main/resources/jython/DatasetTreeBuilder.py index c8ae97b42e..4c9d2c9459 100644 --- a/metadata-etl/src/main/resources/jython/DatasetTreeBuilder.py +++ b/metadata-etl/src/main/resources/jython/DatasetTreeBuilder.py @@ -32,7 +32,7 @@ class DatasetTreeBuilder: conn_mysql = zxJDBC.connect(jdbc_url, username, password, jdbc_driver) cur = conn_mysql.cursor() try: - query = "select distinct id, concat(SUBSTRING_INDEX(urn, ':///', 1), '/', SUBSTRING_INDEX(urn, ':///', -1)) p from dict_dataset order by urn" + query = "select distinct id, concat(SUBSTRING_INDEX(urn, ':///', 1), '/', SUBSTRING_INDEX(urn, ':///', -1)) p from dict_dataset order by 2" cur.execute(query) datasets = cur.fetchall() self.dataset_dict = dict() diff --git a/web/app/dao/DatasetsDAO.java b/web/app/dao/DatasetsDAO.java index ea3b7b3951..9c41174de1 100644 --- a/web/app/dao/DatasetsDAO.java +++ b/web/app/dao/DatasetsDAO.java @@ -335,18 +335,18 @@ public class DatasetsDAO extends AbstractMySQLOpenSourceDAO "FROM cfg_object_name_map WHERE mapped_object_name = ?"; private final static String GET_DATASET_LISTVIEW_TOP_LEVEL_NODES = "SELECT DISTINCT " + - "SUBSTRING_INDEX(urn, ':///', 1) as name, 0 as id, " + - "concat(SUBSTRING_INDEX(urn, ':///', 1), ':///') as urn FROM dict_dataset order by 1"; + "SUBSTRING_INDEX(urn, ':///', 1) as `name`, 0 as id, " + + "LEFT(urn, INSTR(urn, ':///') + 3) as urn FROM dict_dataset order by 1"; - private final static String GET_DATASET_LISTVIEW_NODES_BY_URN = "SELECT distinct " + - "SUBSTRING_INDEX(SUBSTRING_INDEX(d.urn, ?, -1), '/', 1) as name, " + + private final static String GET_DATASET_LISTVIEW_NODES_BY_URN = "SELECT DISTINCT " + + "SUBSTRING_INDEX(SUBSTRING_INDEX(d.urn, ?, -1), '/', 1) as `name`, " + "concat(?, SUBSTRING_INDEX(SUBSTRING_INDEX(d.urn, ?, -1), '/', 1)) as urn, " + "s.id FROM dict_dataset d LEFT JOIN dict_dataset s " + "ON s.urn = concat(?, SUBSTRING_INDEX(SUBSTRING_INDEX(d.urn, ?, -1), '/', 1)) " + - "WHERE d.urn LIKE ? ORDER BY d.urn"; + "WHERE d.urn LIKE ? ORDER BY 2"; - private final static String GET_DATASET_VERSIONS = "SELECT DISTINCT version " + - "FROM dict_dataset_instance WHERE dataset_id = ? and version != '0' ORDER BY version_sort_id DESC"; + private final static String GET_DATASET_VERSIONS = "SELECT DISTINCT `version` " + + "FROM dict_dataset_instance WHERE dataset_id = ? and `version` != '0' ORDER BY 1 DESC"; private final static String GET_DATASET_NATIVE_NAME = "SELECT native_name " + "FROM dict_dataset_instance WHERE dataset_id = ? ORDER BY version_sort_id DESC limit 1"; @@ -1991,7 +1991,7 @@ public class DatasetsDAO extends AbstractMySQLOpenSourceDAO node.datasetId = (Long) row.get(DatasetWithUserRowMapper.DATASET_ID_COLUMN); node.nodeName = (String) row.get(DatasetWithUserRowMapper.DATASET_NAME_COLUMN); String nodeUrn = (String) row.get(DatasetWithUserRowMapper.DATASET_URN_COLUMN); - if (node.datasetId !=null && node.datasetId > 0) + if (node.datasetId != null && node.datasetId > 0) { node.nodeUrl = "#/datasets/" + node.datasetId; } diff --git a/web/app/dao/FlowsDAO.java b/web/app/dao/FlowsDAO.java index ce9697fa3f..a4ee95858d 100644 --- a/web/app/dao/FlowsDAO.java +++ b/web/app/dao/FlowsDAO.java @@ -79,8 +79,8 @@ public class FlowsDAO extends AbstractMySQLOpenSourceDAO private final static String GET_PAGED_JOBS_BY_APP_ID_AND_FLOW_ID = "select SQL_CALC_FOUND_ROWS " + "j.job_id, MAX(j.last_source_version), j.job_name, j.job_path, j.job_type, j.ref_flow_id, " + - "FROM_UNIXTIME(j.created_time) as created_time, " + - "FROM_UNIXTIME(j.modified_time) as modified_time, f.flow_name, l.flow_group " + + "FROM_UNIXTIME(MIN(j.created_time)) as created_time, " + + "FROM_UNIXTIME(MAX(j.modified_time)) as modified_time, f.flow_name, l.flow_group " + "FROM flow_job j JOIN flow f on j.app_id = f.app_id and j.flow_id = f.flow_id " + "LEFT JOIN flow l on j.app_id = l.app_id and j.ref_flow_id = l.flow_id " + "WHERE j.app_id = ? and j.flow_id = ? GROUP BY j.job_id, j.job_name, " + @@ -88,11 +88,11 @@ public class FlowsDAO extends AbstractMySQLOpenSourceDAO "f.flow_name ORDER BY j.job_id LIMIT ?, ?"; private final static String GET_FLOW_TREE_APPLICATON_NODES = "SELECT DISTINCT ca.app_code " + - "From flow f JOIN cfg_application ca ON f.app_id = ca.app_id ORDER by app_code"; + "From flow f JOIN cfg_application ca ON f.app_id = ca.app_id ORDER by 1"; private final static String GET_FLOW_TREE_PROJECT_NODES = "SELECT DISTINCT IFNULL(f.flow_group, 'ROOT') " + "FROM flow f JOIN cfg_application ca ON f.app_id = ca.app_id " + - "WHERE (is_active is null or is_active = 'Y') and ca.app_code = ? ORDER BY flow_group"; + "WHERE (is_active is null or is_active = 'Y') and ca.app_code = ? ORDER BY 1"; private final static String GET_FLOW_TREE_FLOW_NODES = "SELECT DISTINCT f.flow_id, f.flow_name FROM flow f " + "JOIN cfg_application ca ON f.app_id = ca.app_id " + diff --git a/web/app/dao/LineageDAO.java b/web/app/dao/LineageDAO.java index 7aff7c42fa..fc0f1a5677 100644 --- a/web/app/dao/LineageDAO.java +++ b/web/app/dao/LineageDAO.java @@ -40,8 +40,8 @@ public class LineageDAO extends AbstractMySQLOpenSourceDAO private final static String GET_JOB = "SELECT ca.app_id, ca.app_code as cluster, " + "jedl.job_name, fj.job_path, fj.job_type, jedl.flow_path, jedl.storage_type, jedl.source_target_type, " + - "jedl.operation, jedl.source_srl_no, jedl.srl_no, " + - "max(jedl.job_exec_id) as job_exec_id FROM job_execution_data_lineage jedl " + + "jedl.operation, MAX(jedl.source_srl_no), MAX(jedl.srl_no), " + + "MAX(jedl.job_exec_id) as job_exec_id FROM job_execution_data_lineage jedl " + "JOIN cfg_application ca on ca.app_id = jedl.app_id " + "LEFT JOIN job_execution je on jedl.app_id = je.app_id " + "and jedl.flow_exec_id = je.flow_exec_id and jedl.job_exec_id = je.job_exec_id " + @@ -49,14 +49,14 @@ public class LineageDAO extends AbstractMySQLOpenSourceDAO "WHERE abstracted_object_name in ( :names ) and " + "jedl.flow_path not REGEXP '^(rent-metrics:|tracking-investigation:)' and " + "FROM_UNIXTIME(job_finished_unixtime) > CURRENT_DATE - INTERVAL (:days) DAY " + - "GROUP BY ca.app_id, cluster, jedl.job_name, jedl.flow_path, jedl.source_target_type, " + + "GROUP BY ca.app_id, cluster, jedl.job_name, fj.job_path, fj.job_type, jedl.flow_path, jedl.source_target_type, " + "jedl.storage_type, jedl.operation " + - "ORDER BY jedl.source_target_type DESC, jedl.job_finished_unixtime"; + "ORDER BY jedl.source_target_type DESC, job_exec_id"; private final static String GET_UP_LEVEL_JOB = "SELECT ca.app_id, ca.app_code as cluster, " + "jedl.job_name, fj.job_path, fj.job_type, jedl.flow_path, jedl.storage_type, jedl.source_target_type, " + - "jedl.operation, jedl.source_srl_no, jedl.srl_no, " + - "max(jedl.job_exec_id) as job_exec_id FROM job_execution_data_lineage jedl " + + "jedl.operation, MAX(jedl.source_srl_no), MAX(jedl.srl_no), " + + "MAX(jedl.job_exec_id) as job_exec_id FROM job_execution_data_lineage jedl " + "JOIN cfg_application ca on ca.app_id = jedl.app_id " + "LEFT JOIN job_execution je on jedl.app_id = je.app_id " + "and jedl.flow_exec_id = je.flow_exec_id and jedl.job_exec_id = je.job_exec_id " + @@ -64,14 +64,14 @@ public class LineageDAO extends AbstractMySQLOpenSourceDAO "WHERE abstracted_object_name in ( :names ) and jedl.source_target_type = 'target' and " + "jedl.flow_path not REGEXP '^(rent-metrics:|tracking-investigation:)' and " + "FROM_UNIXTIME(job_finished_unixtime) > CURRENT_DATE - INTERVAL (:days) DAY " + - "GROUP BY ca.app_id, cluster, jedl.job_name, jedl.flow_path, jedl.source_target_type, " + + "GROUP BY ca.app_id, cluster, jedl.job_name, fj.job_path, fj.job_type, jedl.flow_path, jedl.source_target_type, " + "jedl.storage_type, jedl.operation " + - "ORDER BY jedl.source_target_type DESC, jedl.job_finished_unixtime"; + "ORDER BY jedl.source_target_type DESC, job_exec_id"; private final static String GET_JOB_WITH_SOURCE = "SELECT ca.app_id, ca.app_code as cluster, " + "jedl.job_name, fj.job_path, fj.job_type, jedl.flow_path, jedl.storage_type, jedl.source_target_type, " + - "jedl.operation, jedl.source_srl_no, jedl.srl_no, " + - "max(jedl.job_exec_id) as job_exec_id FROM job_execution_data_lineage jedl " + + "jedl.operation, MAX(jedl.source_srl_no), MAX(jedl.srl_no), " + + "MAX(jedl.job_exec_id) as job_exec_id FROM job_execution_data_lineage jedl " + "JOIN cfg_application ca on ca.app_id = jedl.app_id " + "LEFT JOIN job_execution je on jedl.app_id = je.app_id " + "and jedl.flow_exec_id = je.flow_exec_id and jedl.job_exec_id = je.job_exec_id " + @@ -79,9 +79,9 @@ public class LineageDAO extends AbstractMySQLOpenSourceDAO "WHERE abstracted_object_name in ( :names ) and jedl.source_target_type != (:type) and " + "jedl.flow_path not REGEXP '^(rent-metrics:|tracking-investigation:)' and " + "FROM_UNIXTIME(job_finished_unixtime) > CURRENT_DATE - INTERVAL (:days) DAY " + - "GROUP BY ca.app_id, cluster, jedl.job_name, jedl.flow_path, jedl.source_target_type, " + + "GROUP BY ca.app_id, cluster, jedl.job_name, fj.job_path, fj.job_type, jedl.flow_path, jedl.source_target_type, " + "jedl.storage_type, jedl.operation " + - "ORDER BY jedl.source_target_type DESC, jedl.job_finished_unixtime"; + "ORDER BY jedl.source_target_type DESC, job_exec_id"; private final static String GET_DATA = "SELECT storage_type, operation, " + "abstracted_object_name, source_target_type, job_start_unixtime, job_finished_unixtime, " + diff --git a/web/app/dao/SchemaHistoryDAO.java b/web/app/dao/SchemaHistoryDAO.java index 0833132d5b..0bf1ca0abd 100644 --- a/web/app/dao/SchemaHistoryDAO.java +++ b/web/app/dao/SchemaHistoryDAO.java @@ -31,23 +31,23 @@ public class SchemaHistoryDAO extends AbstractMySQLOpenSourceDAO{ private final static String GET_PAGED_SCHEMA_DATASET = "SELECT SQL_CALC_FOUND_ROWS " + "DISTINCT dataset_id, urn, " + - "MAX(DATE_FORMAT(modified_date,'%Y-%m-%d')) as modified_date FROM dict_dataset_schema_history " + - "WHERE dataset_id is not null GROUP BY 1 ORDER BY urn LIMIT ?, ?"; + "DATE_FORMAT(MAX(modified_date), '%Y-%m-%d') as modified_date FROM dict_dataset_schema_history " + + "WHERE dataset_id is not null GROUP BY 1,2 ORDER BY urn LIMIT ?, ?"; private final static String GET_SPECIFIED_SCHEMA_DATASET = "SELECT SQL_CALC_FOUND_ROWS " + "DISTINCT dataset_id, urn, " + - "MAX(DATE_FORMAT(modified_date,'%Y-%m-%d')) as modified_date FROM dict_dataset_schema_history " + - "WHERE dataset_id = ? GROUP BY 1 ORDER BY urn LIMIT ?, ?"; + "DATE_FORMAT(MAX(modified_date), '%Y-%m-%d') as modified_date FROM dict_dataset_schema_history " + + "WHERE dataset_id = ? GROUP BY 1,2 ORDER BY urn LIMIT ?, ?"; private final static String GET_PAGED_SCHEMA_DATASET_WITH_FILTER = "SELECT SQL_CALC_FOUND_ROWS " + - "DISTINCT dataset_id, urn, DATE_FORMAT(modified_date,'%Y-%m-%d') as modified_date " + + "DISTINCT dataset_id, urn, DATE_FORMAT(MAX(modified_date), '%Y-%m-%d') as modified_date " + "FROM dict_dataset_schema_history WHERE dataset_id is not null and urn LIKE ? " + - "GROUP BY 1 ORDER BY urn LIMIT ?, ?"; + "GROUP BY 1,2 ORDER BY urn LIMIT ?, ?"; private final static String GET_SPECIFIED_SCHEMA_DATASET_WITH_FILTER = "SELECT SQL_CALC_FOUND_ROWS " + - "DISTINCT dataset_id, urn, DATE_FORMAT(modified_date,'%Y-%m-%d') as modified_date " + + "DISTINCT dataset_id, urn, DATE_FORMAT(MAX(modified_date), '%Y-%m-%d') as modified_date " + "FROM dict_dataset_schema_history WHERE dataset_id = ? and urn LIKE ? " + - "GROUP BY 1 ORDER BY urn LIMIT ?, ?"; + "GROUP BY 1,2 ORDER BY urn LIMIT ?, ?"; private final static String GET_SCHEMA_HISTORY_BY_DATASET_ID = "SELECT DATE_FORMAT(modified_date,'%Y-%m-%d') " + "as modified_date, `schema` FROM dict_dataset_schema_history WHERE dataset_id = ? ORDER BY 1";