Fix SQL query issues when upgrading to MySQL 5.7 (#345)

Yi (Alan) Wang 2017-03-15 09:22:46 -07:00 committed by GitHub
parent 66a8eea21b
commit d9438dc5bc
7 changed files with 38 additions and 40 deletions

@@ -110,8 +110,8 @@ CREATE TABLE `dataset_privacy_compliance` (
   `dataset_urn` VARCHAR(200) NOT NULL,
   `compliance_purge_type` VARCHAR(30) DEFAULT NULL
     COMMENT 'AUTO_PURGE,CUSTOM_PURGE,LIMITED_RETENTION,PURGE_NOT_APPLICABLE',
-  `compliance_purge_entities` VARCHAR(200) DEFAULT NULL,
-  `modified_time` INT(10) UNSIGNED DEFAULT NULL
+  `compliance_purge_entities` VARCHAR(2000) DEFAULT NULL,
+  `modified_time` INT UNSIGNED DEFAULT NULL
     COMMENT 'the modified time in epoch',
   PRIMARY KEY (`dataset_id`),
   UNIQUE KEY `dataset_urn` (`dataset_urn`)
@@ -130,7 +130,7 @@ CREATE TABLE `dataset_security` (
     COMMENT 'JSON: specification of retention',
   `geographic_affinity` VARCHAR(200) DEFAULT NULL
     COMMENT 'JSON: must be stored in the geo region',
-  `modified_time` INT(10) UNSIGNED DEFAULT NULL
+  `modified_time` INT UNSIGNED DEFAULT NULL
     COMMENT 'the modified time in epoch',
   PRIMARY KEY (`dataset_id`),
   UNIQUE KEY `dataset_urn` (`dataset_urn`)
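Both columns change for the move to MySQL 5.7's stricter defaults: the purge-entities list gets room to grow from 200 to 2000 characters, and the INT display width "(10)" is dropped since it never affected storage. A minimal sketch, not part of this commit, of how the same change could be applied in place to an existing deployment:

    -- Sketch only: assumes a live table still carrying the old definition.
    -- INT(10) UNSIGNED and INT UNSIGNED are the same storage type, so only
    -- the VARCHAR widening actually rewrites rows.
    ALTER TABLE dataset_privacy_compliance
      MODIFY `compliance_purge_entities` VARCHAR(2000) DEFAULT NULL,
      MODIFY `modified_time` INT UNSIGNED DEFAULT NULL COMMENT 'the modified time in epoch';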

@@ -123,7 +123,6 @@ class CodeSearchExtract:
             db['committers'] = self.get_svn_committers(schema_in_repo)
             committers_count +=1
             self.logger.info("Committers for {} => {}".format(schema_in_repo,db['committers']))
         else:
             self.logger.info("Search request {}".format(prop_file))
@@ -146,10 +145,9 @@ class CodeSearchExtract:
                 owner_count += 1
                 self.code_search_committer_writer.append(owner_record)
             except Exception as e:
-                self.logger.error(e)
+                self.logger.error(str(e))
                 self.logger.error("Exception happens with code {}".format(code))
         self.code_search_committer_writer.close()
         self.logger.info('Finish Fetching committers, total {} committers entries'.format(committers_count))
         self.logger.info('Finish Fetching SVN owners, total {} records'.format(owner_count))
@@ -184,13 +182,13 @@ class CodeSearchExtract:
                     if apvr not in committers:
                         committers.append(apvr)
                 if len(committers) > 0:
                     self.logger.debug(" {}, ' => ', {}".format(svn_repo_path,committers))
                     break
         return ','.join(committers)


 if __name__ == "__main__":
     args = sys.argv[1]
     e = CodeSearchExtract()

@@ -32,7 +32,7 @@ class DatasetTreeBuilder:
         conn_mysql = zxJDBC.connect(jdbc_url, username, password, jdbc_driver)
         cur = conn_mysql.cursor()
         try:
-            query = "select distinct id, concat(SUBSTRING_INDEX(urn, ':///', 1), '/', SUBSTRING_INDEX(urn, ':///', -1)) p from dict_dataset order by urn"
+            query = "select distinct id, concat(SUBSTRING_INDEX(urn, ':///', 1), '/', SUBSTRING_INDEX(urn, ':///', -1)) p from dict_dataset order by 2"
             cur.execute(query)
             datasets = cur.fetchall()
             self.dataset_dict = dict()
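This is the pattern behind most of the fixes in this commit: MySQL 5.7 ships with ONLY_FULL_GROUP_BY in the default sql_mode, and with SELECT DISTINCT the ORDER BY may only reference expressions that appear in the select list. Ordering by position 2 points the sort at the selected alias p instead of the raw urn column. A minimal reproduction against the same table:

    -- Rejected by a default MySQL 5.7 server, with an error along the lines of
    -- "Expression #1 of ORDER BY clause is not in SELECT list ... this is
    -- incompatible with DISTINCT":
    SELECT DISTINCT id,
           CONCAT(SUBSTRING_INDEX(urn, ':///', 1), '/', SUBSTRING_INDEX(urn, ':///', -1)) p
    FROM dict_dataset
    ORDER BY urn;

    -- Accepted: column position 2 is the alias p, which is in the select list.
    SELECT DISTINCT id,
           CONCAT(SUBSTRING_INDEX(urn, ':///', 1), '/', SUBSTRING_INDEX(urn, ':///', -1)) p
    FROM dict_dataset
    ORDER BY 2;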

@@ -335,18 +335,18 @@ public class DatasetsDAO extends AbstractMySQLOpenSourceDAO
         "FROM cfg_object_name_map WHERE mapped_object_name = ?";

     private final static String GET_DATASET_LISTVIEW_TOP_LEVEL_NODES = "SELECT DISTINCT " +
-        "SUBSTRING_INDEX(urn, ':///', 1) as name, 0 as id, " +
-        "concat(SUBSTRING_INDEX(urn, ':///', 1), ':///') as urn FROM dict_dataset order by 1";
+        "SUBSTRING_INDEX(urn, ':///', 1) as `name`, 0 as id, " +
+        "LEFT(urn, INSTR(urn, ':///') + 3) as urn FROM dict_dataset order by 1";

-    private final static String GET_DATASET_LISTVIEW_NODES_BY_URN = "SELECT distinct " +
-        "SUBSTRING_INDEX(SUBSTRING_INDEX(d.urn, ?, -1), '/', 1) as name, " +
+    private final static String GET_DATASET_LISTVIEW_NODES_BY_URN = "SELECT DISTINCT " +
+        "SUBSTRING_INDEX(SUBSTRING_INDEX(d.urn, ?, -1), '/', 1) as `name`, " +
         "concat(?, SUBSTRING_INDEX(SUBSTRING_INDEX(d.urn, ?, -1), '/', 1)) as urn, " +
         "s.id FROM dict_dataset d LEFT JOIN dict_dataset s " +
         "ON s.urn = concat(?, SUBSTRING_INDEX(SUBSTRING_INDEX(d.urn, ?, -1), '/', 1)) " +
-        "WHERE d.urn LIKE ? ORDER BY d.urn";
+        "WHERE d.urn LIKE ? ORDER BY 2";

-    private final static String GET_DATASET_VERSIONS = "SELECT DISTINCT version " +
-        "FROM dict_dataset_instance WHERE dataset_id = ? and version != '0' ORDER BY version_sort_id DESC";
+    private final static String GET_DATASET_VERSIONS = "SELECT DISTINCT `version` " +
+        "FROM dict_dataset_instance WHERE dataset_id = ? and `version` != '0' ORDER BY 1 DESC";

     private final static String GET_DATASET_NATIVE_NAME = "SELECT native_name " +
         "FROM dict_dataset_instance WHERE dataset_id = ? ORDER BY version_sort_id DESC limit 1";
@@ -1991,7 +1991,7 @@ public class DatasetsDAO extends AbstractMySQLOpenSourceDAO
         node.datasetId = (Long) row.get(DatasetWithUserRowMapper.DATASET_ID_COLUMN);
         node.nodeName = (String) row.get(DatasetWithUserRowMapper.DATASET_NAME_COLUMN);
         String nodeUrn = (String) row.get(DatasetWithUserRowMapper.DATASET_URN_COLUMN);
-        if (node.datasetId !=null && node.datasetId > 0)
+        if (node.datasetId != null && node.datasetId > 0)
         {
             node.nodeUrl = "#/datasets/" + node.datasetId;
         }

@@ -79,8 +79,8 @@ public class FlowsDAO extends AbstractMySQLOpenSourceDAO

     private final static String GET_PAGED_JOBS_BY_APP_ID_AND_FLOW_ID = "select SQL_CALC_FOUND_ROWS " +
         "j.job_id, MAX(j.last_source_version), j.job_name, j.job_path, j.job_type, j.ref_flow_id, " +
-        "FROM_UNIXTIME(j.created_time) as created_time, " +
-        "FROM_UNIXTIME(j.modified_time) as modified_time, f.flow_name, l.flow_group " +
+        "FROM_UNIXTIME(MIN(j.created_time)) as created_time, " +
+        "FROM_UNIXTIME(MAX(j.modified_time)) as modified_time, f.flow_name, l.flow_group " +
         "FROM flow_job j JOIN flow f on j.app_id = f.app_id and j.flow_id = f.flow_id " +
         "LEFT JOIN flow l on j.app_id = l.app_id and j.ref_flow_id = l.flow_id " +
         "WHERE j.app_id = ? and j.flow_id = ? GROUP BY j.job_id, j.job_name, " +
@@ -88,11 +88,11 @@ public class FlowsDAO extends AbstractMySQLOpenSourceDAO
         "f.flow_name ORDER BY j.job_id LIMIT ?, ?";

     private final static String GET_FLOW_TREE_APPLICATON_NODES = "SELECT DISTINCT ca.app_code " +
-        "From flow f JOIN cfg_application ca ON f.app_id = ca.app_id ORDER by app_code";
+        "From flow f JOIN cfg_application ca ON f.app_id = ca.app_id ORDER by 1";

     private final static String GET_FLOW_TREE_PROJECT_NODES = "SELECT DISTINCT IFNULL(f.flow_group, 'ROOT') " +
         "FROM flow f JOIN cfg_application ca ON f.app_id = ca.app_id " +
-        "WHERE (is_active is null or is_active = 'Y') and ca.app_code = ? ORDER BY flow_group";
+        "WHERE (is_active is null or is_active = 'Y') and ca.app_code = ? ORDER BY 1";

     private final static String GET_FLOW_TREE_FLOW_NODES = "SELECT DISTINCT f.flow_id, f.flow_name FROM flow f " +
         "JOIN cfg_application ca ON f.app_id = ca.app_id " +

@@ -40,8 +40,8 @@ public class LineageDAO extends AbstractMySQLOpenSourceDAO

     private final static String GET_JOB = "SELECT ca.app_id, ca.app_code as cluster, " +
         "jedl.job_name, fj.job_path, fj.job_type, jedl.flow_path, jedl.storage_type, jedl.source_target_type, " +
-        "jedl.operation, jedl.source_srl_no, jedl.srl_no, " +
-        "max(jedl.job_exec_id) as job_exec_id FROM job_execution_data_lineage jedl " +
+        "jedl.operation, MAX(jedl.source_srl_no), MAX(jedl.srl_no), " +
+        "MAX(jedl.job_exec_id) as job_exec_id FROM job_execution_data_lineage jedl " +
         "JOIN cfg_application ca on ca.app_id = jedl.app_id " +
         "LEFT JOIN job_execution je on jedl.app_id = je.app_id " +
         "and jedl.flow_exec_id = je.flow_exec_id and jedl.job_exec_id = je.job_exec_id " +
@@ -49,14 +49,14 @@ public class LineageDAO extends AbstractMySQLOpenSourceDAO
         "WHERE abstracted_object_name in ( :names ) and " +
         "jedl.flow_path not REGEXP '^(rent-metrics:|tracking-investigation:)' and " +
         "FROM_UNIXTIME(job_finished_unixtime) > CURRENT_DATE - INTERVAL (:days) DAY " +
-        "GROUP BY ca.app_id, cluster, jedl.job_name, jedl.flow_path, jedl.source_target_type, " +
+        "GROUP BY ca.app_id, cluster, jedl.job_name, fj.job_path, fj.job_type, jedl.flow_path, jedl.source_target_type, " +
         "jedl.storage_type, jedl.operation " +
-        "ORDER BY jedl.source_target_type DESC, jedl.job_finished_unixtime";
+        "ORDER BY jedl.source_target_type DESC, job_exec_id";

     private final static String GET_UP_LEVEL_JOB = "SELECT ca.app_id, ca.app_code as cluster, " +
         "jedl.job_name, fj.job_path, fj.job_type, jedl.flow_path, jedl.storage_type, jedl.source_target_type, " +
-        "jedl.operation, jedl.source_srl_no, jedl.srl_no, " +
-        "max(jedl.job_exec_id) as job_exec_id FROM job_execution_data_lineage jedl " +
+        "jedl.operation, MAX(jedl.source_srl_no), MAX(jedl.srl_no), " +
+        "MAX(jedl.job_exec_id) as job_exec_id FROM job_execution_data_lineage jedl " +
         "JOIN cfg_application ca on ca.app_id = jedl.app_id " +
         "LEFT JOIN job_execution je on jedl.app_id = je.app_id " +
         "and jedl.flow_exec_id = je.flow_exec_id and jedl.job_exec_id = je.job_exec_id " +
@@ -64,14 +64,14 @@ public class LineageDAO extends AbstractMySQLOpenSourceDAO
         "WHERE abstracted_object_name in ( :names ) and jedl.source_target_type = 'target' and " +
         "jedl.flow_path not REGEXP '^(rent-metrics:|tracking-investigation:)' and " +
         "FROM_UNIXTIME(job_finished_unixtime) > CURRENT_DATE - INTERVAL (:days) DAY " +
-        "GROUP BY ca.app_id, cluster, jedl.job_name, jedl.flow_path, jedl.source_target_type, " +
+        "GROUP BY ca.app_id, cluster, jedl.job_name, fj.job_path, fj.job_type, jedl.flow_path, jedl.source_target_type, " +
         "jedl.storage_type, jedl.operation " +
-        "ORDER BY jedl.source_target_type DESC, jedl.job_finished_unixtime";
+        "ORDER BY jedl.source_target_type DESC, job_exec_id";

     private final static String GET_JOB_WITH_SOURCE = "SELECT ca.app_id, ca.app_code as cluster, " +
         "jedl.job_name, fj.job_path, fj.job_type, jedl.flow_path, jedl.storage_type, jedl.source_target_type, " +
-        "jedl.operation, jedl.source_srl_no, jedl.srl_no, " +
-        "max(jedl.job_exec_id) as job_exec_id FROM job_execution_data_lineage jedl " +
+        "jedl.operation, MAX(jedl.source_srl_no), MAX(jedl.srl_no), " +
+        "MAX(jedl.job_exec_id) as job_exec_id FROM job_execution_data_lineage jedl " +
         "JOIN cfg_application ca on ca.app_id = jedl.app_id " +
         "LEFT JOIN job_execution je on jedl.app_id = je.app_id " +
         "and jedl.flow_exec_id = je.flow_exec_id and jedl.job_exec_id = je.job_exec_id " +
@@ -79,9 +79,9 @@ public class LineageDAO extends AbstractMySQLOpenSourceDAO
         "WHERE abstracted_object_name in ( :names ) and jedl.source_target_type != (:type) and " +
         "jedl.flow_path not REGEXP '^(rent-metrics:|tracking-investigation:)' and " +
         "FROM_UNIXTIME(job_finished_unixtime) > CURRENT_DATE - INTERVAL (:days) DAY " +
-        "GROUP BY ca.app_id, cluster, jedl.job_name, jedl.flow_path, jedl.source_target_type, " +
+        "GROUP BY ca.app_id, cluster, jedl.job_name, fj.job_path, fj.job_type, jedl.flow_path, jedl.source_target_type, " +
         "jedl.storage_type, jedl.operation " +
-        "ORDER BY jedl.source_target_type DESC, jedl.job_finished_unixtime";
+        "ORDER BY jedl.source_target_type DESC, job_exec_id";

     private final static String GET_DATA = "SELECT storage_type, operation, " +
         "abstracted_object_name, source_target_type, job_start_unixtime, job_finished_unixtime, " +

@@ -31,23 +31,23 @@ public class SchemaHistoryDAO extends AbstractMySQLOpenSourceDAO{

     private final static String GET_PAGED_SCHEMA_DATASET = "SELECT SQL_CALC_FOUND_ROWS " +
         "DISTINCT dataset_id, urn, " +
-        "MAX(DATE_FORMAT(modified_date,'%Y-%m-%d')) as modified_date FROM dict_dataset_schema_history " +
-        "WHERE dataset_id is not null GROUP BY 1 ORDER BY urn LIMIT ?, ?";
+        "DATE_FORMAT(MAX(modified_date), '%Y-%m-%d') as modified_date FROM dict_dataset_schema_history " +
+        "WHERE dataset_id is not null GROUP BY 1,2 ORDER BY urn LIMIT ?, ?";

     private final static String GET_SPECIFIED_SCHEMA_DATASET = "SELECT SQL_CALC_FOUND_ROWS " +
         "DISTINCT dataset_id, urn, " +
-        "MAX(DATE_FORMAT(modified_date,'%Y-%m-%d')) as modified_date FROM dict_dataset_schema_history " +
-        "WHERE dataset_id = ? GROUP BY 1 ORDER BY urn LIMIT ?, ?";
+        "DATE_FORMAT(MAX(modified_date), '%Y-%m-%d') as modified_date FROM dict_dataset_schema_history " +
+        "WHERE dataset_id = ? GROUP BY 1,2 ORDER BY urn LIMIT ?, ?";

     private final static String GET_PAGED_SCHEMA_DATASET_WITH_FILTER = "SELECT SQL_CALC_FOUND_ROWS " +
-        "DISTINCT dataset_id, urn, DATE_FORMAT(modified_date,'%Y-%m-%d') as modified_date " +
+        "DISTINCT dataset_id, urn, DATE_FORMAT(MAX(modified_date), '%Y-%m-%d') as modified_date " +
         "FROM dict_dataset_schema_history WHERE dataset_id is not null and urn LIKE ? " +
-        "GROUP BY 1 ORDER BY urn LIMIT ?, ?";
+        "GROUP BY 1,2 ORDER BY urn LIMIT ?, ?";

     private final static String GET_SPECIFIED_SCHEMA_DATASET_WITH_FILTER = "SELECT SQL_CALC_FOUND_ROWS " +
-        "DISTINCT dataset_id, urn, DATE_FORMAT(modified_date,'%Y-%m-%d') as modified_date " +
+        "DISTINCT dataset_id, urn, DATE_FORMAT(MAX(modified_date), '%Y-%m-%d') as modified_date " +
         "FROM dict_dataset_schema_history WHERE dataset_id = ? and urn LIKE ? " +
-        "GROUP BY 1 ORDER BY urn LIMIT ?, ?";
+        "GROUP BY 1,2 ORDER BY urn LIMIT ?, ?";

     private final static String GET_SCHEMA_HISTORY_BY_DATASET_ID = "SELECT DATE_FORMAT(modified_date,'%Y-%m-%d') " +
         "as modified_date, `schema` FROM dict_dataset_schema_history WHERE dataset_id = ? ORDER BY 1";
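GROUP BY 1 alone no longer passes here: urn is in the select list, and without a declared unique key 5.7 cannot prove urn is functionally dependent on dataset_id, so both columns must be grouped (GROUP BY 1,2). MAX() also moves inside DATE_FORMAT, aggregating the native datetime rather than formatted strings; with '%Y-%m-%d' the two happen to sort the same, but aggregating first is the cleaner shape. The fixed pattern standalone, with 0,10 standing in for the LIMIT placeholders:

    -- Latest schema-change date per (dataset_id, urn), grouped by position;
    -- ORDER BY urn is legal because urn is one of the grouped columns.
    SELECT SQL_CALC_FOUND_ROWS DISTINCT dataset_id, urn,
           DATE_FORMAT(MAX(modified_date), '%Y-%m-%d') AS modified_date
    FROM dict_dataset_schema_history
    WHERE dataset_id IS NOT NULL
    GROUP BY 1,2
    ORDER BY urn
    LIMIT 0, 10;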