Fix SQL query issues when upgrading to MySQL 5.7 (#345)

Yi (Alan) Wang 2017-03-15 09:22:46 -07:00 committed by GitHub
parent 66a8eea21b
commit d9438dc5bc
7 changed files with 38 additions and 40 deletions

@@ -110,8 +110,8 @@ CREATE TABLE `dataset_privacy_compliance` (
   `dataset_urn` VARCHAR(200) NOT NULL,
   `compliance_purge_type` VARCHAR(30) DEFAULT NULL
     COMMENT 'AUTO_PURGE,CUSTOM_PURGE,LIMITED_RETENTION,PURGE_NOT_APPLICABLE',
-  `compliance_purge_entities` VARCHAR(200) DEFAULT NULL,
-  `modified_time` INT(10) UNSIGNED DEFAULT NULL
+  `compliance_purge_entities` VARCHAR(2000) DEFAULT NULL,
+  `modified_time` INT UNSIGNED DEFAULT NULL
     COMMENT 'the modified time in epoch',
   PRIMARY KEY (`dataset_id`),
   UNIQUE KEY `dataset_urn` (`dataset_urn`)
@@ -130,7 +130,7 @@ CREATE TABLE `dataset_security` (
     COMMENT 'JSON: specification of retention',
   `geographic_affinity` VARCHAR(200) DEFAULT NULL
     COMMENT 'JSON: must be stored in the geo region',
-  `modified_time` INT(10) UNSIGNED DEFAULT NULL
+  `modified_time` INT UNSIGNED DEFAULT NULL
     COMMENT 'the modified time in epoch',
   PRIMARY KEY (`dataset_id`),
   UNIQUE KEY `dataset_urn` (`dataset_urn`)
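Both columns change for the move to MySQL 5.7's stricter defaults: the purge-entities list gets room to grow from 200 to 2000 characters, and the INT display width "(10)" is dropped since it never affected storage. A minimal sketch, not part of this commit, of how the same change could be applied in place to an existing deployment:

    -- Sketch only: assumes a live table still carrying the old definition.
    -- INT(10) UNSIGNED and INT UNSIGNED are the same storage type, so only
    -- the VARCHAR widening actually rewrites rows.
    ALTER TABLE dataset_privacy_compliance
      MODIFY `compliance_purge_entities` VARCHAR(2000) DEFAULT NULL,
      MODIFY `modified_time` INT UNSIGNED DEFAULT NULL COMMENT 'the modified time in epoch';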

@@ -123,7 +123,6 @@ class CodeSearchExtract:
             db['committers'] = self.get_svn_committers(schema_in_repo)
             committers_count +=1
             self.logger.info("Committers for {} => {}".format(schema_in_repo,db['committers']))
         else:
             self.logger.info("Search request {}".format(prop_file))
@@ -146,10 +145,9 @@ class CodeSearchExtract:
                 owner_count += 1
                 self.code_search_committer_writer.append(owner_record)
             except Exception as e:
-                self.logger.error(e)
+                self.logger.error(str(e))
                 self.logger.error("Exception happens with code {}".format(code))
         self.code_search_committer_writer.close()
         self.logger.info('Finish Fetching committers, total {} committers entries'.format(committers_count))
         self.logger.info('Finish Fetching SVN owners, total {} records'.format(owner_count))
@@ -184,13 +182,13 @@ class CodeSearchExtract:
                     if apvr not in committers:
                         committers.append(apvr)
                 if len(committers) > 0:
                     self.logger.debug(" {}, ' => ', {}".format(svn_repo_path,committers))
                     break
         return ','.join(committers)


 if __name__ == "__main__":
     args = sys.argv[1]
     e = CodeSearchExtract()

@@ -32,7 +32,7 @@ class DatasetTreeBuilder:
         conn_mysql = zxJDBC.connect(jdbc_url, username, password, jdbc_driver)
         cur = conn_mysql.cursor()
         try:
-            query = "select distinct id, concat(SUBSTRING_INDEX(urn, ':///', 1), '/', SUBSTRING_INDEX(urn, ':///', -1)) p from dict_dataset order by urn"
+            query = "select distinct id, concat(SUBSTRING_INDEX(urn, ':///', 1), '/', SUBSTRING_INDEX(urn, ':///', -1)) p from dict_dataset order by 2"
             cur.execute(query)
             datasets = cur.fetchall()
             self.dataset_dict = dict()
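This is the pattern behind most of the fixes in this commit: MySQL 5.7 ships with ONLY_FULL_GROUP_BY in the default sql_mode, and with SELECT DISTINCT the ORDER BY may only reference expressions that appear in the select list. Ordering by position 2 points the sort at the selected alias p instead of the raw urn column. A minimal reproduction against the same table:

    -- Rejected by a default MySQL 5.7 server, with an error along the lines of
    -- "Expression #1 of ORDER BY clause is not in SELECT list ... this is
    -- incompatible with DISTINCT":
    SELECT DISTINCT id,
           CONCAT(SUBSTRING_INDEX(urn, ':///', 1), '/', SUBSTRING_INDEX(urn, ':///', -1)) p
    FROM dict_dataset
    ORDER BY urn;

    -- Accepted: column position 2 is the alias p, which is in the select list.
    SELECT DISTINCT id,
           CONCAT(SUBSTRING_INDEX(urn, ':///', 1), '/', SUBSTRING_INDEX(urn, ':///', -1)) p
    FROM dict_dataset
    ORDER BY 2;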

@@ -335,18 +335,18 @@ public class DatasetsDAO extends AbstractMySQLOpenSourceDAO
         "FROM cfg_object_name_map WHERE mapped_object_name = ?";

     private final static String GET_DATASET_LISTVIEW_TOP_LEVEL_NODES = "SELECT DISTINCT " +
-        "SUBSTRING_INDEX(urn, ':///', 1) as name, 0 as id, " +
-        "concat(SUBSTRING_INDEX(urn, ':///', 1), ':///') as urn FROM dict_dataset order by 1";
+        "SUBSTRING_INDEX(urn, ':///', 1) as `name`, 0 as id, " +
+        "LEFT(urn, INSTR(urn, ':///') + 3) as urn FROM dict_dataset order by 1";

-    private final static String GET_DATASET_LISTVIEW_NODES_BY_URN = "SELECT distinct " +
-        "SUBSTRING_INDEX(SUBSTRING_INDEX(d.urn, ?, -1), '/', 1) as name, " +
+    private final static String GET_DATASET_LISTVIEW_NODES_BY_URN = "SELECT DISTINCT " +
+        "SUBSTRING_INDEX(SUBSTRING_INDEX(d.urn, ?, -1), '/', 1) as `name`, " +
         "concat(?, SUBSTRING_INDEX(SUBSTRING_INDEX(d.urn, ?, -1), '/', 1)) as urn, " +
         "s.id FROM dict_dataset d LEFT JOIN dict_dataset s " +
         "ON s.urn = concat(?, SUBSTRING_INDEX(SUBSTRING_INDEX(d.urn, ?, -1), '/', 1)) " +
-        "WHERE d.urn LIKE ? ORDER BY d.urn";
+        "WHERE d.urn LIKE ? ORDER BY 2";

-    private final static String GET_DATASET_VERSIONS = "SELECT DISTINCT version " +
-        "FROM dict_dataset_instance WHERE dataset_id = ? and version != '0' ORDER BY version_sort_id DESC";
+    private final static String GET_DATASET_VERSIONS = "SELECT DISTINCT `version` " +
+        "FROM dict_dataset_instance WHERE dataset_id = ? and `version` != '0' ORDER BY 1 DESC";

     private final static String GET_DATASET_NATIVE_NAME = "SELECT native_name " +
         "FROM dict_dataset_instance WHERE dataset_id = ? ORDER BY version_sort_id DESC limit 1";
@@ -1991,7 +1991,7 @@ public class DatasetsDAO extends AbstractMySQLOpenSourceDAO
         node.datasetId = (Long) row.get(DatasetWithUserRowMapper.DATASET_ID_COLUMN);
         node.nodeName = (String) row.get(DatasetWithUserRowMapper.DATASET_NAME_COLUMN);
         String nodeUrn = (String) row.get(DatasetWithUserRowMapper.DATASET_URN_COLUMN);
-        if (node.datasetId !=null && node.datasetId > 0)
+        if (node.datasetId != null && node.datasetId > 0)
         {
             node.nodeUrl = "#/datasets/" + node.datasetId;
         }

@@ -79,8 +79,8 @@ public class FlowsDAO extends AbstractMySQLOpenSourceDAO

     private final static String GET_PAGED_JOBS_BY_APP_ID_AND_FLOW_ID = "select SQL_CALC_FOUND_ROWS " +
         "j.job_id, MAX(j.last_source_version), j.job_name, j.job_path, j.job_type, j.ref_flow_id, " +
-        "FROM_UNIXTIME(j.created_time) as created_time, " +
-        "FROM_UNIXTIME(j.modified_time) as modified_time, f.flow_name, l.flow_group " +
+        "FROM_UNIXTIME(MIN(j.created_time)) as created_time, " +
+        "FROM_UNIXTIME(MAX(j.modified_time)) as modified_time, f.flow_name, l.flow_group " +
         "FROM flow_job j JOIN flow f on j.app_id = f.app_id and j.flow_id = f.flow_id " +
         "LEFT JOIN flow l on j.app_id = l.app_id and j.ref_flow_id = l.flow_id " +
         "WHERE j.app_id = ? and j.flow_id = ? GROUP BY j.job_id, j.job_name, " +
@@ -88,11 +88,11 @@ public class FlowsDAO extends AbstractMySQLOpenSourceDAO
         "f.flow_name ORDER BY j.job_id LIMIT ?, ?";

     private final static String GET_FLOW_TREE_APPLICATON_NODES = "SELECT DISTINCT ca.app_code " +
-        "From flow f JOIN cfg_application ca ON f.app_id = ca.app_id ORDER by app_code";
+        "From flow f JOIN cfg_application ca ON f.app_id = ca.app_id ORDER by 1";

     private final static String GET_FLOW_TREE_PROJECT_NODES = "SELECT DISTINCT IFNULL(f.flow_group, 'ROOT') " +
         "FROM flow f JOIN cfg_application ca ON f.app_id = ca.app_id " +
-        "WHERE (is_active is null or is_active = 'Y') and ca.app_code = ? ORDER BY flow_group";
+        "WHERE (is_active is null or is_active = 'Y') and ca.app_code = ? ORDER BY 1";

     private final static String GET_FLOW_TREE_FLOW_NODES = "SELECT DISTINCT f.flow_id, f.flow_name FROM flow f " +
         "JOIN cfg_application ca ON f.app_id = ca.app_id " +

@@ -40,8 +40,8 @@ public class LineageDAO extends AbstractMySQLOpenSourceDAO

     private final static String GET_JOB = "SELECT ca.app_id, ca.app_code as cluster, " +
         "jedl.job_name, fj.job_path, fj.job_type, jedl.flow_path, jedl.storage_type, jedl.source_target_type, " +
-        "jedl.operation, jedl.source_srl_no, jedl.srl_no, " +
-        "max(jedl.job_exec_id) as job_exec_id FROM job_execution_data_lineage jedl " +
+        "jedl.operation, MAX(jedl.source_srl_no), MAX(jedl.srl_no), " +
+        "MAX(jedl.job_exec_id) as job_exec_id FROM job_execution_data_lineage jedl " +
         "JOIN cfg_application ca on ca.app_id = jedl.app_id " +
         "LEFT JOIN job_execution je on jedl.app_id = je.app_id " +
         "and jedl.flow_exec_id = je.flow_exec_id and jedl.job_exec_id = je.job_exec_id " +
@@ -49,14 +49,14 @@ public class LineageDAO extends AbstractMySQLOpenSourceDAO
         "WHERE abstracted_object_name in ( :names ) and " +
         "jedl.flow_path not REGEXP '^(rent-metrics:|tracking-investigation:)' and " +
         "FROM_UNIXTIME(job_finished_unixtime) > CURRENT_DATE - INTERVAL (:days) DAY " +
-        "GROUP BY ca.app_id, cluster, jedl.job_name, jedl.flow_path, jedl.source_target_type, " +
+        "GROUP BY ca.app_id, cluster, jedl.job_name, fj.job_path, fj.job_type, jedl.flow_path, jedl.source_target_type, " +
         "jedl.storage_type, jedl.operation " +
-        "ORDER BY jedl.source_target_type DESC, jedl.job_finished_unixtime";
+        "ORDER BY jedl.source_target_type DESC, job_exec_id";

     private final static String GET_UP_LEVEL_JOB = "SELECT ca.app_id, ca.app_code as cluster, " +
         "jedl.job_name, fj.job_path, fj.job_type, jedl.flow_path, jedl.storage_type, jedl.source_target_type, " +
-        "jedl.operation, jedl.source_srl_no, jedl.srl_no, " +
-        "max(jedl.job_exec_id) as job_exec_id FROM job_execution_data_lineage jedl " +
+        "jedl.operation, MAX(jedl.source_srl_no), MAX(jedl.srl_no), " +
+        "MAX(jedl.job_exec_id) as job_exec_id FROM job_execution_data_lineage jedl " +
         "JOIN cfg_application ca on ca.app_id = jedl.app_id " +
         "LEFT JOIN job_execution je on jedl.app_id = je.app_id " +
         "and jedl.flow_exec_id = je.flow_exec_id and jedl.job_exec_id = je.job_exec_id " +
@@ -64,14 +64,14 @@ public class LineageDAO extends AbstractMySQLOpenSourceDAO
         "WHERE abstracted_object_name in ( :names ) and jedl.source_target_type = 'target' and " +
         "jedl.flow_path not REGEXP '^(rent-metrics:|tracking-investigation:)' and " +
         "FROM_UNIXTIME(job_finished_unixtime) > CURRENT_DATE - INTERVAL (:days) DAY " +
-        "GROUP BY ca.app_id, cluster, jedl.job_name, jedl.flow_path, jedl.source_target_type, " +
+        "GROUP BY ca.app_id, cluster, jedl.job_name, fj.job_path, fj.job_type, jedl.flow_path, jedl.source_target_type, " +
         "jedl.storage_type, jedl.operation " +
-        "ORDER BY jedl.source_target_type DESC, jedl.job_finished_unixtime";
+        "ORDER BY jedl.source_target_type DESC, job_exec_id";

     private final static String GET_JOB_WITH_SOURCE = "SELECT ca.app_id, ca.app_code as cluster, " +
         "jedl.job_name, fj.job_path, fj.job_type, jedl.flow_path, jedl.storage_type, jedl.source_target_type, " +
-        "jedl.operation, jedl.source_srl_no, jedl.srl_no, " +
-        "max(jedl.job_exec_id) as job_exec_id FROM job_execution_data_lineage jedl " +
+        "jedl.operation, MAX(jedl.source_srl_no), MAX(jedl.srl_no), " +
+        "MAX(jedl.job_exec_id) as job_exec_id FROM job_execution_data_lineage jedl " +
         "JOIN cfg_application ca on ca.app_id = jedl.app_id " +
         "LEFT JOIN job_execution je on jedl.app_id = je.app_id " +
         "and jedl.flow_exec_id = je.flow_exec_id and jedl.job_exec_id = je.job_exec_id " +
@@ -79,9 +79,9 @@ public class LineageDAO extends AbstractMySQLOpenSourceDAO
         "WHERE abstracted_object_name in ( :names ) and jedl.source_target_type != (:type) and " +
         "jedl.flow_path not REGEXP '^(rent-metrics:|tracking-investigation:)' and " +
         "FROM_UNIXTIME(job_finished_unixtime) > CURRENT_DATE - INTERVAL (:days) DAY " +
-        "GROUP BY ca.app_id, cluster, jedl.job_name, jedl.flow_path, jedl.source_target_type, " +
+        "GROUP BY ca.app_id, cluster, jedl.job_name, fj.job_path, fj.job_type, jedl.flow_path, jedl.source_target_type, " +
         "jedl.storage_type, jedl.operation " +
-        "ORDER BY jedl.source_target_type DESC, jedl.job_finished_unixtime";
+        "ORDER BY jedl.source_target_type DESC, job_exec_id";

     private final static String GET_DATA = "SELECT storage_type, operation, " +
         "abstracted_object_name, source_target_type, job_start_unixtime, job_finished_unixtime, " +

@@ -31,23 +31,23 @@ public class SchemaHistoryDAO extends AbstractMySQLOpenSourceDAO{

     private final static String GET_PAGED_SCHEMA_DATASET = "SELECT SQL_CALC_FOUND_ROWS " +
         "DISTINCT dataset_id, urn, " +
-        "MAX(DATE_FORMAT(modified_date,'%Y-%m-%d')) as modified_date FROM dict_dataset_schema_history " +
-        "WHERE dataset_id is not null GROUP BY 1 ORDER BY urn LIMIT ?, ?";
+        "DATE_FORMAT(MAX(modified_date), '%Y-%m-%d') as modified_date FROM dict_dataset_schema_history " +
+        "WHERE dataset_id is not null GROUP BY 1,2 ORDER BY urn LIMIT ?, ?";

     private final static String GET_SPECIFIED_SCHEMA_DATASET = "SELECT SQL_CALC_FOUND_ROWS " +
         "DISTINCT dataset_id, urn, " +
-        "MAX(DATE_FORMAT(modified_date,'%Y-%m-%d')) as modified_date FROM dict_dataset_schema_history " +
-        "WHERE dataset_id = ? GROUP BY 1 ORDER BY urn LIMIT ?, ?";
+        "DATE_FORMAT(MAX(modified_date), '%Y-%m-%d') as modified_date FROM dict_dataset_schema_history " +
+        "WHERE dataset_id = ? GROUP BY 1,2 ORDER BY urn LIMIT ?, ?";

     private final static String GET_PAGED_SCHEMA_DATASET_WITH_FILTER = "SELECT SQL_CALC_FOUND_ROWS " +
-        "DISTINCT dataset_id, urn, DATE_FORMAT(modified_date,'%Y-%m-%d') as modified_date " +
+        "DISTINCT dataset_id, urn, DATE_FORMAT(MAX(modified_date), '%Y-%m-%d') as modified_date " +
         "FROM dict_dataset_schema_history WHERE dataset_id is not null and urn LIKE ? " +
-        "GROUP BY 1 ORDER BY urn LIMIT ?, ?";
+        "GROUP BY 1,2 ORDER BY urn LIMIT ?, ?";

     private final static String GET_SPECIFIED_SCHEMA_DATASET_WITH_FILTER = "SELECT SQL_CALC_FOUND_ROWS " +
-        "DISTINCT dataset_id, urn, DATE_FORMAT(modified_date,'%Y-%m-%d') as modified_date " +
+        "DISTINCT dataset_id, urn, DATE_FORMAT(MAX(modified_date), '%Y-%m-%d') as modified_date " +
         "FROM dict_dataset_schema_history WHERE dataset_id = ? and urn LIKE ? " +
-        "GROUP BY 1 ORDER BY urn LIMIT ?, ?";
+        "GROUP BY 1,2 ORDER BY urn LIMIT ?, ?";

     private final static String GET_SCHEMA_HISTORY_BY_DATASET_ID = "SELECT DATE_FORMAT(modified_date,'%Y-%m-%d') " +
         "as modified_date, `schema` FROM dict_dataset_schema_history WHERE dataset_id = ? ORDER BY 1";
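GROUP BY 1 alone no longer passes here: urn is in the select list, and without a declared unique key 5.7 cannot prove urn is functionally dependent on dataset_id, so both columns must be grouped (GROUP BY 1,2). MAX() also moves inside DATE_FORMAT, aggregating the native datetime rather than formatted strings; with '%Y-%m-%d' the two happen to sort the same, but aggregating first is the cleaner shape. The fixed pattern standalone, with 0,10 standing in for the LIMIT placeholders:

    -- Latest schema-change date per (dataset_id, urn), grouped by position;
    -- ORDER BY urn is legal because urn is one of the grouped columns.
    SELECT SQL_CALC_FOUND_ROWS DISTINCT dataset_id, urn,
           DATE_FORMAT(MAX(modified_date), '%Y-%m-%d') AS modified_date
    FROM dict_dataset_schema_history
    WHERE dataset_id IS NOT NULL
    GROUP BY 1,2
    ORDER BY urn
    LIMIT 0, 10;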