Fix default database id bug caused by multithreading

This commit is contained in:
SunZhaonan 2015-12-01 16:39:58 -08:00
parent b83c0a4322
commit 5946f355ff
3 changed files with 9 additions and 11 deletions

View File

@ -61,8 +61,8 @@ public class AzLineageExtractor {
// normalize and combine the path // normalize and combine the path
LineageCombiner lineageCombiner = new LineageCombiner(message.connection); LineageCombiner lineageCombiner = new LineageCombiner(message.connection);
lineageCombiner.addAll(oneAzkabanJobLineage); lineageCombiner.addAll(oneAzkabanJobLineage);
Integer defaultDatabaseId = Integer.valueOf(message.prop.getProperty(Constant.AZ_DEFAULT_HADOOP_DATABASE_ID_KEY));
List<LineageRecord> lineageFromLog = AzLogParser.getLineageFromLog(log, message.azkabanJobExecution); List<LineageRecord> lineageFromLog = AzLogParser.getLineageFromLog(log, message.azkabanJobExecution, defaultDatabaseId);
lineageCombiner.addAll(lineageFromLog); lineageCombiner.addAll(lineageFromLog);
return lineageCombiner.getCombinedLineage(); return lineageCombiner.getCombinedLineage();

View File

@ -90,7 +90,7 @@ public class AzLineageExtractorMaster {
Connection conn = DriverManager.getConnection(connUrl); Connection conn = DriverManager.getConnection(connUrl);
DatabaseWriter databaseWriter = new DatabaseWriter(connUrl, "stg_job_execution_data_lineage"); DatabaseWriter databaseWriter = new DatabaseWriter(connUrl, "stg_job_execution_data_lineage");
AzLogParser.initialize(conn, Integer.valueOf(prop.getProperty(Constant.AZ_DEFAULT_HADOOP_DATABASE_ID_KEY))); AzLogParser.initialize(conn);
PathAnalyzer.initialize(conn); PathAnalyzer.initialize(conn);
int timeout = 30; // default 30 minutes for one job int timeout = 30; // default 30 minutes for one job
if (prop.containsKey(Constant.LINEAGE_ACTOR_TIMEOUT_KEY)) if (prop.containsKey(Constant.LINEAGE_ACTOR_TIMEOUT_KEY))

View File

@ -34,7 +34,6 @@ public class AzLogParser {
static List<LogLineagePattern> logLineagePatterns; static List<LogLineagePattern> logLineagePatterns;
static List<String> logHadoopIdPatterns; static List<String> logHadoopIdPatterns;
static int defaultDatabaseId;
/** /**
* Parse the hadoop job id from the log. * Parse the hadoop job id from the log.
@ -65,18 +64,17 @@ public class AzLogParser {
/** /**
* initialize, download the regex info into cache * initialize, download the regex info into cache
*/ */
public synchronized static void initialize(Connection conn, int defaultDatabaseId) public synchronized static void initialize(Connection conn)
throws SQLException { throws SQLException {
if (logHadoopIdPatterns != null && logLineagePatterns != null) { if (logHadoopIdPatterns != null && logLineagePatterns != null) {
return; return;
} }
loadLineagePatterns(conn, defaultDatabaseId); loadLineagePatterns(conn);
loadHadoopIdPatterns(conn, defaultDatabaseId); loadHadoopIdPatterns(conn);
} }
private static void loadLineagePatterns(Connection conn, int defaultDatabaseId) private static void loadLineagePatterns(Connection conn)
throws SQLException { throws SQLException {
AzLogParser.defaultDatabaseId = defaultDatabaseId;
logLineagePatterns = new ArrayList<>(); logLineagePatterns = new ArrayList<>();
String cmd = "SELECT regex, database_type, database_name_index, dataset_index, operation_type, source_target_type, " String cmd = "SELECT regex, database_type, database_name_index, dataset_index, operation_type, source_target_type, "
+ "record_count_index, record_byte_index, insert_count_index, insert_byte_index, " + "record_count_index, record_byte_index, insert_count_index, insert_byte_index, "
@ -95,7 +93,7 @@ public class AzLogParser {
} }
} }
private static void loadHadoopIdPatterns(Connection conn, int defaultDatabaseId) private static void loadHadoopIdPatterns(Connection conn)
throws SQLException { throws SQLException {
logHadoopIdPatterns = new ArrayList<>(); logHadoopIdPatterns = new ArrayList<>();
String cmd = "SELECT regex FROM log_reference_job_id_pattern WHERE is_active = 1"; String cmd = "SELECT regex FROM log_reference_job_id_pattern WHERE is_active = 1";
@ -111,7 +109,7 @@ public class AzLogParser {
* @param azkabanJobExecRecord contain the job execution info to construct the result * @param azkabanJobExecRecord contain the job execution info to construct the result
* @return * @return
*/ */
public static List<LineageRecord> getLineageFromLog(String log, AzkabanJobExecRecord azkabanJobExecRecord) { public static List<LineageRecord> getLineageFromLog(String log, AzkabanJobExecRecord azkabanJobExecRecord, Integer defaultDatabaseId) {
List<LineageRecord> result = new ArrayList<>(); List<LineageRecord> result = new ArrayList<>();