Modify KafkaConsumerMaster to handle more than one kafka config, add error handling

Yi Wang 2016-08-04 13:07:19 -07:00
parent ef584552be
commit c0cfe1f5ca
2 changed files with 161 additions and 98 deletions

File: KafkaConsumerMaster.java

@@ -17,7 +17,6 @@ import akka.actor.ActorRef;
 import akka.actor.Props;
 import akka.actor.UntypedActor;
 import java.io.IOException;
-import java.lang.reflect.Method;
 import java.sql.SQLException;
 import java.util.HashMap;
 import java.util.List;
@@ -31,12 +30,10 @@ import kafka.javaapi.consumer.ConsumerConnector;
 import metadata.etl.models.EtlJobName;
 import models.daos.ClusterDao;
 import models.daos.EtlJobDao;
-import org.apache.avro.generic.GenericData;
 import msgs.KafkaResponseMsg;
 import play.Logger;
 import play.Play;
-import utils.JdbcUtil;
 import utils.KafkaConfig;
 import utils.KafkaConfig.Topic;
 import wherehows.common.kafka.schemaregistry.client.CachedSchemaRegistryClient;
@ -47,111 +44,104 @@ import wherehows.common.writers.DatabaseWriter;
/** /**
* Akka actor responsible to manage Kafka workers and use kafka topic processor * Akka actor responsible for managing Kafka workers
* to deal with response messages
*
*/ */
public class KafkaConsumerMaster extends UntypedActor { public class KafkaConsumerMaster extends UntypedActor {
// List of kafka job IDs
private static List<Integer> _kafkaJobList; private static List<Integer> _kafkaJobList;
private static ConsumerConnector _consumer; // map of kafka job id to configs
private static Properties _kafkaConfig; private static Map<Integer, KafkaConfig> _kafkaConfigs = new HashMap<>();
private static Map<String, Topic> _kafkaTopics; // map of topic name to DB writer
private static Map<String, DatabaseWriter> _topicDbWriters = new HashMap<>();
private final Map<String, Object> _topicProcessorClass;
private final Map<String, Method> _topicProcessorMethod;
private final Map<String, DatabaseWriter> _topicDbWriter;
public KafkaConsumerMaster() {
this._topicProcessorClass = new HashMap<>();
this._topicProcessorMethod = new HashMap<>();
this._topicDbWriter = new HashMap<>();
}
@Override @Override
public void preStart() throws Exception { public void preStart()
_kafkaJobList = Play.application().configuration().getIntList("kafka.consumer.etl.jobid", null);; throws Exception {
_kafkaJobList = Play.application().configuration().getIntList("kafka.consumer.etl.jobid", null);
if (_kafkaJobList == null || _kafkaJobList.size() == 0) { if (_kafkaJobList == null || _kafkaJobList.size() == 0) {
context().stop(getSelf());
Logger.error("Kafka job id error, kafkaJobList: " + _kafkaJobList); Logger.error("Kafka job id error, kafkaJobList: " + _kafkaJobList);
getContext().stop(getSelf()); return;
} }
Logger.info("Start the KafkaConsumerMaster... Kafka etl job id list: " + _kafkaJobList); Logger.info("Start KafkaConsumerMaster... Kafka job id list: " + _kafkaJobList);
// handle 1 kafka connection for (final int kafkaJobId : _kafkaJobList) {
Map<String, Object> kafkaEtlJob = EtlJobDao.getEtlJobById(_kafkaJobList.get(0)); try {
final int kafkaJobRefId = Integer.parseInt(kafkaEtlJob.get("ref_id").toString()); // handle 1 kafka connection
final String kafkaJobName = kafkaEtlJob.get("wh_etl_job_name").toString(); Map<String, Object> kafkaEtlJob = EtlJobDao.getEtlJobById(kafkaJobId);
final int kafkaJobRefId = Integer.parseInt(kafkaEtlJob.get("ref_id").toString());
final String kafkaJobName = kafkaEtlJob.get("wh_etl_job_name").toString();
if (!kafkaJobName.equals(EtlJobName.KAFKA_CONSUMER_ETL.name())) { // get Kafka configurations from database
Logger.error("Kafka job info error: job name '" + kafkaJobName + "' not equal " final KafkaConfig kafkaConfig = new KafkaConfig();
+ EtlJobName.KAFKA_CONSUMER_ETL.name()); kafkaConfig.updateKafkaProperties(EtlJobName.valueOf(kafkaJobName), kafkaJobRefId);
getContext().stop(getSelf()); final Properties kafkaProps = kafkaConfig.getProperties();
} final Map<String, Topic> kafkaTopics = kafkaConfig.getTopics();
// get Kafka configurations from database kafkaConfig.updateTopicProcessor();
KafkaConfig.updateKafkaProperties(kafkaJobRefId); _topicDbWriters.putAll(kafkaConfig.getTopicDbWriters());
_kafkaConfig = KafkaConfig.getProperties();
_kafkaTopics = KafkaConfig.getTopics();
// get list of cluster information from database and update ClusterUtil // create Kafka consumer connector
ClusterUtil.updateClusterInfo(ClusterDao.getClusterInfo()); Logger.info("Create Kafka Consumer with config: " + kafkaProps.toString());
final SchemaRegistryClient schemaRegistryClient =
new CachedSchemaRegistryClient((String) kafkaProps.get("schemaRegistryUrl"));
final ConsumerConfig cfg = new ConsumerConfig(kafkaProps);
ConsumerConnector consumerConnector = Consumer.createJavaConsumerConnector(cfg);
for (String topic : _kafkaTopics.keySet()) { // create Kafka message streams
// get the processor class and method final Map<String, Integer> topicCountMap = new HashMap<>();
final Class processorClass = Class.forName(_kafkaTopics.get(topic).processor); for (Topic topic : kafkaTopics.values()) {
_topicProcessorClass.put(topic, processorClass.newInstance()); topicCountMap.put(topic.topic, topic.numOfWorkers);
}
final Method method = processorClass.getDeclaredMethod("process", GenericData.Record.class, String.class); final Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap =
_topicProcessorMethod.put(topic, method); consumerConnector.createMessageStreams(topicCountMap);
// get the database writer // add config to kafka config map
final DatabaseWriter dw = new DatabaseWriter(JdbcUtil.wherehowsJdbcTemplate, _kafkaTopics.get(topic).dbTable); kafkaConfig.setSchemaRegistryClient(schemaRegistryClient);
_topicDbWriter.put(topic, dw); kafkaConfig.setConsumer(consumerConnector);
} _kafkaConfigs.put(kafkaJobId, kafkaConfig);
// create Kafka consumer connector // create workers to handle each message stream
final SchemaRegistryClient schemaRegistryClient = for (String topic : kafkaTopics.keySet()) {
new CachedSchemaRegistryClient((String) _kafkaConfig.get("schemaRegistryUrl")); final List<KafkaStream<byte[], byte[]>> streams = consumerMap.get(topic);
Logger.info("Create Kafka Consumer Config: " + _kafkaConfig.toString());
final ConsumerConfig cfg = new ConsumerConfig(_kafkaConfig);
_consumer = Consumer.createJavaConsumerConnector(cfg);
// create Kafka message streams int threadNumber = 0;
final Map<String, Integer> topicCountMap = new HashMap<>(); for (final KafkaStream<byte[], byte[]> stream : streams) {
for (Topic topic : _kafkaTopics.values()) { ActorRef childActor = getContext().actorOf(
topicCountMap.put(topic.topic, topic.numOfWorkers); Props.create(KafkaConsumerWorker.class, topic, threadNumber, stream, schemaRegistryClient,
} kafkaConfig.getProcessorClass(topic), kafkaConfig.getProcessorMethod(topic)));
final Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = childActor.tell("Start", getSelf());
_consumer.createMessageStreams(topicCountMap); threadNumber++;
}
for (String topic : _kafkaTopics.keySet()) { }
final List<KafkaStream<byte[], byte[]>> streams = consumerMap.get(topic); Logger.info("Initiate Kafka consumer job " + kafkaJobId + " with topics " + kafkaTopics.keySet());
} catch (Exception e) {
int threadNumber = 0; Logger.error("Initiating Kafka properties on startup fail, job id: " + kafkaJobId, e);
for (final KafkaStream<byte[], byte[]> stream : streams) {
ActorRef childActor =
getContext().actorOf(
Props.create(KafkaConsumerWorker.class,
topic, threadNumber, stream, schemaRegistryClient,
_topicProcessorClass.get(topic), _topicProcessorMethod.get(topic)));
childActor.tell("Start", getSelf());
threadNumber++;
} }
} }
try {
// get list of cluster information from database and update ClusterUtil
ClusterUtil.updateClusterInfo(ClusterDao.getClusterInfo());
} catch (Exception e) {
Logger.error("Fail to fetch cluster info from DB ", e);
}
} }
 
   @Override
-  public void onReceive(Object message) throws Exception {
+  public void onReceive(Object message)
+      throws Exception {
     if (message instanceof KafkaResponseMsg) {
       final KafkaResponseMsg kafkaMsg = (KafkaResponseMsg) message;
       final String topic = kafkaMsg.getTopic();
       final AbstractRecord record = kafkaMsg.getRecord();
 
-      if (record != null && _kafkaTopics.containsKey(topic)) {
+      if (record != null && _topicDbWriters.containsKey(topic)) {
         Logger.debug("Writing to DB kafka event record: " + topic);
-        final DatabaseWriter dbWriter = _topicDbWriter.get(topic);
+        final DatabaseWriter dbWriter = _topicDbWriters.get(topic);
         try {
           dbWriter.append(record);
@@ -171,10 +161,8 @@ public class KafkaConsumerMaster extends UntypedActor {
   @Override
   public void postStop() {
     Logger.info("Terminating KafkaConsumerMaster...");
-    if (_consumer != null) {
-      _consumer.shutdown();
-      _kafkaConfig.clear();
-      _kafkaTopics.clear();
+    for (KafkaConfig config : _kafkaConfigs.values()) {
+      config.close();
     }
   }
 }
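Note on how the per-topic processor objects wired up above are consumed: each KafkaConsumerWorker receives the processor instance and its process(GenericData.Record, String) Method, is expected to invoke it reflectively for every record it pulls from its stream, and to report results back to this master as a KafkaResponseMsg (handled in onReceive above). The worker itself is not part of this commit; the following is only a minimal sketch of that reflective call, with the helper class and method names (ProcessorInvoker, processRecord) invented for illustration:

import java.lang.reflect.Method;

import org.apache.avro.generic.GenericData;

// Hypothetical helper mirroring the worker-side use of the processor class and
// Method that KafkaConfig.updateTopicProcessor() resolves per topic.
public class ProcessorInvoker {

  // processorMethod is expected to have the signature process(GenericData.Record, String),
  // matching the Method looked up via getDeclaredMethod in KafkaConfig.
  public static Object processRecord(Object processorInstance, Method processorMethod,
      GenericData.Record avroRecord, String topic) throws Exception {
    // The return value would typically be wrapped into a KafkaResponseMsg and sent to the master.
    return processorMethod.invoke(processorInstance, avroRecord, topic);
  }
}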

File: KafkaConfig.java

@@ -13,12 +13,17 @@
  */
 package utils;
 
+import java.lang.reflect.Method;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.Properties;
+import kafka.javaapi.consumer.ConsumerConnector;
 import metadata.etl.models.EtlJobName;
 import models.daos.EtlJobPropertyDao;
+import org.apache.avro.generic.GenericData;
 import play.Logger;
+import wherehows.common.kafka.schemaregistry.client.SchemaRegistryClient;
+import wherehows.common.writers.DatabaseWriter;
 
 /**
@@ -26,12 +31,6 @@ import play.Logger;
  */
 public class KafkaConfig {
 
-  private static Properties _props = new Properties();
-  // Map of <topic_name, topic_content>
-  private static Map<String, Topic> _topics = new HashMap<>();
-
-  public static final EtlJobName KAFKA_JOBNAME = EtlJobName.KAFKA_CONSUMER_ETL;
-
   /**
    * Class for storing Kafka Topic info
    */
@@ -49,19 +48,30 @@ public class KafkaConfig {
     }
   }
 
+  private final Properties _props = new Properties();
+  // Map of <topic_name, topic_content>
+  private final Map<String, Topic> _topics = new HashMap<>();
+
+  private final Map<String, Object> _topicProcessorClass = new HashMap<>();
+  private final Map<String, Method> _topicProcessorMethod = new HashMap<>();
+  private final Map<String, DatabaseWriter> _topicDbWriter = new HashMap<>();
+
+  private ConsumerConnector _consumer;
+  private SchemaRegistryClient _schemaRegistryClient;
+
   /**
-   * Update Kafka properties and topics from etl_job_properies table
+   * Update Kafka properties and topics from wh_etl_job_properties table
    * @throws Exception
    */
-  public static void updateKafkaProperties(int kafkaJobRefId) throws Exception {
-    Properties props = EtlJobPropertyDao.getJobProperties(KAFKA_JOBNAME, kafkaJobRefId);
-    if (props == null || props.size() < 5) {
-      Logger.error("Fail to update Kafka job properties for " + KAFKA_JOBNAME.name()
-          + ", job ref id: " + kafkaJobRefId);
+  public void updateKafkaProperties(EtlJobName jobName, int jobRefId)
+      throws Exception {
+    Properties props;
+    try {
+      props = EtlJobPropertyDao.getJobProperties(jobName, jobRefId);
+    } catch (Exception e) {
+      Logger.error("Fail to update Kafka job properties for " + jobName.name() + ", ref id: " + jobRefId);
       return;
-    } else {
-      Logger.info("Get Kafka job properties for " + KAFKA_JOBNAME.name() + ", job ref id: " + kafkaJobRefId);
     }
+    Logger.info("Get Kafka job properties for " + jobName.name() + ", job ref id: " + jobRefId);
 
     String[] topics = ((String) props.remove("kafka.topics")).split("\\s*,\\s*");
     String[] processors = ((String) props.remove("kafka.processors")).split("\\s*,\\s*");
@@ -77,11 +87,61 @@ public class KafkaConfig {
     }
   }
 
+  /**
+   * update processor class, method and db writer for each topic
+   */
+  public void updateTopicProcessor() {
+    for (String topic : _topics.keySet()) {
+      try {
+        // get the processor class and method
+        final Class processorClass = Class.forName(_topics.get(topic).processor);
+        _topicProcessorClass.put(topic, processorClass.newInstance());
+
+        final Method method = processorClass.getDeclaredMethod("process", GenericData.Record.class, String.class);
+        _topicProcessorMethod.put(topic, method);
+
+        // get the database writer
+        final DatabaseWriter dw = new DatabaseWriter(JdbcUtil.wherehowsJdbcTemplate, _topics.get(topic).dbTable);
+        _topicDbWriter.put(topic, dw);
+      } catch (Exception e) {
+        Logger.error("Fail to create Processor for topic: " + topic, e);
+        _topicProcessorClass.remove(topic);
+        _topicProcessorMethod.remove(topic);
+        _topicDbWriter.remove(topic);
+      }
+    }
+  }
+
+  public Object getProcessorClass(String topic) {
+    return _topicProcessorClass.get(topic);
+  }
+
+  public Method getProcessorMethod(String topic) {
+    return _topicProcessorMethod.get(topic);
+  }
+
+  public DatabaseWriter getDbWriter(String topic) {
+    return _topicDbWriter.get(topic);
+  }
+
+  public Map<String, DatabaseWriter> getTopicDbWriters() {
+    return _topicDbWriter;
+  }
+
+  /**
+   * close the Config
+   */
+  public void close() {
+    if (_consumer != null) {
+      _consumer.shutdown();
+    }
+  }
+
   /**
    * get Kafka configuration
    * @return
    */
-  public static Properties getProperties() {
+  public Properties getProperties() {
     return _props;
   }
@@ -89,8 +149,23 @@ public class KafkaConfig {
    * get Kafka topics
    * @return
    */
-  public static Map<String, Topic> getTopics() {
+  public Map<String, Topic> getTopics() {
     return _topics;
   }
+
+  public ConsumerConnector getConsumer() {
+    return _consumer;
+  }
+
+  public void setConsumer(ConsumerConnector consumer) {
+    _consumer = consumer;
+  }
+
+  public SchemaRegistryClient getSchemaRegistryClient() {
+    return _schemaRegistryClient;
+  }
+
+  public void setSchemaRegistryClient(SchemaRegistryClient schemaRegistryClient) {
+    _schemaRegistryClient = schemaRegistryClient;
+  }
 }
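Taken together, KafkaConfig now acts as a self-contained, per-job configuration object rather than a static singleton: KafkaConsumerMaster creates one instance per Kafka ETL job id, loads properties and topics from the database, resolves the per-topic processors and DB writers, attaches the consumer connector and schema registry client, and closes the instance on shutdown. A minimal usage sketch of that lifecycle, assuming a placeholder job ref id of 50 and omitting the consumer and worker setup shown in KafkaConsumerMaster.preStart():

import java.util.Properties;

import metadata.etl.models.EtlJobName;
import utils.KafkaConfig;

public class KafkaConfigUsageSketch {

  public static void main(String[] args) throws Exception {
    // One KafkaConfig instance per Kafka ETL job; the ref id 50 is a placeholder.
    KafkaConfig config = new KafkaConfig();
    config.updateKafkaProperties(EtlJobName.KAFKA_CONSUMER_ETL, 50);
    config.updateTopicProcessor();

    // Properties and the topic map drive consumer creation in KafkaConsumerMaster.preStart().
    Properties props = config.getProperties();
    System.out.println("Consumer properties: " + props);
    System.out.println("Configured topics: " + config.getTopics().keySet());

    // On actor shutdown the master calls close(), which shuts down the consumer connector if one was set.
    config.close();
  }
}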