diff --git a/wherehows-etl/src/main/resources/jython/ElasticSearchIndex.py b/wherehows-etl/src/main/resources/jython/ElasticSearchIndex.py index f9939dea3e..a98dad2a01 100644 --- a/wherehows-etl/src/main/resources/jython/ElasticSearchIndex.py +++ b/wherehows-etl/src/main/resources/jython/ElasticSearchIndex.py @@ -22,7 +22,7 @@ import time class ElasticSearchIndex(): def __init__(self, args): self.logger = LoggerFactory.getLogger('jython script : ' + self.__class__.__name__) - self.elasticsearch_index_url = args[Constant.ELASTICSEARCH_URL_KEY] + self.elasticsearch_server_url = args[Constant.ELASTICSEARCH_URL_KEY] self.elasticsearch_port = args[Constant.ELASTICSEARCH_PORT_KEY] if Constant.ELASTICSEARCH_INDEX_KEY not in args: @@ -37,7 +37,7 @@ class ElasticSearchIndex(): self.max_retry_times = int(args[Constant.WH_DB_MAX_RETRY_TIMES]) # max times for db re-connection when lost during fetching source data - self.base_url = self.elasticsearch_index_url + ':' + str(self.elasticsearch_port) + '/' + self.base_url = self.elasticsearch_server_url + ':' + str(self.elasticsearch_port) + '/' self.logger.info(self.base_url) self.old_index = [] @@ -83,7 +83,7 @@ class ElasticSearchIndex(): SELECT d.field_id, d.dataset_id, f.comment FROM dict_dataset_field_comment d LEFT JOIN field_comments f ON d.comment_id = f.id WHERE d.field_id = %d """ - url = self.elasticsearch_index_url + ':' + str( + url = self.elasticsearch_server_url + ':' + str( self.elasticsearch_port) + '/' + self.new_index + '/field/_bulk' params = [] attempts = 0 @@ -149,7 +149,7 @@ class ElasticSearchIndex(): SELECT * FROM comments """ - url = self.elasticsearch_index_url + ':' + str( + url = self.elasticsearch_server_url + ':' + str( self.elasticsearch_port) + '/' + self.new_index + '/comment/_bulk' params = [] self.wh_cursor.execute(sql) @@ -257,7 +257,7 @@ class ElasticSearchIndex(): row_count = 1 result = self.wh_cursor.fetchone() - url = self.elasticsearch_index_url + ':' + str( + url = self.elasticsearch_server_url + ':' + str( self.elasticsearch_port) + '/' + self.new_index + '/dataset/_bulk' params = [] while result: @@ -303,7 +303,7 @@ class ElasticSearchIndex(): SELECT * FROM dict_business_metric """ - url = self.elasticsearch_index_url + ':' + str( + url = self.elasticsearch_server_url + ':' + str( self.elasticsearch_port) + '/' + self.new_index + '/metric/_bulk' params = [] self.wh_cursor.execute(sql) @@ -375,7 +375,7 @@ class ElasticSearchIndex(): SELECT * FROM flow_job WHERE app_id = %d and flow_id = %d """ - url = self.elasticsearch_index_url + ':' + str( + url = self.elasticsearch_server_url + ':' + str( self.elasticsearch_port) + '/' + self.new_index + '/flow_jobs/_bulk' params = [] @@ -488,6 +488,8 @@ class ElasticSearchIndex(): except Exception as e: self.logger.error(str(e)) + sys.exit("Error in re-indexing, the old index stays active") + finally: self.wh_cursor.close() self.wh_con.close() @@ -512,19 +514,25 @@ class ElasticSearchIndex(): self.logger.error(str(e)) def create_index(self): - now = int(time.time()) - url = self.base_url + str(now) + try: + now = int(time.time()) + url = self.base_url + str(now) - json_filepath = self.index_mapping_file - with open(json_filepath, 'r') as f: - req_body = json.load(f) + json_filepath = self.index_mapping_file + with open(json_filepath, 'r') as f: + req_body = json.load(f) - data = self.es_http_request("PUT", url, json.dumps(req_body)) - if str(data['acknowledged']) != 'True': - self.logger.error(str(data)) + data = self.es_http_request("PUT", url, json.dumps(req_body)) + if str(data['acknowledged']) != 'True': + self.logger.error(str(data)) + sys.exit("Error in create_index") - self.new_index = str(now) - self.logger.info('Successfully created index : {}'.format(self.new_index)) + self.new_index = str(now) + self.logger.info('Successfully created index : {}'.format(self.new_index)) + + except Exception as e: + self.logger.error(str(e)) + sys.exit("Error in create_index") def alias_switch(self): url = self.base_url + '_aliases' @@ -571,7 +579,6 @@ class ElasticSearchIndex(): self.logger.error(str(data)) self.logger.info('Successfully removed index: {}'.format(self.old_index)) - def run(self): try: # 1, create a new index with current timestamp, using json body from model