Merge pull request #808 from camelliazhang/master

Fix a bug in ETL
This commit is contained in:
na zhang 2017-10-19 14:07:28 -07:00 committed by GitHub
commit ba7082a787

View File

@@ -252,7 +252,6 @@ class ElasticSearchIndex():
url = self.elasticsearch_index_url + ':' + str(
self.elasticsearch_port) + '/' + self.elasticsearch_index + '/dataset/_bulk'
params = []
while result:
row = dict(zip(description, result))
@@ -276,12 +275,13 @@ class ElasticSearchIndex():
params.append('{ "index": { "_id": ' + str(row['id']) + ' }}')
params.append(json.dumps(dataset_detail))
if row_count % self.bulk_chunk_size == 0:
if row_count % self.bulk_chunk_size == 0:
self.bulk_insert(params, url)
self.logger.info('dataset ' + str(row_count))
self.wh_con.commit()
params = []
params = []
row_count += 1
result = self.wh_cursor.fetchone()
self.logger.info('total dataset row count is: ' + str(row_count))
@@ -344,8 +344,8 @@ class ElasticSearchIndex():
self.bulk_insert(params, url)
self.logger.info('metric ' + str(row_count))
self.wh_con.commit()
params = []
params = []
row_count += 1
result = self.wh_cursor.fetchone()
if len(params) > 0:
@@ -488,4 +488,4 @@ class ElasticSearchIndex():
if __name__ == "__main__":
args = sys.argv[1]
esi = ElasticSearchIndex(args)
esi.run()
esi.run()