Fix #1220: catch and log any errors during the es index (#1220)

This commit is contained in:
Sriharsha Chintalapani 2022-02-28 22:57:50 -08:00 committed by GitHub
parent 06d9329ae3
commit 15d0440599
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -13,6 +13,7 @@ import json
import logging import logging
import ssl import ssl
import time import time
import traceback
from datetime import datetime from datetime import datetime
from typing import List, Optional from typing import List, Optional
@ -205,71 +206,75 @@ class ElasticsearchSink(Sink[Entity]):
) )
def write_record(self, record: Entity) -> None: def write_record(self, record: Entity) -> None:
if isinstance(record, Table): try:
table_doc = self._create_table_es_doc(record) if isinstance(record, Table):
self.elasticsearch_client.index( table_doc = self._create_table_es_doc(record)
index=self.config.table_index_name, self.elasticsearch_client.index(
id=str(table_doc.table_id), index=self.config.table_index_name,
body=table_doc.json(), id=str(table_doc.table_id),
request_timeout=self.config.timeout, body=table_doc.json(),
) request_timeout=self.config.timeout,
if isinstance(record, Topic): )
topic_doc = self._create_topic_es_doc(record) if isinstance(record, Topic):
self.elasticsearch_client.index( topic_doc = self._create_topic_es_doc(record)
index=self.config.topic_index_name, self.elasticsearch_client.index(
id=str(topic_doc.topic_id), index=self.config.topic_index_name,
body=topic_doc.json(), id=str(topic_doc.topic_id),
request_timeout=self.config.timeout, body=topic_doc.json(),
) request_timeout=self.config.timeout,
if isinstance(record, Dashboard): )
dashboard_doc = self._create_dashboard_es_doc(record) if isinstance(record, Dashboard):
self.elasticsearch_client.index( dashboard_doc = self._create_dashboard_es_doc(record)
index=self.config.dashboard_index_name, self.elasticsearch_client.index(
id=str(dashboard_doc.dashboard_id), index=self.config.dashboard_index_name,
body=dashboard_doc.json(), id=str(dashboard_doc.dashboard_id),
request_timeout=self.config.timeout, body=dashboard_doc.json(),
) request_timeout=self.config.timeout,
if isinstance(record, Pipeline): )
pipeline_doc = self._create_pipeline_es_doc(record) if isinstance(record, Pipeline):
self.elasticsearch_client.index( pipeline_doc = self._create_pipeline_es_doc(record)
index=self.config.pipeline_index_name, self.elasticsearch_client.index(
id=str(pipeline_doc.pipeline_id), index=self.config.pipeline_index_name,
body=pipeline_doc.json(), id=str(pipeline_doc.pipeline_id),
request_timeout=self.config.timeout, body=pipeline_doc.json(),
) request_timeout=self.config.timeout,
)
if isinstance(record, User): if isinstance(record, User):
user_doc = self._create_user_es_doc(record) user_doc = self._create_user_es_doc(record)
print(user_doc.json()) print(user_doc.json())
self.elasticsearch_client.index( self.elasticsearch_client.index(
index=self.config.user_index_name, index=self.config.user_index_name,
id=str(user_doc.user_id), id=str(user_doc.user_id),
body=user_doc.json(), body=user_doc.json(),
request_timeout=self.config.timeout, request_timeout=self.config.timeout,
) )
if isinstance(record, Team): if isinstance(record, Team):
team_doc = self._create_team_es_doc(record) team_doc = self._create_team_es_doc(record)
self.elasticsearch_client.index( self.elasticsearch_client.index(
index=self.config.team_index_name, index=self.config.team_index_name,
id=str(team_doc.team_id), id=str(team_doc.team_id),
body=team_doc.json(), body=team_doc.json(),
request_timeout=self.config.timeout, request_timeout=self.config.timeout,
) )
if isinstance(record, GlossaryTerm): if isinstance(record, GlossaryTerm):
glossary_term_doc = self._create_glossary_term_es_doc(record) glossary_term_doc = self._create_glossary_term_es_doc(record)
self.elasticsearch_client.index( self.elasticsearch_client.index(
index=self.config.glossary_term_index_name, index=self.config.glossary_term_index_name,
id=str(glossary_term_doc.glossary_term_id), id=str(glossary_term_doc.glossary_term_id),
body=glossary_term_doc.json(), body=glossary_term_doc.json(),
request_timeout=self.config.timeout, request_timeout=self.config.timeout,
) )
if hasattr(record.name, "__root__"): if hasattr(record.name, "__root__"):
self.status.records_written(record.name.__root__) self.status.records_written(record.name.__root__)
else: else:
self.status.records_written(record.name) self.status.records_written(record.name)
except Exception as e:
logger.error(f"Failed to index entity {record} due to {e}")
logger.debug(traceback.print_exc())
def _create_table_es_doc(self, table: Table): def _create_table_es_doc(self, table: Table):
fqdn = table.fullyQualifiedName fqdn = table.fullyQualifiedName