diff --git a/ingestion/pipelines/metadata_to_es.json b/ingestion/pipelines/metadata_to_es.json index 06f5bb7a7d4..9a468da8207 100644 --- a/ingestion/pipelines/metadata_to_es.json +++ b/ingestion/pipelines/metadata_to_es.json @@ -13,7 +13,8 @@ "index_tables": "true", "index_topics": "true", "index_dashboards": "true", - "es_host_port": "localhost" + "es_host": "localhost", + "es_port": 9300 } }, "metadata_server": { diff --git a/ingestion/setup.py b/ingestion/setup.py index 295e6da125b..a242a067568 100644 --- a/ingestion/setup.py +++ b/ingestion/setup.py @@ -42,8 +42,7 @@ scheduler_requirements = { } profiler_requirements = { - "openmetadata-data-profiler@git+git://github.com/open-metadata/data-profiler.git#egg" - "=openmetadata-data-profiler" + "openmetadata-data-profiler@git+git://github.com/open-metadata/data-profiler.git#egg=openmetadata-data-profiler" } base_requirements = { @@ -92,7 +91,6 @@ plugins: Dict[str, Set[str]] = { "pii-processor": {"pandas~=1.3.1"}, "presto": {"pyhive~=0.6.3"}, "postgres": {"pymysql>=1.0.2", "psycopg2-binary", "GeoAlchemy2"}, - "profiler": {"ruamel.yaml", "jsonpatch", "pandas", "IPython", "jsonschema", "scipy", "mistune", "altair", "tzlocal"}, "redshift": {"sqlalchemy-redshift", "GeoAlchemy2", "psycopg2-binary"}, "redshift-usage": {"sqlalchemy-redshift", "psycopg2-binary", "GeoAlchemy2"}, "scheduler": scheduler_requirements, diff --git a/ingestion/src/metadata/ingestion/sink/elasticsearch.py b/ingestion/src/metadata/ingestion/sink/elasticsearch.py index c953e9d7573..d041870b891 100644 --- a/ingestion/src/metadata/ingestion/sink/elasticsearch.py +++ b/ingestion/src/metadata/ingestion/sink/elasticsearch.py @@ -37,7 +37,8 @@ logger = logging.getLogger(__name__) class ElasticSearchConfig(ConfigModel): - es_host_port: str + es_host: str + es_port: int = 9200 index_tables: Optional[bool] = True index_topics: Optional[bool] = False index_dashboards: Optional[bool] = False @@ -68,7 +69,8 @@ class ElasticsearchSink(Sink): self.rest = OpenMetadataAPIClient(self.metadata_config) self.elasticsearch_doc_type = '_doc' self.elasticsearch_client = Elasticsearch([ - {'host': self.config.es_host_port}, + {'host': self.config.es_host, + 'port': self.config.es_port}, ]) if self.config.index_tables: self._check_or_create_index(self.config.table_index_name, TABLE_ELASTICSEARCH_INDEX_MAPPING)