Yao You af7639e23f
ci: add retry to elastic search ingest test (#1581)
Occasionally the ES test can fail because the index fails to be created
on the first try. Experiments show that adding a timeout doesn't help, but
adding a retry mitigates the issue. See the history of commits in branch:
yao/bump-inference-to-0.6.6
https://github.com/Unstructured-IO/unstructured/pull/1563

---------

Co-authored-by: ryannikolaidis <1208590+ryannikolaidis@users.noreply.github.com>
Co-authored-by: badGarnet <badGarnet@users.noreply.github.com>
2023-09-29 13:42:21 -05:00

32 lines
999 B
Python

import pandas as pd
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
from es_cluster_config import (
CLUSTER_URL,
DATA_PATH,
INDEX_NAME,
MAPPINGS,
form_elasticsearch_doc_dict,
)
# Set up the Elasticsearch fixture used by the ingest test: connect to the
# cluster, create the test index, bulk-load the CSV rows, and report how many
# documents ended up in the index.
print("Connecting to the Elasticsearch cluster.")
es = Elasticsearch(CLUSTER_URL, request_timeout=30)
print(es.info())

# Drop rows with missing values; reset_index() renumbers the remaining rows
# from 0, and that row number is what gets passed to the document builder.
df = pd.read_csv(DATA_PATH).dropna().reset_index()

print("Creating an Elasticsearch index for testing elasticsearch ingest.")
# Allow the client to retry the create request rather than failing the whole
# setup on a single transient error.
response = es.options(max_retries=5).indices.create(index=INDEX_NAME, mappings=MAPPINGS)
if response.meta.status != 200:
    raise RuntimeError("failed to create index")

print("Loading data into the index.")
bulk_data = [form_elasticsearch_doc_dict(i, row) for i, row in df.iterrows()]
bulk(es, bulk_data)

# Refresh so the documents written above are visible to the count below.
es.indices.refresh(index=INDEX_NAME)
response = es.cat.count(index=INDEX_NAME, format="json")
# With format="json" the cat count API returns a single row whose "count"
# field is a numeric string.
indexed_count = int(response[0]["count"])
print(f"Index {INDEX_NAME} contains {indexed_count} documents.")
if indexed_count != len(bulk_data):
    # NOTE(review): reported as a warning rather than raised because the ids
    # produced by form_elasticsearch_doc_dict are not visible here; consider
    # making this fatal once uniqueness of the ids is confirmed.
    print(f"WARNING: expected {len(bulk_data)} documents in the index, found {indexed_count}.")

print("Succesfully created and filled an Elasticsearch index for testing elasticsearch ingest.")