mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-03 23:20:35 +00:00

Change the OpenSearch port to see if that fixes CI. We think there may be a conflict with the Elasticsearch Docker port. Also add a simple retry to the vector query. --------- Co-authored-by: potter-potter <david.potter@gmail.com>
67 lines
1.7 KiB
Python
Executable File
67 lines
1.7 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
from contextlib import suppress
|
|
|
|
import pandas as pd
|
|
from opensearchpy import Document, Keyword, OpenSearch, Text
|
|
from opensearchpy.exceptions import NotFoundError
|
|
|
|
# CSV file that provides the rows loaded into the test index.
DATA_PATH = "scripts/opensearch-test-helpers/wiki_movie_plots_small.csv"
# NOTE(review): the client further down is built from an explicit host/port
# pair with use_ssl=True rather than from this URL -- confirm whether the
# "http" scheme here is intentional.
CLUSTER_URL = "http://localhost:9247"
# Name of the index this script creates and fills.
INDEX_NAME = "movies"
|
|
|
|
|
|
class Movie(Document):
    """OpenSearch document describing one movie.

    Every attribute is mapped as a ``Text`` field; ``title`` additionally
    exposes a ``raw`` ``Keyword`` sub-field.
    """

    title = Text(fields={"raw": Keyword()})
    year = Text()
    director = Text()
    cast = Text()
    genre = Text()
    wiki_page = Text()
    ethnicity = Text()
    plot = Text()

    class Index:
        # Same value as the module-level INDEX_NAME constant.
        name = "movies"

    def save(self, **kwargs):
        """Save the document by delegating to ``Document.save``.

        Args:
            **kwargs: Forwarded unchanged to ``Document.save`` (the script
                passes ``using=client``).

        Returns:
            Whatever ``Document.save`` returns.
        """
        return super().save(**kwargs)
|
|
|
|
|
|
# Seed a local OpenSearch cluster with the movie-plots CSV so the ingest
# tests have a populated index to read from.
print("Connecting to the OpenSearch cluster.")
client = OpenSearch(
    hosts=[{"host": "localhost", "port": 9247}],
    http_auth=("admin", "admin"),
    use_ssl=True,
    verify_certs=False,
    ssl_show_warn=False,
)
print(client.info())

# Rows with any missing value are dropped; reset_index() renumbers the
# remaining rows from 0, and that number is used as the document id below.
df = pd.read_csv(DATA_PATH).dropna().reset_index()

# Remove any index left over from a previous run; a missing index is fine.
with suppress(NotFoundError):
    client.indices.delete(index=INDEX_NAME)

print("Creating an OpenSearch index for testing opensearch ingest.")
response = client.indices.create(index=INDEX_NAME)
if not response.get("acknowledged"):
    raise RuntimeError("failed to create index")

# Set up the Movie mappings once, before indexing; this does not depend on
# the row being processed, so it does not belong inside the loop.
Movie.init(using=client)

for i, row in df.iterrows():
    movie = Movie(
        meta={"id": i},
        title=row["Title"],
        year=row["Release Year"],
        director=row["Director"],
        cast=row["Cast"],
        genre=row["Genre"],
        wiki_page=row["Wiki Page"],
        ethnicity=row["Origin/Ethnicity"],
        plot=row["Plot"],
    )
    movie.save(using=client)

# NOTE(review): the returned count is discarded; presumably this call is only
# a final round-trip to the cluster -- confirm whether it should be checked.
client.count()

print("Successfully created and filled an OpenSearch index for testing opensearch ingest.")
|