67 lines
1.7 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
from contextlib import suppress
import pandas as pd
from opensearchpy import Document, Keyword, OpenSearch, Text
from opensearchpy.exceptions import NotFoundError
# CSV file holding the movie rows that get loaded into the test index.
DATA_PATH = "scripts/opensearch-test-helpers/wiki_movie_plots_small.csv"
# NOTE(review): not referenced in the visible part of this script; the client
# is configured with an explicit host/port instead -- confirm whether this
# constant is still needed.
CLUSTER_URL = "http://localhost:9247"
# Name of the index this script creates and fills.
INDEX_NAME = "movies"
class Movie(Document):
    """Document model for the movie records indexed by this script.

    Every attribute is mapped as a ``Text`` field; ``title`` additionally
    carries a ``raw`` keyword sub-field.
    """

    title = Text(fields={"raw": Keyword()})
    year = Text()
    director = Text()
    cast = Text()
    genre = Text()
    wiki_page = Text()
    ethnicity = Text()
    plot = Text()

    class Index:
        # Use the module-level constant so the model always targets the same
        # index that the script deletes and creates.
        name = INDEX_NAME

    def save(self, **kwargs):
        """Persist this document, forwarding all keyword arguments unchanged.

        Returns whatever ``Document.save`` returns.
        """
        return super().save(**kwargs)
# Connect to the local test cluster. TLS is enabled but certificate
# verification (and the warning about it) is switched off.
print("Connecting to the OpenSearch cluster.")
client = OpenSearch(
    hosts=[{"host": "localhost", "port": 9247}],
    http_auth=("admin", "admin"),
    use_ssl=True,
    verify_certs=False,
    ssl_show_warn=False,
)
print(client.info())

# Drop every row that has a missing value, then renumber the remaining rows
# so the index label of each row can serve as its document id.
df = pd.read_csv(DATA_PATH).dropna().reset_index()

# Start from a clean slate; a missing index is not an error.
with suppress(NotFoundError):
    client.indices.delete(index=INDEX_NAME)

print("Creating an OpenSearch index for testing opensearch ingest.")
response = client.indices.create(index=INDEX_NAME)
if not response.get("acknowledged"):
    raise RuntimeError("failed to create index")

# Set up the Movie model on the cluster once. This call does not depend on
# the row being processed, so it is done before the loop instead of being
# repeated for every record.
Movie.init(using=client)

for i, row in df.iterrows():
    movie = Movie(
        meta={"id": i},
        title=row["Title"],
        year=row["Release Year"],
        director=row["Director"],
        cast=row["Cast"],
        genre=row["Genre"],
        wiki_page=row["Wiki Page"],
        ethnicity=row["Origin/Ethnicity"],
        plot=row["Plot"],
    )
    movie.save(using=client)

# NOTE(review): the result of this call is discarded; it only confirms that
# the cluster still answers after ingest. Consider checking the returned
# count against len(df) if verification is intended.
client.count()

print("Successfully created and filled an OpenSearch index for testing opensearch ingest.")