roman/es ingest test fixes (#1610)

### Description
Update the Elasticsearch Docker setup for the ingest tests to use docker-compose.

Closes https://github.com/Unstructured-IO/unstructured/issues/1609
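
For context, a minimal sketch of how the new compose-based cluster is driven locally, using only the paths and commands that appear in this diff (the helper directory path is taken from the cleanup script below):

```bash
# Sketch of local usage based on the commands in this change; paths are relative to the repo root.
SCRIPT_DIR=scripts/elasticsearch-test-helpers

# Bring up the single-node cluster and block until its healthcheck reports healthy
docker compose -f "$SCRIPT_DIR"/docker-compose.yaml up --wait

# Populate the test index (the script gains a shebang in this change, so it is run directly)
"$SCRIPT_DIR"/create_and_fill_es.py

# Tear everything down: containers, orphaned containers, and volumes
docker compose -f "$SCRIPT_DIR"/docker-compose.yaml down --remove-orphans -v
```
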
Author: Roman Isecke · 2023-10-03 10:39:33 -04:00 · committed by GitHub
Parent: 9d81971fcb
Commit: b2e997635f
8 changed files with 38 additions and 38 deletions


@@ -255,6 +255,10 @@ jobs:
           source .venv/bin/activate
           mkdir "$NLTK_DATA"
           make install-ci
+      - name: Setup docker-compose
+        uses: KengoTODA/actions-setup-docker-compose@v1
+        with:
+          version: '2.22.0'
       - name: Test Ingest (unit)
         run: |
           source .venv/bin/activate


@@ -9,7 +9,7 @@ env:
 jobs:
   setup:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-latest-m
     if: |
       github.event_name == 'workflow_dispatch' ||
       (github.event_name == 'push' && contains(github.event.head_commit.message, 'ingest-test-fixtures-update'))
@@ -56,6 +56,10 @@ jobs:
           source .venv/bin/activate
           mkdir "$NLTK_DATA"
           make install-ci
+      - name: Setup docker-compose
+        uses: KengoTODA/actions-setup-docker-compose@v1
+        with:
+          version: '2.22.0'
       - name: Update test fixtures
         env:
           AIRTABLE_PERSONAL_ACCESS_TOKEN: ${{ secrets.AIRTABLE_PERSONAL_ACCESS_TOKEN }}


@@ -1,4 +1,4 @@
-## 0.10.19-dev5
+## 0.10.19-dev6
 ### Enhancements


@@ -1,37 +1,14 @@
 #!/usr/bin/env bash
 set -e
 SCRIPT_DIR=$(dirname "$(realpath "$0")")
-# Create the Elasticsearch cluster and get the container id
-docker run -d --rm -p 9200:9200 -p 9300:9300 -e "xpack.security.enabled=false" -e "discovery.type=single-node" --name es-test docker.elastic.co/elasticsearch/elasticsearch:8.7.0
+# Create the Elasticsearch cluster
+docker compose version
+docker compose -f "$SCRIPT_DIR"/docker-compose.yaml up --wait
+docker compose -f "$SCRIPT_DIR"/docker-compose.yaml ps
-# Wait for Elasticsearch container to start
-echo "Waiting for Elasticsearch container to start..."
-sleep 1
-url="http://localhost:9200/_cluster/health?wait_for_status=green&timeout=50s"
-status_code=0
-retry_count=0
-max_retries=6
-# Check the cluster status repeatedly until it becomes live or maximum retries are reached
-while [ "$status_code" -ne 200 ] && [ "$retry_count" -lt "$max_retries" ]; do
-  # Send a GET request to the cluster health API
-  response=$(curl -s -o /dev/null -w "%{http_code}" "$url")
-  status_code="$response"
-  # Process the files only when the Elasticsearch cluster is live
-  if [ "$status_code" -eq 200 ]; then
-    echo "Cluster is live."
-    python "$SCRIPT_DIR/create_and_fill_es.py"
-  else
-    ((retry_count++))
-    echo "Cluster is not available. Retrying in 5 seconds... (Attempt $retry_count)"
-    sleep 5
-  fi
-done
-# If the cluster has not become live, exit after a certain number of tries
-if [ "$status_code" -ne 200 ]; then
-  echo "Cluster took an unusually long time to create (>25 seconds). Expected time is around 10 seconds. Exiting."
-fi
+echo "Cluster is live."
+"$SCRIPT_DIR"/create_and_fill_es.py


@@ -1,3 +1,5 @@
+#!/usr/bin/env python3
 import pandas as pd
 from elasticsearch import Elasticsearch
 from elasticsearch.helpers import bulk


@@ -0,0 +1,15 @@
+services:
+  elasticsearch:
+    image: docker.elastic.co/elasticsearch/elasticsearch:8.7.0
+    container_name: es-test
+    ports:
+      - 9200:9200
+      - 9300:9300
+    environment:
+      - xpack.security.enabled=false
+      - discovery.type=single-node
+    healthcheck:
+      test: ["CMD-SHELL", "curl --silent --fail localhost:9200/_cluster/health || exit 1"]
+      interval: 30s
+      timeout: 30s
+      retries: 3
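
A quick way to see the healthcheck state that `up --wait` keys off of (container name taken from the compose file above); this is a debugging sketch, not part of the change:

```bash
# Show the current healthcheck state of the es-test container: starting, healthy, or unhealthy.
docker inspect --format '{{.State.Health.Status}}' es-test
```
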


@@ -16,10 +16,8 @@ source "$SCRIPT_DIR"/cleanup.sh
 function cleanup() {
   # Kill the container so the script can be repeatedly run using the same ports
-  if docker ps --filter "name=es-test"; then
-    echo "Stopping Elasticsearch Docker container"
-    docker stop es-test
-  fi
+  echo "Stopping Elasticsearch Docker container"
+  docker-compose -f scripts/elasticsearch-test-helpers/docker-compose.yaml down --remove-orphans -v
   cleanup_dir "$OUTPUT_DIR"
   if [ "$CI" == "true" ]; then


@@ -1 +1 @@
-__version__ = "0.10.19-dev5" # pragma: no cover
+__version__ = "0.10.19-dev6" # pragma: no cover