mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-11-12 16:37:31 +00:00
roman/es ingest test fixes (#1610)
### Description
Update the Elasticsearch Docker setup to use docker-compose.
Would close out https://github.com/Unstructured-IO/unstructured/issues/1609
This commit is contained in:
parent
9d81971fcb
commit
b2e997635f
4
.github/workflows/ci.yml
vendored
4
.github/workflows/ci.yml
vendored
@ -255,6 +255,10 @@ jobs:
|
|||||||
source .venv/bin/activate
|
source .venv/bin/activate
|
||||||
mkdir "$NLTK_DATA"
|
mkdir "$NLTK_DATA"
|
||||||
make install-ci
|
make install-ci
|
||||||
|
- name: Setup docker-compose
|
||||||
|
uses: KengoTODA/actions-setup-docker-compose@v1
|
||||||
|
with:
|
||||||
|
version: '2.22.0'
|
||||||
- name: Test Ingest (unit)
|
- name: Test Ingest (unit)
|
||||||
run: |
|
run: |
|
||||||
source .venv/bin/activate
|
source .venv/bin/activate
|
||||||
|
|||||||
@ -9,7 +9,7 @@ env:
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
setup:
|
setup:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest-m
|
||||||
if: |
|
if: |
|
||||||
github.event_name == 'workflow_dispatch' ||
|
github.event_name == 'workflow_dispatch' ||
|
||||||
(github.event_name == 'push' && contains(github.event.head_commit.message, 'ingest-test-fixtures-update'))
|
(github.event_name == 'push' && contains(github.event.head_commit.message, 'ingest-test-fixtures-update'))
|
||||||
@ -56,6 +56,10 @@ jobs:
|
|||||||
source .venv/bin/activate
|
source .venv/bin/activate
|
||||||
mkdir "$NLTK_DATA"
|
mkdir "$NLTK_DATA"
|
||||||
make install-ci
|
make install-ci
|
||||||
|
- name: Setup docker-compose
|
||||||
|
uses: KengoTODA/actions-setup-docker-compose@v1
|
||||||
|
with:
|
||||||
|
version: '2.22.0'
|
||||||
- name: Update test fixtures
|
- name: Update test fixtures
|
||||||
env:
|
env:
|
||||||
AIRTABLE_PERSONAL_ACCESS_TOKEN: ${{ secrets.AIRTABLE_PERSONAL_ACCESS_TOKEN }}
|
AIRTABLE_PERSONAL_ACCESS_TOKEN: ${{ secrets.AIRTABLE_PERSONAL_ACCESS_TOKEN }}
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
## 0.10.19-dev5
|
## 0.10.19-dev6
|
||||||
|
|
||||||
### Enhancements
|
### Enhancements
|
||||||
|
|
||||||
|
|||||||
@ -1,37 +1,14 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
SCRIPT_DIR=$(dirname "$(realpath "$0")")
|
SCRIPT_DIR=$(dirname "$(realpath "$0")")
|
||||||
|
|
||||||
# Create the Elasticsearch cluster and get the container id
|
# Create the Elasticsearch cluster
|
||||||
docker run -d --rm -p 9200:9200 -p 9300:9300 -e "xpack.security.enabled=false" -e "discovery.type=single-node" --name es-test docker.elastic.co/elasticsearch/elasticsearch:8.7.0
|
docker compose version
|
||||||
|
docker compose -f "$SCRIPT_DIR"/docker-compose.yaml up --wait
|
||||||
|
docker compose -f "$SCRIPT_DIR"/docker-compose.yaml ps
|
||||||
|
|
||||||
# Wait for Elasticsearch container to start
|
|
||||||
echo "Waiting for Elasticsearch container to start..."
|
|
||||||
sleep 1
|
|
||||||
|
|
||||||
url="http://localhost:9200/_cluster/health?wait_for_status=green&timeout=50s"
|
echo "Cluster is live."
|
||||||
status_code=0
|
"$SCRIPT_DIR"/create_and_fill_es.py
|
||||||
retry_count=0
|
|
||||||
max_retries=6
|
|
||||||
|
|
||||||
# Check the cluster status repeatedly until it becomes live or maximum retries are reached
|
|
||||||
while [ "$status_code" -ne 200 ] && [ "$retry_count" -lt "$max_retries" ]; do
|
|
||||||
# Send a GET request to the cluster health API
|
|
||||||
response=$(curl -s -o /dev/null -w "%{http_code}" "$url")
|
|
||||||
status_code="$response"
|
|
||||||
|
|
||||||
# Process the files only when the Elasticsearch cluster is live
|
|
||||||
if [ "$status_code" -eq 200 ]; then
|
|
||||||
echo "Cluster is live."
|
|
||||||
python "$SCRIPT_DIR/create_and_fill_es.py"
|
|
||||||
else
|
|
||||||
((retry_count++))
|
|
||||||
echo "Cluster is not available. Retrying in 5 seconds... (Attempt $retry_count)"
|
|
||||||
sleep 5
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
# If the cluster has not become live, exit after a certain number of tries
|
|
||||||
if [ "$status_code" -ne 200 ]; then
|
|
||||||
echo "Cluster took an unusually long time to create (>25 seconds). Expected time is around 10 seconds. Exiting."
|
|
||||||
fi
|
|
||||||
|
|||||||
2
scripts/elasticsearch-test-helpers/create_and_fill_es.py
Normal file → Executable file
2
scripts/elasticsearch-test-helpers/create_and_fill_es.py
Normal file → Executable file
@ -1,3 +1,5 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from elasticsearch import Elasticsearch
|
from elasticsearch import Elasticsearch
|
||||||
from elasticsearch.helpers import bulk
|
from elasticsearch.helpers import bulk
|
||||||
|
|||||||
15
scripts/elasticsearch-test-helpers/docker-compose.yaml
Normal file
15
scripts/elasticsearch-test-helpers/docker-compose.yaml
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
services:
|
||||||
|
elasticsearch:
|
||||||
|
image: docker.elastic.co/elasticsearch/elasticsearch:8.7.0
|
||||||
|
container_name: es-test
|
||||||
|
ports:
|
||||||
|
- 9200:9200
|
||||||
|
- 9300:9300
|
||||||
|
environment:
|
||||||
|
- xpack.security.enabled=false
|
||||||
|
- discovery.type=single-node
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "curl --silent --fail localhost:9200/_cluster/health || exit 1"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 30s
|
||||||
|
retries: 3
|
||||||
@ -16,10 +16,8 @@ source "$SCRIPT_DIR"/cleanup.sh
|
|||||||
|
|
||||||
function cleanup() {
|
function cleanup() {
|
||||||
# Kill the container so the script can be repeatedly run using the same ports
|
# Kill the container so the script can be repeatedly run using the same ports
|
||||||
if docker ps --filter "name=es-test"; then
|
|
||||||
echo "Stopping Elasticsearch Docker container"
|
echo "Stopping Elasticsearch Docker container"
|
||||||
docker stop es-test
|
docker-compose -f scripts/elasticsearch-test-helpers/docker-compose.yaml down --remove-orphans -v
|
||||||
fi
|
|
||||||
|
|
||||||
cleanup_dir "$OUTPUT_DIR"
|
cleanup_dir "$OUTPUT_DIR"
|
||||||
if [ "$CI" == "true" ]; then
|
if [ "$CI" == "true" ]; then
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
__version__ = "0.10.19-dev5" # pragma: no cover
|
__version__ = "0.10.19-dev6" # pragma: no cover
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user