# Benchmarks for the Haystack `v1.x` branch.
#
# Flow: `deploy-runner` launches an EC2 instance through CML and registers it
# as a self-hosted runner labelled `cml`; the four benchmark jobs then run one
# after another on that runner (each `needs` the previous one); finally
# `terminate-runner` shuts the instance down, even if an earlier job failed.
name: Haystack 1.x Benchmarks

on:
  workflow_dispatch:
  schedule:
    # At 00:01 on Sunday
    - cron: "1 0 * * 0"

permissions:
  # `id-token: write` lets the AWS authentication step assume the role in
  # `role-to-assume` via OIDC.
  id-token: write
  contents: read

env:
  AWS_REGION: eu-central-1

jobs:
  # Launches the EC2 instance and exposes the CML runner id as a job output
  # so that `terminate-runner` can find the instance again.
  deploy-runner:
    runs-on: ubuntu-latest
    outputs:
      cml_runner_id: ${{ steps.deploy.outputs.cml_runner_id }}
    steps:
      - uses: actions/checkout@v4

      - uses: iterative/setup-cml@v2

      - name: AWS authentication
        uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a
        with:
          aws-region: ${{ env.AWS_REGION }}
          role-to-assume: ${{ secrets.AWS_CI_ROLE_ARN }}

      - name: Launch EC2 instance and deploy runner
        id: deploy
        env:
          repo_token: ${{ secrets.HAYSTACK_BOT_TOKEN }}
        run: |
          # `tee /dev/fd/2` mirrors the launcher output to stderr so it stays
          # visible in the job log while also being captured in OUTPUT.
          OUTPUT=$(cml runner launch \
            --cloud aws \
            --cloud-region ${{ env.AWS_REGION }} \
            --cloud-type=p3.2xlarge \
            --cloud-hdd-size=64 \
            --labels=cml 2>&1 | tee /dev/fd/2)
          # Extract 'id' from the log and set it as a step output
          ID_VALUE=$(echo "$OUTPUT" | jq -r '.message? | fromjson? | select(.id != null) | .id // empty')
          echo "cml_runner_id=$ID_VALUE" >> "$GITHUB_OUTPUT"

  # Runs every reader config under test/benchmarks/configs/reader/.
  run-reader-benchmarks:
    needs: deploy-runner
    runs-on: [self-hosted, cml]
    container:
      image: docker://iterativeai/cml:0-dvc2-base1-gpu
      options: --gpus all
    timeout-minutes: 2880
    steps:
      - uses: actions/checkout@v4
        with:
          ref: v1.x

      - name: Install Haystack + Datadog requirements
        run: |
          # Quoted so the shell cannot glob-expand the extras brackets.
          pip install ".[metrics,benchmarks,inference]"
          pip install -r test/benchmarks/datadog/requirements.txt

      - name: Run benchmarks
        working-directory: test/benchmarks
        run: |
          mkdir -p out
          # NOTE(review): GitHub's default shell for `run` steps uses `-e`, so
          # a failing run.py presumably aborts the loop before the
          # "(or failed)" message is printed - confirm whether that is intended.
          for f in ./configs/reader/*.yml; do
            name="${f%.*}"
            echo "=== Running benchmarks for $name ===";
            config_name="$(basename "$name")"
            python run.py --output "out/$config_name.json" "$f";
            echo "=== Benchmarks done for $name (or failed) ===";
          done

      - name: Send Benchmark results to Datadog
        working-directory: test/benchmarks
        env:
          # Passed through the environment instead of being interpolated into
          # the script text, so the secret value is never parsed by the shell.
          CORE_DATADOG_API_KEY: ${{ secrets.CORE_DATADOG_API_KEY }}
        run: |
          python datadog/send_metrics.py out/ "$CORE_DATADOG_API_KEY" https://api.datadoghq.eu

      - name: Archive benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-reader
          path: test/benchmarks/out/

  # Runs every config whose file name contains `-elasticsearch-`, with an
  # Elasticsearch service container alongside the job.
  run-elasticsearch-benchmarks:
    needs:
      - deploy-runner
      - run-reader-benchmarks
    runs-on: [self-hosted, cml]
    container:
      image: docker://iterativeai/cml:0-dvc2-base1-gpu
      options: --gpus all
    services:
      elasticsearch:
        image: elasticsearch:7.17.6
        env:
          discovery.type: "single-node"
        ports:
          - "9201:9200"
    timeout-minutes: 2880
    steps:
      - uses: actions/checkout@v4
        with:
          ref: v1.x

      - name: Install Haystack + Datadog requirements
        run: |
          # Quoted so the shell cannot glob-expand the extras brackets.
          pip install ".[metrics,elasticsearch,benchmarks,inference]"
          pip install -r test/benchmarks/datadog/requirements.txt

      - name: Run benchmarks
        working-directory: test/benchmarks
        run: |
          mkdir -p out
          for f in ./configs/**/*-elasticsearch-*.yml; do
            name="${f%.*}"
            echo "=== Running benchmarks for $name ===";
            config_name="$(basename "$name")"
            python run.py --output "out/$config_name.json" "$f";
            echo "=== Benchmarks done for $name (or failed) ===";
          done

      - name: Send Benchmark results to Datadog
        working-directory: test/benchmarks
        env:
          # Passed through the environment instead of being interpolated into
          # the script text, so the secret value is never parsed by the shell.
          CORE_DATADOG_API_KEY: ${{ secrets.CORE_DATADOG_API_KEY }}
        run: |
          python datadog/send_metrics.py out/ "$CORE_DATADOG_API_KEY" https://api.datadoghq.eu

      - name: Archive benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-elasticsearch
          path: test/benchmarks/out/

  # Runs every config whose file name contains `-weaviate-`, with a Weaviate
  # service container alongside the job.
  run-weaviate-benchmarks:
    needs:
      - deploy-runner
      - run-elasticsearch-benchmarks
    runs-on: [self-hosted, cml]
    container:
      image: docker://iterativeai/cml:0-dvc2-base1-gpu
      options: --gpus all
    services:
      weaviate:
        image: semitechnologies/weaviate:1.17.2
        env:
          AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: "true"
          PERSISTENCE_DATA_PATH: "/var/lib/weaviate"
        ports:
          - "8080:8080"
    timeout-minutes: 2880
    steps:
      - uses: actions/checkout@v4
        with:
          ref: v1.x

      - name: Install Haystack + Datadog requirements
        run: |
          # Quoted so the shell cannot glob-expand the extras brackets.
          pip install ".[metrics,weaviate,benchmarks,inference]"
          pip install -r test/benchmarks/datadog/requirements.txt

      - name: Run benchmarks
        working-directory: test/benchmarks
        run: |
          mkdir -p out
          for f in ./configs/**/*-weaviate-*.yml; do
            name="${f%.*}"
            echo "=== Running benchmarks for $name ===";
            config_name="$(basename "$name")"
            python run.py --output "out/$config_name.json" "$f";
            echo "=== Benchmarks done for $name (or failed) ===";
          done

      - name: Send Benchmark results to Datadog
        working-directory: test/benchmarks
        env:
          # Passed through the environment instead of being interpolated into
          # the script text, so the secret value is never parsed by the shell.
          CORE_DATADOG_API_KEY: ${{ secrets.CORE_DATADOG_API_KEY }}
        run: |
          python datadog/send_metrics.py out/ "$CORE_DATADOG_API_KEY" https://api.datadoghq.eu

      - name: Archive benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-weaviate
          path: test/benchmarks/out/

  # Runs every config whose file name contains `-opensearch-`, with an
  # OpenSearch service container alongside the job.
  run-opensearch-benchmarks:
    needs:
      - deploy-runner
      - run-weaviate-benchmarks
    runs-on: [self-hosted, cml]
    container:
      image: docker://iterativeai/cml:0-dvc2-base1-gpu
      options: --gpus all
    services:
      opensearch:
        image: opensearchproject/opensearch:1.3.5
        env:
          discovery.type: "single-node"
          OPENSEARCH_JAVA_OPTS: "-Xms4096m -Xmx4096m"
        ports:
          - "9200:9200"
    timeout-minutes: 2880
    steps:
      - uses: actions/checkout@v4
        with:
          ref: v1.x

      - name: Install Haystack + Datadog requirements
        run: |
          # Quoted so the shell cannot glob-expand the extras brackets.
          pip install ".[metrics,opensearch,benchmarks,inference]"
          pip install -r test/benchmarks/datadog/requirements.txt

      - name: Run benchmarks
        working-directory: test/benchmarks
        run: |
          mkdir -p out
          for f in ./configs/**/*-opensearch-*.yml; do
            name="${f%.*}"
            echo "=== Running benchmarks for $name ===";
            config_name="$(basename "$name")"
            python run.py --output "out/$config_name.json" "$f";
            echo "=== Benchmarks done for $name (or failed) ===";
          done

      - name: Send Benchmark results to Datadog
        working-directory: test/benchmarks
        env:
          # Passed through the environment instead of being interpolated into
          # the script text, so the secret value is never parsed by the shell.
          CORE_DATADOG_API_KEY: ${{ secrets.CORE_DATADOG_API_KEY }}
        run: |
          python datadog/send_metrics.py out/ "$CORE_DATADOG_API_KEY" https://api.datadoghq.eu

      - name: Archive benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-opensearch
          path: test/benchmarks/out/

  # Terminates the EC2 instance started by `deploy-runner`.
  terminate-runner:
    # `always()` keeps this job running even when a benchmark job failed or
    # was cancelled, so the instance is not left running.
    if: always()
    needs:
      - deploy-runner
      - run-opensearch-benchmarks
    runs-on: ubuntu-latest
    steps:
      - name: AWS authentication
        uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a
        with:
          aws-region: ${{ env.AWS_REGION }}
          role-to-assume: ${{ secrets.AWS_CI_ROLE_ARN }}

      - name: Terminate EC2 instance
        env:
          CML_RUNNER_ID: ${{ needs.deploy-runner.outputs.cml_runner_id }}
        run: |
          # Get the instance ID using its Name tag and terminate the instance
          INSTANCE_ID=$(aws ec2 describe-instances \
            --filters "Name=tag:Name,Values=$CML_RUNNER_ID" \
            --query "Reservations[*].Instances[*].[InstanceId]" \
            --output text)
          aws ec2 terminate-instances --instance-ids "$INSTANCE_ID"