Revamp CI (#825)

This commit is contained in:
oryx1729 2021-02-12 13:38:54 +01:00 committed by GitHub
parent c807f0d050
commit c4607cbd98
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 71 additions and 75 deletions

View File

@ -7,13 +7,69 @@ on:
branches: [ master ]
jobs:
build:
# Static type check of the `haystack` package with mypy.
type-check:
  runs-on: ubuntu-20.04
  steps:
    - uses: actions/checkout@v2
    - uses: actions/setup-python@v2
      with:
        # Quoted so the version stays a string instead of being read as a YAML float.
        python-version: "3.7"
    - name: Test with mypy
      run: |
        pip install mypy
        mypy haystack --ignore-missing-imports
# Populates the cached Python environment; starts only after type-check has succeeded.
build-cache:
  needs: type-check
  runs-on: ubuntu-20.04
  steps:
    - uses: actions/checkout@v2
    - uses: actions/setup-python@v2
      with:
        # Quoted so the version stays a string instead of being read as a YAML float.
        python-version: "3.7"
    # Exports `date` to the following steps; no step in this job reads it.
    - run: echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_ENV"
    - name: Cache
      id: cache-python-env
      uses: actions/cache@v2
      with:
        # The key combines the interpreter location with hashes of both dependency manifests.
        path: ${{ env.pythonLocation }}
        key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('requirements.txt') }}
    - name: Install dependencies
      # Install only when the cache step did not report an exact key hit.
      if: steps.cache-python-env.outputs.cache-hit != 'true'
      run: |
        python -m pip install --upgrade pip
        pip install pytest
        pip install --upgrade --upgrade-strategy eager -r requirements.txt -e .
# Builds the test matrix: a JSON array of the test_*.py file names found under test/.
prepare-build:
  needs: build-cache
  runs-on: ubuntu-20.04
  steps:
    - uses: actions/checkout@v2
    - id: set-matrix
      # `jq -R .` turns each file name into a JSON string; `jq -cs .` slurps them
      # into one compact array. The result is written to $GITHUB_OUTPUT because
      # the `::set-output` workflow command is deprecated.
      run: |
        echo "matrix=$(cd test && ls -d test_*.py | jq -R . | jq -cs .)" >> "$GITHUB_OUTPUT"
  outputs:
    matrix: ${{ steps.set-matrix.outputs.matrix }}
# Runs the tests: one matrix entry per item in the prepare-build job's `matrix` output.
build:
needs: prepare-build
runs-on: ubuntu-20.04
strategy:
matrix:
test-path: ${{fromJson(needs.prepare-build.outputs.matrix)}}
# Let the remaining matrix entries keep running when one of them fails.
fail-fast: false
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.7
uses: actions/setup-python@v2
with:
python-version: 3.7
# Appends a `date` variable to the job environment; the cache key below does not use it.
- run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
# Same path and key expressions as the Cache step of the build-cache job.
- name: Cache
uses: actions/cache@v2
with:
path: ${{ env.pythonLocation }}
key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('requirements.txt') }}
# Starts a detached single-node Elasticsearch 7.9.2 container published on port 9200.
- name: Run Elasticsearch
run: docker run -d -p 9200:9200 -e "discovery.type=single-node" -e "ES_JAVA_OPTS=-Xms128m -Xmx128m" elasticsearch:7.9.2
@ -23,33 +79,8 @@ jobs:
# Both JVM heap flags are passed in a single variable: repeating `-e` with the
# same name keeps only the last value, which silently dropped -JXms128m.
# NOTE(review): presumably the image reads TIKA_CHILD_JAVA_OPTS; confirm against the apache/tika image.
- name: Run Apache Tika
  run: docker run -d -p 9998:9998 -e "TIKA_CHILD_JAVA_OPTS=-JXms128m -JXmx128m" apache/tika:1.24.1
# Installs Python 3.7 with actions/setup-python.
- name: Set up Python 3.7
uses: actions/setup-python@v2
with:
python-version: 3.7
# Upgrades pip, installs pytest, requirements.txt and the project in editable mode,
# then downloads xpdf-tools 4.03 and copies its pdftotext binary to /usr/local/bin.
# NOTE(review): the wget call passes --no-check-certificate, so the download is not TLS-verified.
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pytest
pip install -r requirements.txt
pip install -e .
wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.03.tar.gz && tar -xvf xpdf-tools-linux-4.03.tar.gz && sudo cp xpdf-tools-linux-4.03/bin64/pdftotext /usr/local/bin
# Type-checks the haystack package; missing third-party imports are ignored.
- name: Test with mypy
run: |
pip install mypy
mypy haystack --ignore-missing-imports
# The three pytest steps below select tests by marker expression, all run from the test/ directory.
- name: Run Pytest without generator/pipeline marker
run: cd test && pytest -m "not pipeline and not generator and not summarizer"
# - name: Stop Containers
# run: docker rm -f `docker ps -a -q`
- name: Run pytest with generator/pipeline marker
run: cd test && pytest -m "pipeline or generator"
- name: Run pytest with summarizer marker
run: cd test && pytest -m "summarizer"
# Downloads xpdf-tools 4.03 and copies its pdftotext binary to /usr/local/bin.
# The folded scalar joins the lines with single spaces, giving one `&&` chain.
# NOTE(review): --no-check-certificate turns off TLS verification for this download.
- name: Install pdftotext
  run: >-
    wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.03.tar.gz &&
    tar -xvf xpdf-tools-linux-4.03.tar.gz &&
    sudo cp xpdf-tools-linux-4.03/bin64/pdftotext /usr/local/bin
# Runs only the test file assigned to this matrix entry.
- name: Run tests
  run: cd test && pytest ${{ matrix.test-path }}

View File

@ -132,10 +132,9 @@ def xpdf_fixture(tika_fixture):
raise Exception(
"""Currently auto installation of pdftotext is not supported on {0} platform """.format(platform)
)
commands = """ wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-{0}-4.02.tar.gz &&
tar -xvf xpdf-tools-{0}-4.02.tar.gz &&
{1} cp xpdf-tools-{0}-4.02/bin64/pdftotext /usr/local/bin""".format(platform_id, sudo_prefix)
commands = """ wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-{0}-4.03.tar.gz &&
tar -xvf xpdf-tools-{0}-4.03.tar.gz &&
{1} cp xpdf-tools-{0}-4.03/bin64/pdftotext /usr/local/bin""".format(platform_id, sudo_prefix)
run([commands], shell=True)
verify_installation = run(["pdftotext -v"], shell=True)

View File

@ -1,12 +0,0 @@
from pathlib import Path
import pytest
from haystack.file_converter.docx import DocxToTextConverter
@pytest.mark.tika
def test_convert():
    """Convert the sample .docx file and check how the extracted text begins."""
    sample_path = Path("samples/docx/sample_docx.docx")
    docx_converter = DocxToTextConverter()
    converted = docx_converter.convert(file_path=sample_path)
    assert converted["text"].startswith("Sample Docx File")

View File

@ -2,6 +2,7 @@ from pathlib import Path
import pytest
from haystack.file_converter.docx import DocxToTextConverter
from haystack.file_converter.pdf import PDFToTextConverter
from haystack.file_converter.tika import TikaConverter
@ -45,3 +46,7 @@ def test_language_validation(Converter, xpdf_fixture, caplog):
assert "The language for samples/pdf/sample_pdf_1.pdf is not one of ['de']." in caplog.text
def test_docx_converter():
    """Convert the sample .docx file and check how the extracted text begins."""
    sample_path = Path("samples/docx/sample_docx.docx")
    docx_converter = DocxToTextConverter()
    converted = docx_converter.convert(file_path=sample_path)
    assert converted["text"].startswith("Sample Docx File")

View File

@ -1,27 +0,0 @@
def test_module_imports():
    """Import a set of haystack names and assert that none of them is None."""
    from haystack import Finder
    from haystack.document_store.sql import SQLDocumentStore
    from haystack.document_store.elasticsearch import ElasticsearchDocumentStore
    from haystack.document_store.faiss import FAISSDocumentStore
    from haystack.document_store.milvus import MilvusDocumentStore
    from haystack.document_store.base import BaseDocumentStore
    from haystack.preprocessor.cleaning import clean_wiki_text
    from haystack.preprocessor.utils import convert_files_to_dicts, fetch_archive_from_http
    from haystack.reader.farm import FARMReader
    from haystack.reader.transformers import TransformersReader
    from haystack.retriever.sparse import TfidfRetriever
    from haystack.utils import print_answers

    # Checked in the same order as the imports above.
    imported_names = (
        Finder,
        SQLDocumentStore,
        ElasticsearchDocumentStore,
        FAISSDocumentStore,
        MilvusDocumentStore,
        BaseDocumentStore,
        clean_wiki_text,
        convert_files_to_dicts,
        fetch_archive_from_http,
        FARMReader,
        TransformersReader,
        TfidfRetriever,
        print_answers,
    )
    for imported in imported_names:
        assert imported is not None