diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 344e16da9..80db5055c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,13 +7,69 @@ on: branches: [ master ] jobs: - build: - + type-check: runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: 3.7 + - name: Test with mypy + run: | + pip install mypy + mypy haystack --ignore-missing-imports + build-cache: + needs: type-check + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: 3.7 + - run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV + - name: Cache + id: cache-python-env + uses: actions/cache@v2 + with: + path: ${{ env.pythonLocation }} + key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('requirements.txt') }} + - name: Install dependencies + if: steps.cache-python-env.outputs.cache-hit != 'true' + run: | + python -m pip install --upgrade pip + pip install pytest + pip install --upgrade --upgrade-strategy eager -r requirements.txt -e . + + prepare-build: + needs: build-cache + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + - id: set-matrix + run: | + echo "::set-output name=matrix::$(cd test && ls -d test_*.py | jq -R . | jq -cs .)" + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + build: + needs: prepare-build + runs-on: ubuntu-20.04 + strategy: + matrix: + test-path: ${{fromJson(needs.prepare-build.outputs.matrix)}} + fail-fast: false steps: - uses: actions/checkout@v2 - + - name: Set up Python 3.7 + uses: actions/setup-python@v2 + with: + python-version: 3.7 + - run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV + - name: Cache + uses: actions/cache@v2 + with: + path: ${{ env.pythonLocation }} + key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('requirements.txt') }} - name: Run Elasticsearch run: docker run -d -p 9200:9200 -e "discovery.type=single-node" -e "ES_JAVA_OPTS=-Xms128m -Xmx128m" elasticsearch:7.9.2 @@ -23,33 +79,8 @@ jobs: - name: Run Apache Tika run: docker run -d -p 9998:9998 -e "TIKA_CHILD_JAVA_OPTS=-JXms128m" -e "TIKA_CHILD_JAVA_OPTS=-JXmx128m" apache/tika:1.24.1 - - name: Set up Python 3.7 - uses: actions/setup-python@v2 - with: - python-version: 3.7 - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install pytest - pip install -r requirements.txt - pip install -e . - wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.03.tar.gz && tar -xvf xpdf-tools-linux-4.03.tar.gz && sudo cp xpdf-tools-linux-4.03/bin64/pdftotext /usr/local/bin - - name: Test with mypy - run: | - pip install mypy - mypy haystack --ignore-missing-imports - - - name: Run Pytest without generator/pipeline marker - run: cd test && pytest -m "not pipeline and not generator and not summarizer" - -# - name: Stop Containers -# run: docker rm -f `docker ps -a -q` - - - name: Run pytest with generator/pipeline marker - run: cd test && pytest -m "pipeline or generator" - - - name: Run pytest with summarizer marker - run: cd test && pytest -m "summarizer" - + - name: Install pdftotext + run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.03.tar.gz && tar -xvf xpdf-tools-linux-4.03.tar.gz && sudo cp xpdf-tools-linux-4.03/bin64/pdftotext /usr/local/bin + - name: Run tests + run: cd test && pytest ${{ matrix.test-path }} diff --git a/test/conftest.py b/test/conftest.py index 7537a3255..2626b5eef 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -132,10 +132,9 @@ def xpdf_fixture(tika_fixture): raise Exception( """Currently auto installation of pdftotext is not supported on {0} platform """.format(platform) ) - - commands = """ wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-{0}-4.02.tar.gz && - tar -xvf xpdf-tools-{0}-4.02.tar.gz && - {1} cp xpdf-tools-{0}-4.02/bin64/pdftotext /usr/local/bin""".format(platform_id, sudo_prefix) + commands = """ wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-{0}-4.03.tar.gz && + tar -xvf xpdf-tools-{0}-4.03.tar.gz && + {1} cp xpdf-tools-{0}-4.03/bin64/pdftotext /usr/local/bin""".format(platform_id, sudo_prefix) run([commands], shell=True) verify_installation = run(["pdftotext -v"], shell=True) diff --git a/test/test_docx_conversion.py b/test/test_docx_conversion.py deleted file mode 100644 index 109c3fea5..000000000 --- a/test/test_docx_conversion.py +++ /dev/null @@ -1,12 +0,0 @@ -from pathlib import Path - -import pytest - -from haystack.file_converter.docx import DocxToTextConverter - - -@pytest.mark.tika -def test_convert(): - converter = DocxToTextConverter() - document = converter.convert(file_path=Path("samples/docx/sample_docx.docx")) - assert document["text"].startswith("Sample Docx File") diff --git a/test/test_pdf_conversion.py b/test/test_file_converter.py similarity index 88% rename from test/test_pdf_conversion.py rename to test/test_file_converter.py index 1e3e0209a..7870953c6 100644 --- a/test/test_pdf_conversion.py +++ b/test/test_file_converter.py @@ -2,6 +2,7 @@ from pathlib import Path import pytest +from haystack.file_converter.docx import DocxToTextConverter from haystack.file_converter.pdf import PDFToTextConverter from haystack.file_converter.tika import TikaConverter @@ -45,3 +46,7 @@ def test_language_validation(Converter, xpdf_fixture, caplog): assert "The language for samples/pdf/sample_pdf_1.pdf is not one of ['de']." in caplog.text +def test_docx_converter(): + converter = DocxToTextConverter() + document = converter.convert(file_path=Path("samples/docx/sample_docx.docx")) + assert document["text"].startswith("Sample Docx File") diff --git a/test/test_imports.py b/test/test_imports.py deleted file mode 100644 index 8ef311a2e..000000000 --- a/test/test_imports.py +++ /dev/null @@ -1,27 +0,0 @@ -def test_module_imports(): - from haystack import Finder - from haystack.document_store.sql import SQLDocumentStore - from haystack.document_store.elasticsearch import ElasticsearchDocumentStore - from haystack.document_store.faiss import FAISSDocumentStore - from haystack.document_store.milvus import MilvusDocumentStore - from haystack.document_store.base import BaseDocumentStore - from haystack.preprocessor.cleaning import clean_wiki_text - from haystack.preprocessor.utils import convert_files_to_dicts, fetch_archive_from_http - from haystack.reader.farm import FARMReader - from haystack.reader.transformers import TransformersReader - from haystack.retriever.sparse import TfidfRetriever - from haystack.utils import print_answers - - assert Finder is not None - assert SQLDocumentStore is not None - assert ElasticsearchDocumentStore is not None - assert FAISSDocumentStore is not None - assert MilvusDocumentStore is not None - assert BaseDocumentStore is not None - assert clean_wiki_text is not None - assert convert_files_to_dicts is not None - assert fetch_archive_from_http is not None - assert FARMReader is not None - assert TransformersReader is not None - assert TfidfRetriever is not None - assert print_answers is not None