Introduce pylint & other improvements on the CI (#2130)

* Make mypy check also ui and rest_api, fix ui

* Remove explicit type packages from extras, mypy now downloads them

* Make pylint and mypy run on every file except tests

* Rename tasks

* Change cache key

* Fix mypy errors in rest_api

* Normalize python versions to avoid cache misses

* Add all exclusions to make pylint pass

* Run mypy on rest_api and ui as well

* test if installing the package really changes outcome

* Comment out installation of packages

* Experiment: randomize tests

* Add fallback installation steps on cache misses

* Remove randomization

* Add comment on cache

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
Sara Zan 2022-02-09 18:27:12 +01:00 committed by GitHub
parent 9dc89d2bd2
commit 40328a57b6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 231 additions and 56 deletions

View File

@ -15,6 +15,42 @@ on:
jobs: jobs:
type-check:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
# Mypy can't run properly on 3.7 as it misses support for Literal types.
# FIXME once we drop support for 3.7, use the cache.
python-version: 3.8
- name: Setup mypy
run: |
# FIXME installing the packages before running mypy raises
# a lot of errors which were never detected before!
# pip install .
# pip install rest_api/
# pip install ui/
# FIXME --install-types does not work properly yet, see https://github.com/python/mypy/issues/10600
# Hotfixing by installing type packages explicitly.
# Run mypy --install-types haystack locally to ensure the list is still up to date
# mypy --install-types --non-interactive .
pip install mypy pydantic types-Markdown types-PyYAML types-requests types-setuptools types-six types-tabulate types-chardet types-emoji types-protobuf
- name: Test with mypy
run: |
echo "=== haystack/ ==="
mypy haystack
echo "=== rest_api/ ==="
mypy rest_api --exclude=rest_api/build/ --exclude=rest_api/test/
echo "=== ui/ ==="
mypy ui --exclude=ui/build/ --exclude=ui/test/
build-cache: build-cache:
runs-on: ubuntu-20.04 runs-on: ubuntu-20.04
steps: steps:
@ -31,7 +67,7 @@ jobs:
with: with:
path: ${{ env.pythonLocation }} path: ${{ env.pythonLocation }}
# The cache will be rebuild every day and at every change of the dependency files # The cache will be rebuild every day and at every change of the dependency files
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github') }} key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}
- name: Install dependencies - name: Install dependencies
if: steps.cache-python-env.outputs.cache-hit != 'true' if: steps.cache-python-env.outputs.cache-hit != 'true'
@ -41,17 +77,9 @@ jobs:
pip install rest_api/ pip install rest_api/
pip install ui/ pip install ui/
pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
echo "=== pip freeze ==="
pip freeze
prepare-build:
needs: build-cache
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
- id: set-matrix
run: |
echo "::set-output name=matrix::$(find $(find . -type d -name test -not -path "./*env*/*") -type f -name test_*.py | jq -SR . | jq -cs .)"
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
code-and-docs-updates: code-and-docs-updates:
needs: build-cache needs: build-cache
@ -75,11 +103,24 @@ jobs:
uses: actions/cache@v2 uses: actions/cache@v2
with: with:
path: ${{ env.pythonLocation }} path: ${{ env.pythonLocation }}
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github') }} key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}
# Apply black on the entire codebase - name: Install Dependencies (on cache miss only)
# The cache might miss during the execution of an action: there should always be a fallback step to
# rebuild it in case it goes missing
if: steps.cache.outputs.cache-hit != 'true'
run: |
pip install --upgrade pip
pip install .[test]
pip install rest_api/
pip install ui/
pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
echo "=== pip freeze ==="
pip freeze
# Apply Black on the entire codebase
- name: Blacken - name: Blacken
run: python3 -m black . run: black .
# Convert the Jupyter notebooks into markdown tutorials # Convert the Jupyter notebooks into markdown tutorials
- name: Generate Tutorials - name: Generate Tutorials
@ -120,7 +161,8 @@ jobs:
git status git status
git push git push
type-check:
linter:
needs: code-and-docs-updates needs: code-and-docs-updates
runs-on: ubuntu-20.04 runs-on: ubuntu-20.04
steps: steps:
@ -129,23 +171,53 @@ jobs:
- run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV - run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
- uses: actions/setup-python@v2 - uses: actions/setup-python@v2
with: with:
python-version: 3.8 python-version: 3.7
- name: Cache Python - name: Cache Python
uses: actions/cache@v2 uses: actions/cache@v2
with: with:
path: ${{ env.pythonLocation }} path: ${{ env.pythonLocation }}
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github') }} key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}
- name: Test with mypy - name: Install Dependencies (on cache miss only)
run: mypy haystack # The cache might miss during the execution of an action: there should always be a fallback step to
# rebuild it in case it goes missing
if: steps.cache.outputs.cache-hit != 'true'
run: |
pip install --upgrade pip
pip install .[test]
pip install rest_api/
pip install ui/
pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
echo "=== pip freeze ==="
pip freeze
build: - name: Linter
needs: prepare-build run: |
pylint -ry haystack/
pylint -ry rest_api/
pylint -ry ui/
prepare-matrix:
needs: build-cache
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
- id: set-matrix
run: |
find $(find . -type d -name test -not -path "./*env*/*") -type f -name test_*.py | jq -SR . | jq -cs .
echo "::set-output name=matrix::$(find $(find . -type d -name test -not -path "./*env*/*") -type f -name test_*.py | jq -SR . | jq -cs .)"
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
tests:
needs: prepare-matrix
runs-on: ubuntu-20.04 runs-on: ubuntu-20.04
strategy: strategy:
matrix: matrix:
test-path: ${{fromJson(needs.prepare-build.outputs.matrix)}} test-path: ${{fromJson(needs.prepare-matrix.outputs.matrix)}}
fail-fast: false fail-fast: false
steps: steps:
@ -161,7 +233,7 @@ jobs:
uses: actions/cache@v2 uses: actions/cache@v2
with: with:
path: ${{ env.pythonLocation }} path: ${{ env.pythonLocation }}
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github') }} key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}
- name: Run Elasticsearch - name: Run Elasticsearch
run: docker run -d -p 9200:9200 -e "discovery.type=single-node" -e "ES_JAVA_OPTS=-Xms128m -Xmx128m" elasticsearch:7.9.2 run: docker run -d -p 9200:9200 -e "discovery.type=single-node" -e "ES_JAVA_OPTS=-Xms128m -Xmx128m" elasticsearch:7.9.2
@ -190,11 +262,26 @@ jobs:
- name: Install tesseract - name: Install tesseract
run: sudo apt-get install tesseract-ocr libtesseract-dev poppler-utils run: sudo apt-get install tesseract-ocr libtesseract-dev poppler-utils
- name: Install Dependencies (on cache miss only)
# The cache might miss during the execution of an action: there should always be a fallback step to
# rebuild it in case it goes missing
if: steps.cache.outputs.cache-hit != 'true'
run: |
pip install --upgrade pip
pip install .[test]
pip install rest_api/
pip install ui/
pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
echo "=== pip freeze ==="
pip freeze
# Haystack needs to be reinstalled at this stage to make sure the current commit's version is the one getting tested.
# The cache can last way longer than a specific action's run, so older Haystack version could be carried over.
- name: Reinstall Haystack - name: Reinstall Haystack
run: | run: |
pip install .[test] pip install .[test]
pip install rest_api/ pip install rest_api/
pip install eager ui/ pip install ui/
- name: Run tests - name: Run tests
run: pytest -s ${{ matrix.test-path }} run: pytest -s ${{ matrix.test-path }}

View File

@ -20,7 +20,93 @@ disable = [
"fixme", "fixme",
"protected-access", "protected-access",
"too-few-public-methods", "too-few-public-methods",
"raise-missing-from" "raise-missing-from",
"invalid-name",
"logging-fstring-interpolation",
"wrong-import-position",
"too-many-locals",
"duplicate-code",
"too-many-arguments",
"arguments-differ",
"wrong-import-order",
"consider-using-f-string",
"no-else-return",
"unused-variable",
"attribute-defined-outside-init",
"too-many-instance-attributes",
"no-self-use",
"super-with-arguments",
"anomalous-backslash-in-string",
"redefined-builtin",
"logging-format-interpolation",
"f-string-without-interpolation",
"abstract-method",
"too-many-branches",
"trailing-whitespace",
"unspecified-encoding",
"unidiomatic-typecheck",
"no-name-in-module",
"dangerous-default-value",
"unused-import",
"consider-using-with",
"redefined-outer-name",
"cyclic-import",
"arguments-renamed",
"unnecessary-pass",
"ungrouped-imports",
"broad-except",
"unnecessary-comprehension",
"subprocess-run-check",
"singleton-comparison",
"no-else-raise",
"import-outside-toplevel",
"consider-iterating-dictionary",
"too-many-nested-blocks",
"undefined-loop-variable",
"too-many-statements",
"consider-using-in",
"bare-except",
"too-many-lines",
"unexpected-keyword-arg",
"simplifiable-if-expression",
"use-list-literal",
"reimported",
"no-else-continue",
"deprecated-method",
"consider-using-dict-items",
"use-a-generator",
"simplifiable-if-statement",
"import-error",
"consider-using-from-import",
"useless-object-inheritance",
"use-dict-literal",
"unsubscriptable-object",
"too-many-return-statements",
"superfluous-parens",
"no-value-for-parameter",
"no-else-break",
"inconsistent-return-statements",
"consider-using-set-comprehension",
"c-extension-no-member",
"useless-super-delegation",
"useless-else-on-loop",
"used-before-assignment",
"unsupported-membership-test",
"unneeded-not",
"unnecessary-lambda",
"trailing-newlines",
"too-many-boolean-expressions",
"super-init-not-called",
"pointless-string-statement",
"non-parent-init-called",
"invalid-sequence-index",
"import-self",
"deprecated-argument",
"access-member-before-definition",
"invalid-envvar-default",
"logging-too-many-args",
] ]
[tool.pylint.'DESIGN'] [tool.pylint.'DESIGN']
max-args=7 max-args=7

View File

@ -42,11 +42,11 @@ def get_openapi_specs() -> dict:
""" """
app = get_application() app = get_application()
return get_openapi( return get_openapi(
title=app.title if app.title else None, title=app.title,
version=app.version if app.version else None, version=app.version,
openapi_version=app.openapi_version if app.openapi_version else None, openapi_version=app.openapi_version,
description=app.description if app.description else None, description=app.description,
routes=app.routes if app.routes else None, routes=app.routes,
) )

View File

@ -1,3 +1,5 @@
from typing import Dict, Union, Optional
import json import json
import logging import logging
@ -51,13 +53,14 @@ def get_feedback_metrics(filters: FilterRequest = None):
""" """
if filters: if filters:
filters = filters.filters filters_content = filters.filters or {}
filters["origin"] = ["user-feedback"] filters_content["origin"] = ["user-feedback"]
else: else:
filters = {"origin": ["user-feedback"]} filters_content = {"origin": ["user-feedback"]}
labels = DOCUMENT_STORE.get_all_labels(filters=filters) labels = DOCUMENT_STORE.get_all_labels(filters=filters_content)
res: Dict[str, Optional[Union[float, int]]]
if len(labels) > 0: if len(labels) > 0:
answer_feedback = [1 if l.is_correct_answer else 0 for l in labels] answer_feedback = [1 if l.is_correct_answer else 0 for l in labels]
doc_feedback = [1 if l.is_correct_document else 0 for l in labels] doc_feedback = [1 if l.is_correct_document else 0 for l in labels]

View File

@ -1,10 +1,11 @@
from typing import Optional, List, Union
import json import json
import logging import logging
import os import os
import shutil import shutil
import uuid import uuid
from pathlib import Path from pathlib import Path
from typing import Optional, List
from fastapi import APIRouter, UploadFile, File, Form, HTTPException, Depends from fastapi import APIRouter, UploadFile, File, Form, HTTPException, Depends
from pydantic import BaseModel from pydantic import BaseModel
@ -48,7 +49,8 @@ except KeyError:
logger.warning("Indexing Pipeline not found in the YAML configuration. File Upload API will not be available.") logger.warning("Indexing Pipeline not found in the YAML configuration. File Upload API will not be available.")
os.makedirs(FILE_UPLOAD_PATH, exist_ok=True) # create directory for uploading files # create directory for uploading files
os.makedirs(FILE_UPLOAD_PATH, exist_ok=True)
@as_form @as_form
@ -75,9 +77,10 @@ class Response(BaseModel):
@router.post("/file-upload") @router.post("/file-upload")
def upload_file( def upload_file(
files: List[UploadFile] = File(...), files: List[UploadFile] = File(...),
meta: Optional[str] = Form("null"), # JSON serialized string # JSON serialized string
fileconverter_params: FileConverterParams = Depends(FileConverterParams.as_form), meta: Optional[str] = Form("null"), # type: ignore
preprocessor_params: PreprocessorParams = Depends(PreprocessorParams.as_form), fileconverter_params: FileConverterParams = Depends(FileConverterParams.as_form), # type: ignore
preprocessor_params: PreprocessorParams = Depends(PreprocessorParams.as_form), # type: ignore
): ):
""" """
You can use this endpoint to upload a file for indexing You can use this endpoint to upload a file for indexing
@ -88,7 +91,7 @@ def upload_file(
file_paths: list = [] file_paths: list = []
file_metas: list = [] file_metas: list = []
meta = json.loads(meta) or {} meta_form = json.loads(meta) # type: ignore
for file in files: for file in files:
try: try:
@ -97,8 +100,8 @@ def upload_file(
shutil.copyfileobj(file.file, buffer) shutil.copyfileobj(file.file, buffer)
file_paths.append(file_path) file_paths.append(file_path)
meta["name"] = file.filename meta_form["name"] = file.filename
file_metas.append(meta) file_metas.append(meta_form)
finally: finally:
file.file.close() file.file.close()

View File

@ -44,6 +44,6 @@ def as_form(cls: Type[BaseModel]):
sig = inspect.signature(_as_form) sig = inspect.signature(_as_form)
sig = sig.replace(parameters=new_params) sig = sig.replace(parameters=new_params)
_as_form.__signature__ = sig _as_form.__signature__ = sig # type: ignore
setattr(cls, "as_form", _as_form) setattr(cls, "as_form", _as_form)
return cls return cls

View File

@ -35,7 +35,7 @@ class AnswerSerialized(Answer):
@pydantic_dataclass @pydantic_dataclass
class DocumentSerialized(Document): class DocumentSerialized(Document):
content: str content: str
embedding: Optional[List[float]] embedding: Optional[List[float]] # type: ignore
@pydantic_dataclass @pydantic_dataclass

View File

@ -3,7 +3,7 @@ import logging
from pathlib import Path from pathlib import Path
VERSION = None VERSION = "0.0.0"
try: try:
VERSION = open(Path(__file__).parent.parent / "VERSION.txt", "r").read() VERSION = open(Path(__file__).parent.parent / "VERSION.txt", "r").read()
except Exception as e: except Exception as e:

View File

@ -42,7 +42,6 @@ def exclude_no_answer(responses):
return responses return responses
@pytest.mark.elasticsearch
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
def client() -> TestClient: def client() -> TestClient:
os.environ["PIPELINE_YAML_PATH"] = str( os.environ["PIPELINE_YAML_PATH"] = str(
@ -55,7 +54,6 @@ def client() -> TestClient:
client.post(url="/documents/delete_by_filters", data='{"filters": {}}') client.post(url="/documents/delete_by_filters", data='{"filters": {}}')
@pytest.mark.elasticsearch
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
def populated_client(client: TestClient) -> TestClient: def populated_client(client: TestClient) -> TestClient:
client.post(url="/documents/delete_by_filters", data='{"filters": {}}') client.post(url="/documents/delete_by_filters", data='{"filters": {}}')

View File

@ -154,9 +154,7 @@ colab =
dev = dev =
# Type check # Type check
mypy mypy
types-Markdown typing_extensions; python_version < '3.8'
types-requests
types-PyYAML
# Test # Test
pytest pytest
responses responses

View File

@ -3,7 +3,7 @@ import logging
from pathlib import Path from pathlib import Path
VERSION = None VERSION = "0.0.0"
try: try:
# After git clone, VERSION.txt is in the root folder # After git clone, VERSION.txt is in the root folder
VERSION = open(Path(__file__).parent.parent / "VERSION.txt", "r").read() VERSION = open(Path(__file__).parent.parent / "VERSION.txt", "r").read()

View File

@ -1,4 +1,4 @@
from typing import List, Dict, Any, Tuple from typing import List, Dict, Any, Tuple, Optional
import os import os
import logging import logging
@ -112,7 +112,7 @@ def upload_doc(file):
return response return response
def get_backlink(result) -> Tuple[str, str]: def get_backlink(result) -> Tuple[Optional[str], Optional[str]]:
if result.get("document", None): if result.get("document", None):
doc = result["document"] doc = result["document"]
if isinstance(doc, dict): if isinstance(doc, dict):