Introduce pylint & other improvements on the CI (#2130)

* Make mypy check also ui and rest_api, fix ui

* Remove explicit type packages from extras, mypy now downloads them

* Make pylint and mypy run on every file except tests

* Rename tasks

* Change cache key

* Fix mypy errors in rest_api

* Normalize python versions to avoid cache misses

* Add all exclusions to make pylint pass

* Run mypy on rest_api and ui as well

* test if installing the package really changes outcome

* Comment out installation of packages

* Experiment: randomize tests

* Add fallback installation steps on cache misses

* Remove randomization

* Add comment on cache

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
Sara Zan 2022-02-09 18:27:12 +01:00 committed by GitHub
parent 9dc89d2bd2
commit 40328a57b6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 231 additions and 56 deletions

View File

@ -15,6 +15,42 @@ on:
jobs:
# Type-check job: sets up Python 3.8, installs mypy together with an explicit
# list of type-stub packages, then runs mypy separately on haystack/, rest_api/
# and ui/ (the build/ and test/ folders of rest_api and ui are excluded).
type-check:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
# Mypy can't run properly on 3.7 as it misses support for Literal types.
# FIXME once we drop support for 3.7, use the cache.
python-version: 3.8
- name: Setup mypy
run: |
# FIXME installing the packages before running mypy raises
# a lot of errors which were never detected before!
# pip install .
# pip install rest_api/
# pip install ui/
# FIXME --install-types does not work properly yet, see https://github.com/python/mypy/issues/10600
# Hotfixing by installing type packages explicitly.
# Run mypy --install-types haystack locally to ensure the list is still up to date
# mypy --install-types --non-interactive .
pip install mypy pydantic types-Markdown types-PyYAML types-requests types-setuptools types-six types-tabulate types-chardet types-emoji types-protobuf
- name: Test with mypy
run: |
echo "=== haystack/ ==="
mypy haystack
echo "=== rest_api/ ==="
mypy rest_api --exclude=rest_api/build/ --exclude=rest_api/test/
echo "=== ui/ ==="
mypy ui --exclude=ui/build/ --exclude=ui/test/
build-cache:
runs-on: ubuntu-20.04
steps:
@ -31,7 +67,7 @@ jobs:
with:
path: ${{ env.pythonLocation }}
# The cache will be rebuilt every day and at every change of the dependency files
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github') }}
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}
- name: Install dependencies
if: steps.cache-python-env.outputs.cache-hit != 'true'
@ -41,17 +77,9 @@ jobs:
pip install rest_api/
pip install ui/
pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
echo "=== pip freeze ==="
pip freeze
prepare-build:
needs: build-cache
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
- id: set-matrix
run: |
echo "::set-output name=matrix::$(find $(find . -type d -name test -not -path "./*env*/*") -type f -name test_*.py | jq -SR . | jq -cs .)"
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
code-and-docs-updates:
needs: build-cache
@ -75,11 +103,24 @@ jobs:
uses: actions/cache@v2
with:
path: ${{ env.pythonLocation }}
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github') }}
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}
- name: Install Dependencies (on cache miss only)
# The cache might miss during the execution of an action: there should always be a fallback step to
# rebuild it in case it goes missing
if: steps.cache.outputs.cache-hit != 'true'
run: |
pip install --upgrade pip
pip install .[test]
pip install rest_api/
pip install ui/
pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
echo "=== pip freeze ==="
pip freeze
# Apply black on the entire codebase
# Apply Black on the entire codebase
- name: Blacken
run: python3 -m black .
run: black .
# Convert the Jupyter notebooks into markdown tutorials
- name: Generate Tutorials
@ -120,7 +161,8 @@ jobs:
git status
git push
type-check:
linter:
needs: code-and-docs-updates
runs-on: ubuntu-20.04
steps:
@ -129,23 +171,53 @@ jobs:
- run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
- uses: actions/setup-python@v2
with:
python-version: 3.8
python-version: 3.7
- name: Cache Python
uses: actions/cache@v2
with:
path: ${{ env.pythonLocation }}
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github') }}
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}
- name: Test with mypy
run: mypy haystack
- name: Install Dependencies (on cache miss only)
# The cache might miss during the execution of an action: there should always be a fallback step to
# rebuild it in case it goes missing
if: steps.cache.outputs.cache-hit != 'true'
run: |
pip install --upgrade pip
pip install .[test]
pip install rest_api/
pip install ui/
pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
echo "=== pip freeze ==="
pip freeze
build:
needs: prepare-build
- name: Linter
run: |
pylint -ry haystack/
pylint -ry rest_api/
pylint -ry ui/
# Builds the test matrix: finds every test_*.py file located under a directory
# named `test` (paths matching ./*env*/* are skipped) and publishes the list as
# a compact JSON array through the `matrix` output of the `set-matrix` step.
# The find/jq pipeline is run twice on purpose: the first, un-captured run only
# prints the list to the job log; the second one sets the actual output.
prepare-matrix:
needs: build-cache
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
- id: set-matrix
run: |
find $(find . -type d -name test -not -path "./*env*/*") -type f -name test_*.py | jq -SR . | jq -cs .
echo "::set-output name=matrix::$(find $(find . -type d -name test -not -path "./*env*/*") -type f -name test_*.py | jq -SR . | jq -cs .)"
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
tests:
needs: prepare-matrix
runs-on: ubuntu-20.04
strategy:
matrix:
test-path: ${{fromJson(needs.prepare-build.outputs.matrix)}}
test-path: ${{fromJson(needs.prepare-matrix.outputs.matrix)}}
fail-fast: false
steps:
@ -161,7 +233,7 @@ jobs:
uses: actions/cache@v2
with:
path: ${{ env.pythonLocation }}
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github') }}
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}
- name: Run Elasticsearch
run: docker run -d -p 9200:9200 -e "discovery.type=single-node" -e "ES_JAVA_OPTS=-Xms128m -Xmx128m" elasticsearch:7.9.2
@ -190,11 +262,26 @@ jobs:
- name: Install tesseract
run: sudo apt-get install tesseract-ocr libtesseract-dev poppler-utils
- name: Install Dependencies (on cache miss only)
# The cache might miss during the execution of an action: there should always be a fallback step to
# rebuild it in case it goes missing
if: steps.cache.outputs.cache-hit != 'true'
run: |
pip install --upgrade pip
pip install .[test]
pip install rest_api/
pip install ui/
pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
echo "=== pip freeze ==="
pip freeze
# Haystack needs to be reinstalled at this stage to make sure the current commit's version is the one getting tested.
# The cache can last way longer than a specific action's run, so an older Haystack version could be carried over.
- name: Reinstall Haystack
run: |
pip install .[test]
pip install rest_api/
pip install eager ui/
pip install ui/
- name: Run tests
run: pytest -s ${{ matrix.test-path }}

File diff suppressed because one or more lines are too long

View File

@ -20,7 +20,93 @@ disable = [
"fixme",
"protected-access",
"too-few-public-methods",
"raise-missing-from"
"raise-missing-from",
"invalid-name",
"logging-fstring-interpolation",
"wrong-import-position",
"too-many-locals",
"duplicate-code",
"too-many-arguments",
"arguments-differ",
"wrong-import-order",
"consider-using-f-string",
"no-else-return",
"unused-variable",
"attribute-defined-outside-init",
"too-many-instance-attributes",
"no-self-use",
"super-with-arguments",
"anomalous-backslash-in-string",
"redefined-builtin",
"logging-format-interpolation",
"f-string-without-interpolation",
"abstract-method",
"too-many-branches",
"trailing-whitespace",
"unspecified-encoding",
"unidiomatic-typecheck",
"no-name-in-module",
"dangerous-default-value",
"unused-import",
"consider-using-with",
"redefined-outer-name",
"cyclic-import",
"arguments-renamed",
"unnecessary-pass",
"ungrouped-imports",
"broad-except",
"unnecessary-comprehension",
"subprocess-run-check",
"singleton-comparison",
"no-else-raise",
"import-outside-toplevel",
"consider-iterating-dictionary",
"too-many-nested-blocks",
"undefined-loop-variable",
"too-many-statements",
"consider-using-in",
"bare-except",
"too-many-lines",
"unexpected-keyword-arg",
"simplifiable-if-expression",
"use-list-literal",
"reimported",
"no-else-continue",
"deprecated-method",
"consider-using-dict-items",
"use-a-generator",
"simplifiable-if-statement",
"import-error",
"consider-using-from-import",
"useless-object-inheritance",
"use-dict-literal",
"unsubscriptable-object",
"too-many-return-statements",
"superfluous-parens",
"no-value-for-parameter",
"no-else-break",
"inconsistent-return-statements",
"consider-using-set-comprehension",
"c-extension-no-member",
"useless-super-delegation",
"useless-else-on-loop",
"used-before-assignment",
"unsupported-membership-test",
"unneeded-not",
"unnecessary-lambda",
"trailing-newlines",
"too-many-boolean-expressions",
"super-init-not-called",
"pointless-string-statement",
"non-parent-init-called",
"invalid-sequence-index",
"import-self",
"deprecated-argument",
"access-member-before-definition",
"invalid-envvar-default",
"logging-too-many-args",
]
[tool.pylint.'DESIGN']
max-args=7

View File

@ -42,11 +42,11 @@ def get_openapi_specs() -> dict:
"""
app = get_application()
return get_openapi(
title=app.title if app.title else None,
version=app.version if app.version else None,
openapi_version=app.openapi_version if app.openapi_version else None,
description=app.description if app.description else None,
routes=app.routes if app.routes else None,
title=app.title,
version=app.version,
openapi_version=app.openapi_version,
description=app.description,
routes=app.routes,
)

View File

@ -1,3 +1,5 @@
from typing import Dict, Union, Optional
import json
import logging
@ -40,8 +42,8 @@ def get_feedback():
@router.post("/eval-feedback")
def get_feedback_metrics(filters: FilterRequest = None):
"""
This endpoint returns basic accuracy metrics based on user feedback,
e.g., the ratio of correct answers or correctly identified documents.
This endpoint returns basic accuracy metrics based on user feedback,
e.g., the ratio of correct answers or correctly identified documents.
You can filter the output by document or label.
Example:
@ -51,13 +53,14 @@ def get_feedback_metrics(filters: FilterRequest = None):
"""
if filters:
filters = filters.filters
filters["origin"] = ["user-feedback"]
filters_content = filters.filters or {}
filters_content["origin"] = ["user-feedback"]
else:
filters = {"origin": ["user-feedback"]}
filters_content = {"origin": ["user-feedback"]}
labels = DOCUMENT_STORE.get_all_labels(filters=filters)
labels = DOCUMENT_STORE.get_all_labels(filters=filters_content)
res: Dict[str, Optional[Union[float, int]]]
if len(labels) > 0:
answer_feedback = [1 if l.is_correct_answer else 0 for l in labels]
doc_feedback = [1 if l.is_correct_document else 0 for l in labels]

View File

@ -1,10 +1,11 @@
from typing import Optional, List, Union
import json
import logging
import os
import shutil
import uuid
from pathlib import Path
from typing import Optional, List
from fastapi import APIRouter, UploadFile, File, Form, HTTPException, Depends
from pydantic import BaseModel
@ -48,7 +49,8 @@ except KeyError:
logger.warning("Indexing Pipeline not found in the YAML configuration. File Upload API will not be available.")
os.makedirs(FILE_UPLOAD_PATH, exist_ok=True) # create directory for uploading files
# create directory for uploading files
os.makedirs(FILE_UPLOAD_PATH, exist_ok=True)
@as_form
@ -75,9 +77,10 @@ class Response(BaseModel):
@router.post("/file-upload")
def upload_file(
files: List[UploadFile] = File(...),
meta: Optional[str] = Form("null"), # JSON serialized string
fileconverter_params: FileConverterParams = Depends(FileConverterParams.as_form),
preprocessor_params: PreprocessorParams = Depends(PreprocessorParams.as_form),
# JSON serialized string
meta: Optional[str] = Form("null"), # type: ignore
fileconverter_params: FileConverterParams = Depends(FileConverterParams.as_form), # type: ignore
preprocessor_params: PreprocessorParams = Depends(PreprocessorParams.as_form), # type: ignore
):
"""
You can use this endpoint to upload a file for indexing
@ -88,7 +91,7 @@ def upload_file(
file_paths: list = []
file_metas: list = []
meta = json.loads(meta) or {}
meta_form = json.loads(meta) # type: ignore
for file in files:
try:
@ -97,8 +100,8 @@ def upload_file(
shutil.copyfileobj(file.file, buffer)
file_paths.append(file_path)
meta["name"] = file.filename
file_metas.append(meta)
meta_form["name"] = file.filename
file_metas.append(meta_form)
finally:
file.file.close()

View File

@ -44,6 +44,6 @@ def as_form(cls: Type[BaseModel]):
sig = inspect.signature(_as_form)
sig = sig.replace(parameters=new_params)
_as_form.__signature__ = sig
_as_form.__signature__ = sig # type: ignore
setattr(cls, "as_form", _as_form)
return cls

View File

@ -35,7 +35,7 @@ class AnswerSerialized(Answer):
@pydantic_dataclass
class DocumentSerialized(Document):
content: str
embedding: Optional[List[float]]
embedding: Optional[List[float]] # type: ignore
@pydantic_dataclass

View File

@ -3,7 +3,7 @@ import logging
from pathlib import Path
VERSION = None
VERSION = "0.0.0"
try:
VERSION = open(Path(__file__).parent.parent / "VERSION.txt", "r").read()
except Exception as e:

View File

@ -42,7 +42,6 @@ def exclude_no_answer(responses):
return responses
@pytest.mark.elasticsearch
@pytest.fixture(scope="session")
def client() -> TestClient:
os.environ["PIPELINE_YAML_PATH"] = str(
@ -55,7 +54,6 @@ def client() -> TestClient:
client.post(url="/documents/delete_by_filters", data='{"filters": {}}')
@pytest.mark.elasticsearch
@pytest.fixture(scope="session")
def populated_client(client: TestClient) -> TestClient:
client.post(url="/documents/delete_by_filters", data='{"filters": {}}')

View File

@ -154,9 +154,7 @@ colab =
dev =
# Type check
mypy
types-Markdown
types-requests
types-PyYAML
typing_extensions; python_version < '3.8'
# Test
pytest
responses

View File

@ -3,7 +3,7 @@ import logging
from pathlib import Path
VERSION = None
VERSION = "0.0.0"
try:
# After git clone, VERSION.txt is in the root folder
VERSION = open(Path(__file__).parent.parent / "VERSION.txt", "r").read()

View File

@ -1,4 +1,4 @@
from typing import List, Dict, Any, Tuple
from typing import List, Dict, Any, Tuple, Optional
import os
import logging
@ -112,7 +112,7 @@ def upload_doc(file):
return response
def get_backlink(result) -> Tuple[str, str]:
def get_backlink(result) -> Tuple[Optional[str], Optional[str]]:
if result.get("document", None):
doc = result["document"]
if isinstance(doc, dict):