mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-11-03 03:09:28 +00:00
Introduce pylint & other improvements on the CI (#2130)
* Make mypy check also ui and rest_api, fix ui
* Remove explicit type packages from extras, mypy now downloads them
* Make pylint and mypy run on every file except tests
* Rename tasks
* Change cache key
* Fix mypy errors in rest_api
* Normalize python versions to avoid cache misses
* Add all exclusions to make pylint pass
* Run mypy on rest_api and ui as well
* test if installing the package really changes outcome
* Comment out installation of packages
* Experiment: randomize tests
* Add fallback installation steps on cache misses
* Remove randomization
* Add comment on cache

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
parent
9dc89d2bd2
commit
40328a57b6
135
.github/workflows/linux_ci.yml
vendored
135
.github/workflows/linux_ci.yml
vendored
@ -15,6 +15,42 @@ on:
|
||||
|
||||
jobs:
|
||||
|
||||
type-check:
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/setup-python@v2
|
||||
with:
|
||||
# Mypy can't run properly on 3.7 as it lacks support for Literal types.
|
||||
# FIXME once we drop support for 3.7, use the cache.
|
||||
python-version: 3.8
|
||||
- name: Setup mypy
|
||||
run: |
|
||||
# FIXME installing the packages before running mypy raises
|
||||
# a lot of errors which were never detected before!
|
||||
# pip install .
|
||||
# pip install rest_api/
|
||||
# pip install ui/
|
||||
|
||||
# FIXME --install-types does not work properly yet, see https://github.com/python/mypy/issues/10600
|
||||
# Hotfixing by installing type packages explicitly.
|
||||
# Run mypy --install-types haystack locally to ensure the list is still up to date
|
||||
# mypy --install-types --non-interactive .
|
||||
|
||||
pip install mypy pydantic types-Markdown types-PyYAML types-requests types-setuptools types-six types-tabulate types-chardet types-emoji types-protobuf
|
||||
|
||||
- name: Test with mypy
|
||||
run: |
|
||||
echo "=== haystack/ ==="
|
||||
mypy haystack
|
||||
|
||||
echo "=== rest_api/ ==="
|
||||
mypy rest_api --exclude=rest_api/build/ --exclude=rest_api/test/
|
||||
|
||||
echo "=== ui/ ==="
|
||||
mypy ui --exclude=ui/build/ --exclude=ui/test/
|
||||
|
||||
|
||||
build-cache:
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
@ -31,7 +67,7 @@ jobs:
|
||||
with:
|
||||
path: ${{ env.pythonLocation }}
|
||||
# The cache will be rebuilt every day and at every change of the dependency files
|
||||
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github') }}
|
||||
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}
|
||||
|
||||
- name: Install dependencies
|
||||
if: steps.cache-python-env.outputs.cache-hit != 'true'
|
||||
@ -41,17 +77,9 @@ jobs:
|
||||
pip install rest_api/
|
||||
pip install ui/
|
||||
pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
|
||||
echo "=== pip freeze ==="
|
||||
pip freeze
|
||||
|
||||
prepare-build:
|
||||
needs: build-cache
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- id: set-matrix
|
||||
run: |
|
||||
echo "::set-output name=matrix::$(find $(find . -type d -name test -not -path "./*env*/*") -type f -name test_*.py | jq -SR . | jq -cs .)"
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
|
||||
code-and-docs-updates:
|
||||
needs: build-cache
|
||||
@ -75,11 +103,24 @@ jobs:
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: ${{ env.pythonLocation }}
|
||||
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github') }}
|
||||
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}
|
||||
|
||||
- name: Install Dependencies (on cache miss only)
|
||||
# The cache might miss during the execution of an action: there should always be a fallback step to
|
||||
# rebuild it in case it goes missing
|
||||
if: steps.cache.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
pip install --upgrade pip
|
||||
pip install .[test]
|
||||
pip install rest_api/
|
||||
pip install ui/
|
||||
pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
|
||||
echo "=== pip freeze ==="
|
||||
pip freeze
|
||||
|
||||
# Apply black on the entire codebase
|
||||
# Apply Black on the entire codebase
|
||||
- name: Blacken
|
||||
run: python3 -m black .
|
||||
run: black .
|
||||
|
||||
# Convert the Jupyter notebooks into markdown tutorials
|
||||
- name: Generate Tutorials
|
||||
@ -120,7 +161,8 @@ jobs:
|
||||
git status
|
||||
git push
|
||||
|
||||
type-check:
|
||||
|
||||
linter:
|
||||
needs: code-and-docs-updates
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
@ -129,23 +171,53 @@ jobs:
|
||||
- run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
|
||||
- uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: 3.8
|
||||
python-version: 3.7
|
||||
|
||||
- name: Cache Python
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: ${{ env.pythonLocation }}
|
||||
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github') }}
|
||||
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}
|
||||
|
||||
- name: Test with mypy
|
||||
run: mypy haystack
|
||||
- name: Install Dependencies (on cache miss only)
|
||||
# The cache might miss during the execution of an action: there should always be a fallback step to
|
||||
# rebuild it in case it goes missing
|
||||
if: steps.cache.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
pip install --upgrade pip
|
||||
pip install .[test]
|
||||
pip install rest_api/
|
||||
pip install ui/
|
||||
pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
|
||||
echo "=== pip freeze ==="
|
||||
pip freeze
|
||||
|
||||
build:
|
||||
needs: prepare-build
|
||||
- name: Linter
|
||||
run: |
|
||||
pylint -ry haystack/
|
||||
pylint -ry rest_api/
|
||||
pylint -ry ui/
|
||||
|
||||
|
||||
prepare-matrix:
|
||||
needs: build-cache
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- id: set-matrix
|
||||
run: |
|
||||
find $(find . -type d -name test -not -path "./*env*/*") -type f -name test_*.py | jq -SR . | jq -cs .
|
||||
echo "::set-output name=matrix::$(find $(find . -type d -name test -not -path "./*env*/*") -type f -name test_*.py | jq -SR . | jq -cs .)"
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
|
||||
|
||||
tests:
|
||||
needs: prepare-matrix
|
||||
runs-on: ubuntu-20.04
|
||||
strategy:
|
||||
matrix:
|
||||
test-path: ${{fromJson(needs.prepare-build.outputs.matrix)}}
|
||||
test-path: ${{fromJson(needs.prepare-matrix.outputs.matrix)}}
|
||||
fail-fast: false
|
||||
|
||||
steps:
|
||||
@ -161,7 +233,7 @@ jobs:
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: ${{ env.pythonLocation }}
|
||||
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github') }}
|
||||
key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}
|
||||
|
||||
- name: Run Elasticsearch
|
||||
run: docker run -d -p 9200:9200 -e "discovery.type=single-node" -e "ES_JAVA_OPTS=-Xms128m -Xmx128m" elasticsearch:7.9.2
|
||||
@ -190,11 +262,26 @@ jobs:
|
||||
- name: Install tesseract
|
||||
run: sudo apt-get install tesseract-ocr libtesseract-dev poppler-utils
|
||||
|
||||
- name: Install Dependencies (on cache miss only)
|
||||
# The cache might miss during the execution of an action: there should always be a fallback step to
|
||||
# rebuild it in case it goes missing
|
||||
if: steps.cache.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
pip install --upgrade pip
|
||||
pip install .[test]
|
||||
pip install rest_api/
|
||||
pip install ui/
|
||||
pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
|
||||
echo "=== pip freeze ==="
|
||||
pip freeze
|
||||
|
||||
# Haystack needs to be reinstalled at this stage to make sure the current commit's version is the one getting tested.
|
||||
# The cache can last way longer than a specific action's run, so an older Haystack version could be carried over.
|
||||
- name: Reinstall Haystack
|
||||
run: |
|
||||
pip install .[test]
|
||||
pip install rest_api/
|
||||
pip install eager ui/
|
||||
pip install ui/
|
||||
|
||||
- name: Run tests
|
||||
run: pytest -s ${{ matrix.test-path }}
|
||||
|
||||
File diff suppressed because one or more lines are too long
@ -20,7 +20,93 @@ disable = [
|
||||
"fixme",
|
||||
"protected-access",
|
||||
"too-few-public-methods",
|
||||
"raise-missing-from"
|
||||
"raise-missing-from",
|
||||
|
||||
"invalid-name",
|
||||
"logging-fstring-interpolation",
|
||||
"wrong-import-position",
|
||||
"too-many-locals",
|
||||
"duplicate-code",
|
||||
"too-many-arguments",
|
||||
"arguments-differ",
|
||||
"wrong-import-order",
|
||||
"consider-using-f-string",
|
||||
"no-else-return",
|
||||
"unused-variable",
|
||||
"attribute-defined-outside-init",
|
||||
"too-many-instance-attributes",
|
||||
"no-self-use",
|
||||
"super-with-arguments",
|
||||
"anomalous-backslash-in-string",
|
||||
"redefined-builtin",
|
||||
"logging-format-interpolation",
|
||||
"f-string-without-interpolation",
|
||||
"abstract-method",
|
||||
"too-many-branches",
|
||||
"trailing-whitespace",
|
||||
"unspecified-encoding",
|
||||
"unidiomatic-typecheck",
|
||||
"no-name-in-module",
|
||||
"dangerous-default-value",
|
||||
"unused-import",
|
||||
"consider-using-with",
|
||||
"redefined-outer-name",
|
||||
"cyclic-import",
|
||||
"arguments-renamed",
|
||||
"unnecessary-pass",
|
||||
"ungrouped-imports",
|
||||
"broad-except",
|
||||
"unnecessary-comprehension",
|
||||
"subprocess-run-check",
|
||||
"singleton-comparison",
|
||||
"no-else-raise",
|
||||
"import-outside-toplevel",
|
||||
"consider-iterating-dictionary",
|
||||
"too-many-nested-blocks",
|
||||
"undefined-loop-variable",
|
||||
"too-many-statements",
|
||||
"consider-using-in",
|
||||
"bare-except",
|
||||
"too-many-lines",
|
||||
"unexpected-keyword-arg",
|
||||
"simplifiable-if-expression",
|
||||
"use-list-literal",
|
||||
"reimported",
|
||||
"no-else-continue",
|
||||
"deprecated-method",
|
||||
"consider-using-dict-items",
|
||||
"use-a-generator",
|
||||
"simplifiable-if-statement",
|
||||
"import-error",
|
||||
"consider-using-from-import",
|
||||
"useless-object-inheritance",
|
||||
"use-dict-literal",
|
||||
"unsubscriptable-object",
|
||||
"too-many-return-statements",
|
||||
"superfluous-parens",
|
||||
"no-value-for-parameter",
|
||||
"no-else-break",
|
||||
"inconsistent-return-statements",
|
||||
"consider-using-set-comprehension",
|
||||
"c-extension-no-member",
|
||||
"useless-super-delegation",
|
||||
"useless-else-on-loop",
|
||||
"used-before-assignment",
|
||||
"unsupported-membership-test",
|
||||
"unneeded-not",
|
||||
"unnecessary-lambda",
|
||||
"trailing-newlines",
|
||||
"too-many-boolean-expressions",
|
||||
"super-init-not-called",
|
||||
"pointless-string-statement",
|
||||
"non-parent-init-called",
|
||||
"invalid-sequence-index",
|
||||
"import-self",
|
||||
"deprecated-argument",
|
||||
"access-member-before-definition",
|
||||
|
||||
"invalid-envvar-default",
|
||||
"logging-too-many-args",
|
||||
]
|
||||
[tool.pylint.'DESIGN']
|
||||
max-args=7
|
||||
|
||||
@ -42,11 +42,11 @@ def get_openapi_specs() -> dict:
|
||||
"""
|
||||
app = get_application()
|
||||
return get_openapi(
|
||||
title=app.title if app.title else None,
|
||||
version=app.version if app.version else None,
|
||||
openapi_version=app.openapi_version if app.openapi_version else None,
|
||||
description=app.description if app.description else None,
|
||||
routes=app.routes if app.routes else None,
|
||||
title=app.title,
|
||||
version=app.version,
|
||||
openapi_version=app.openapi_version,
|
||||
description=app.description,
|
||||
routes=app.routes,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@ -1,3 +1,5 @@
|
||||
from typing import Dict, Union, Optional
|
||||
|
||||
import json
|
||||
import logging
|
||||
|
||||
@ -40,8 +42,8 @@ def get_feedback():
|
||||
@router.post("/eval-feedback")
|
||||
def get_feedback_metrics(filters: FilterRequest = None):
|
||||
"""
|
||||
This endpoint returns basic accuracy metrics based on user feedback,
|
||||
e.g., the ratio of correct answers or correctly identified documents.
|
||||
This endpoint returns basic accuracy metrics based on user feedback,
|
||||
e.g., the ratio of correct answers or correctly identified documents.
|
||||
You can filter the output by document or label.
|
||||
|
||||
Example:
|
||||
@ -51,13 +53,14 @@ def get_feedback_metrics(filters: FilterRequest = None):
|
||||
"""
|
||||
|
||||
if filters:
|
||||
filters = filters.filters
|
||||
filters["origin"] = ["user-feedback"]
|
||||
filters_content = filters.filters or {}
|
||||
filters_content["origin"] = ["user-feedback"]
|
||||
else:
|
||||
filters = {"origin": ["user-feedback"]}
|
||||
filters_content = {"origin": ["user-feedback"]}
|
||||
|
||||
labels = DOCUMENT_STORE.get_all_labels(filters=filters)
|
||||
labels = DOCUMENT_STORE.get_all_labels(filters=filters_content)
|
||||
|
||||
res: Dict[str, Optional[Union[float, int]]]
|
||||
if len(labels) > 0:
|
||||
answer_feedback = [1 if l.is_correct_answer else 0 for l in labels]
|
||||
doc_feedback = [1 if l.is_correct_document else 0 for l in labels]
|
||||
|
||||
@ -1,10 +1,11 @@
|
||||
from typing import Optional, List, Union
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Optional, List
|
||||
|
||||
from fastapi import APIRouter, UploadFile, File, Form, HTTPException, Depends
|
||||
from pydantic import BaseModel
|
||||
@ -48,7 +49,8 @@ except KeyError:
|
||||
logger.warning("Indexing Pipeline not found in the YAML configuration. File Upload API will not be available.")
|
||||
|
||||
|
||||
os.makedirs(FILE_UPLOAD_PATH, exist_ok=True) # create directory for uploading files
|
||||
# create directory for uploading files
|
||||
os.makedirs(FILE_UPLOAD_PATH, exist_ok=True)
|
||||
|
||||
|
||||
@as_form
|
||||
@ -75,9 +77,10 @@ class Response(BaseModel):
|
||||
@router.post("/file-upload")
|
||||
def upload_file(
|
||||
files: List[UploadFile] = File(...),
|
||||
meta: Optional[str] = Form("null"), # JSON serialized string
|
||||
fileconverter_params: FileConverterParams = Depends(FileConverterParams.as_form),
|
||||
preprocessor_params: PreprocessorParams = Depends(PreprocessorParams.as_form),
|
||||
# JSON serialized string
|
||||
meta: Optional[str] = Form("null"), # type: ignore
|
||||
fileconverter_params: FileConverterParams = Depends(FileConverterParams.as_form), # type: ignore
|
||||
preprocessor_params: PreprocessorParams = Depends(PreprocessorParams.as_form), # type: ignore
|
||||
):
|
||||
"""
|
||||
You can use this endpoint to upload a file for indexing
|
||||
@ -88,7 +91,7 @@ def upload_file(
|
||||
|
||||
file_paths: list = []
|
||||
file_metas: list = []
|
||||
meta = json.loads(meta) or {}
|
||||
meta_form = json.loads(meta) # type: ignore
|
||||
|
||||
for file in files:
|
||||
try:
|
||||
@ -97,8 +100,8 @@ def upload_file(
|
||||
shutil.copyfileobj(file.file, buffer)
|
||||
|
||||
file_paths.append(file_path)
|
||||
meta["name"] = file.filename
|
||||
file_metas.append(meta)
|
||||
meta_form["name"] = file.filename
|
||||
file_metas.append(meta_form)
|
||||
finally:
|
||||
file.file.close()
|
||||
|
||||
|
||||
@ -44,6 +44,6 @@ def as_form(cls: Type[BaseModel]):
|
||||
|
||||
sig = inspect.signature(_as_form)
|
||||
sig = sig.replace(parameters=new_params)
|
||||
_as_form.__signature__ = sig
|
||||
_as_form.__signature__ = sig # type: ignore
|
||||
setattr(cls, "as_form", _as_form)
|
||||
return cls
|
||||
|
||||
@ -35,7 +35,7 @@ class AnswerSerialized(Answer):
|
||||
@pydantic_dataclass
|
||||
class DocumentSerialized(Document):
|
||||
content: str
|
||||
embedding: Optional[List[float]]
|
||||
embedding: Optional[List[float]] # type: ignore
|
||||
|
||||
|
||||
@pydantic_dataclass
|
||||
|
||||
@ -3,7 +3,7 @@ import logging
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
VERSION = None
|
||||
VERSION = "0.0.0"
|
||||
try:
|
||||
VERSION = open(Path(__file__).parent.parent / "VERSION.txt", "r").read()
|
||||
except Exception as e:
|
||||
|
||||
@ -42,7 +42,6 @@ def exclude_no_answer(responses):
|
||||
return responses
|
||||
|
||||
|
||||
@pytest.mark.elasticsearch
|
||||
@pytest.fixture(scope="session")
|
||||
def client() -> TestClient:
|
||||
os.environ["PIPELINE_YAML_PATH"] = str(
|
||||
@ -55,7 +54,6 @@ def client() -> TestClient:
|
||||
client.post(url="/documents/delete_by_filters", data='{"filters": {}}')
|
||||
|
||||
|
||||
@pytest.mark.elasticsearch
|
||||
@pytest.fixture(scope="session")
|
||||
def populated_client(client: TestClient) -> TestClient:
|
||||
client.post(url="/documents/delete_by_filters", data='{"filters": {}}')
|
||||
|
||||
@ -154,9 +154,7 @@ colab =
|
||||
dev =
|
||||
# Type check
|
||||
mypy
|
||||
types-Markdown
|
||||
types-requests
|
||||
types-PyYAML
|
||||
typing_extensions; python_version < '3.8'
|
||||
# Test
|
||||
pytest
|
||||
responses
|
||||
|
||||
@ -3,7 +3,7 @@ import logging
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
VERSION = None
|
||||
VERSION = "0.0.0"
|
||||
try:
|
||||
# After git clone, VERSION.txt is in the root folder
|
||||
VERSION = open(Path(__file__).parent.parent / "VERSION.txt", "r").read()
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
from typing import List, Dict, Any, Tuple
|
||||
from typing import List, Dict, Any, Tuple, Optional
|
||||
|
||||
import os
|
||||
import logging
|
||||
@ -112,7 +112,7 @@ def upload_doc(file):
|
||||
return response
|
||||
|
||||
|
||||
def get_backlink(result) -> Tuple[str, str]:
|
||||
def get_backlink(result) -> Tuple[Optional[str], Optional[str]]:
|
||||
if result.get("document", None):
|
||||
doc = result["document"]
|
||||
if isinstance(doc, dict):
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user