From 40328a57b6d5760e51eeb9366838ce2b25f5f2fa Mon Sep 17 00:00:00 2001 From: Sara Zan Date: Wed, 9 Feb 2022 18:27:12 +0100 Subject: [PATCH] Introduce pylint & other improvements on the CI (#2130) * Make mypy check also ui and rest_api, fix ui * Remove explicit type packages from extras, mypy now downloads them * Make pylint and mypy run on every file except tests * Rename tasks * Change cache key * Fix mypy errors in rest_api * Normalize python versions to avoid cache misses * Add all exclusions to make pylint pass * Run mypy on rest_api and ui as well * test if installing the package really changes outcome * Comment out installation of packages * Experiment: randomize tests * Add fallback installation steps on cache misses * Remove randomization * Add comment on cache Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- .github/workflows/linux_ci.yml | 135 ++++++++++++++++++++++++----- docs/_src/api/openapi/openapi.json | 2 +- pyproject.toml | 88 ++++++++++++++++++- rest_api/application.py | 10 +-- rest_api/controller/feedback.py | 15 ++-- rest_api/controller/file_upload.py | 19 ++-- rest_api/controller/utils.py | 2 +- rest_api/schema.py | 2 +- rest_api/setup.py | 2 +- rest_api/test/test_rest_api.py | 2 - setup.cfg | 4 +- ui/setup.py | 2 +- ui/utils.py | 4 +- 13 files changed, 231 insertions(+), 56 deletions(-) diff --git a/.github/workflows/linux_ci.yml b/.github/workflows/linux_ci.yml index 1405bf107..a5d726775 100644 --- a/.github/workflows/linux_ci.yml +++ b/.github/workflows/linux_ci.yml @@ -15,6 +15,42 @@ on: jobs: + type-check: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + # Mypy can't run properly on 3.7 as it misses support for Literal types. + # FIXME once we drop support for 3.7, use the cache. + python-version: 3.8 + - name: Setup mypy + run: | + # FIXME installing the packages before running mypy raises + # a lot of errors which were never detected before! + # pip install . + # pip install rest_api/ + # pip install ui/ + + # FIXME --install-types does not work properly yet, see https://github.com/python/mypy/issues/10600 + # Hotfixing by installing type packages explicitly. + # Run mypy --install-types haystack locally to ensure the list is still up to date + # mypy --install-types --non-interactive . + + pip install mypy pydantic types-Markdown types-PyYAML types-requests types-setuptools types-six types-tabulate types-chardet types-emoji types-protobuf + + - name: Test with mypy + run: | + echo "=== haystack/ ===" + mypy haystack + + echo "=== rest_api/ ===" + mypy rest_api --exclude=rest_api/build/ --exclude=rest_api/test/ + + echo "=== ui/ ===" + mypy ui --exclude=ui/build/ --exclude=ui/test/ + + build-cache: runs-on: ubuntu-20.04 steps: @@ -31,7 +67,7 @@ jobs: with: path: ${{ env.pythonLocation }} # The cache will be rebuild every day and at every change of the dependency files - key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github') }} + key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }} - name: Install dependencies if: steps.cache-python-env.outputs.cache-hit != 'true' @@ -41,17 +77,9 @@ jobs: pip install rest_api/ pip install ui/ pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html + echo "=== pip freeze ===" + pip freeze - prepare-build: - needs: build-cache - runs-on: ubuntu-20.04 - steps: - - uses: actions/checkout@v2 - - id: set-matrix - run: | - echo "::set-output name=matrix::$(find $(find . -type d -name test -not -path "./*env*/*") -type f -name test_*.py | jq -SR . | jq -cs .)" - outputs: - matrix: ${{ steps.set-matrix.outputs.matrix }} code-and-docs-updates: needs: build-cache @@ -75,11 +103,24 @@ jobs: uses: actions/cache@v2 with: path: ${{ env.pythonLocation }} - key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github') }} + key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }} + + - name: Install Dependencies (on cache miss only) + # The cache might miss during the execution of an action: there should always be a fallback step to + # rebuild it in case it goes missing + if: steps.cache.outputs.cache-hit != 'true' + run: | + pip install --upgrade pip + pip install .[test] + pip install rest_api/ + pip install ui/ + pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html + echo "=== pip freeze ===" + pip freeze - # Apply black on the entire codebase + # Apply Black on the entire codebase - name: Blacken - run: python3 -m black . + run: black . # Convert the Jupyter notebooks into markdown tutorials - name: Generate Tutorials @@ -120,7 +161,8 @@ jobs: git status git push - type-check: + + linter: needs: code-and-docs-updates runs-on: ubuntu-20.04 steps: @@ -129,23 +171,53 @@ jobs: - run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV - uses: actions/setup-python@v2 with: - python-version: 3.8 + python-version: 3.7 - name: Cache Python uses: actions/cache@v2 with: path: ${{ env.pythonLocation }} - key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github') }} + key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }} - - name: Test with mypy - run: mypy haystack + - name: Install Dependencies (on cache miss only) + # The cache might miss during the execution of an action: there should always be a fallback step to + # rebuild it in case it goes missing + if: steps.cache.outputs.cache-hit != 'true' + run: | + pip install --upgrade pip + pip install .[test] + pip install rest_api/ + pip install ui/ + pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html + echo "=== pip freeze ===" + pip freeze - build: - needs: prepare-build + - name: Linter + run: | + pylint -ry haystack/ + pylint -ry rest_api/ + pylint -ry ui/ + + + prepare-matrix: + needs: build-cache + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + - id: set-matrix + run: | + find $(find . -type d -name test -not -path "./*env*/*") -type f -name test_*.py | jq -SR . | jq -cs . + echo "::set-output name=matrix::$(find $(find . -type d -name test -not -path "./*env*/*") -type f -name test_*.py | jq -SR . | jq -cs .)" + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + + + tests: + needs: prepare-matrix runs-on: ubuntu-20.04 strategy: matrix: - test-path: ${{fromJson(needs.prepare-build.outputs.matrix)}} + test-path: ${{fromJson(needs.prepare-matrix.outputs.matrix)}} fail-fast: false steps: @@ -161,7 +233,7 @@ jobs: uses: actions/cache@v2 with: path: ${{ env.pythonLocation }} - key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github') }} + key: linux-${{ env.date }}-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }} - name: Run Elasticsearch run: docker run -d -p 9200:9200 -e "discovery.type=single-node" -e "ES_JAVA_OPTS=-Xms128m -Xmx128m" elasticsearch:7.9.2 @@ -190,11 +262,26 @@ jobs: - name: Install tesseract run: sudo apt-get install tesseract-ocr libtesseract-dev poppler-utils + - name: Install Dependencies (on cache miss only) + # The cache might miss during the execution of an action: there should always be a fallback step to + # rebuild it in case it goes missing + if: steps.cache.outputs.cache-hit != 'true' + run: | + pip install --upgrade pip + pip install .[test] + pip install rest_api/ + pip install ui/ + pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html + echo "=== pip freeze ===" + pip freeze + + # Haystack needs to be reinstalled at this stage to make sure the current commit's version is the one getting tested. + # The cache can last way longer than a specific action's run, so older Haystack version could be carried over. - name: Reinstall Haystack run: | pip install .[test] pip install rest_api/ - pip install eager ui/ + pip install ui/ - name: Run tests run: pytest -s ${{ matrix.test-path }} diff --git a/docs/_src/api/openapi/openapi.json b/docs/_src/api/openapi/openapi.json index ee901106a..b8ab396b1 100644 --- a/docs/_src/api/openapi/openapi.json +++ b/docs/_src/api/openapi/openapi.json @@ -1 +1 @@ -{"openapi": "3.0.2", "info": {"title": "Haystack-API", "version": "1.0.0"}, "paths": {"/initialized": {"get": {"tags": ["search"], "summary": "Check Status", "description": "This endpoint can be used during startup to understand if the\nserver is ready to take any requests, or is still loading.\n\nThe recommended approach is to call this endpoint with a short timeout,\nlike 500ms, and in case of no reply, consider the server busy.", "operationId": "check_status_initialized_get", "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {}}}}}}}, "/hs_version": {"get": {"tags": ["search"], "summary": "Haystack Version", "description": "Get the running Haystack version.", "operationId": "haystack_version_hs_version_get", "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {}}}}}}}, "/query": {"post": {"tags": ["search"], "summary": "Query", "description": "This endpoint receives the question as a string and allows the requester to set\nadditional parameters that will be passed on to the Haystack pipeline.", "operationId": "query_query_post", "requestBody": {"content": {"application/json": {"schema": {"$ref": "#/components/schemas/QueryRequest"}}}, "required": true}, "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/QueryResponse"}}}}, "422": {"description": "Validation Error", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/HTTPValidationError"}}}}}}}, "/feedback": {"get": {"tags": ["feedback"], "summary": "Get Feedback", "description": "This endpoint allows the API user to retrieve all the\nfeedback that has been sumbitted through the\n`POST /feedback` endpoint", "operationId": "get_feedback_feedback_get", "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {}}}}}}, "post": {"tags": ["feedback"], "summary": "Post Feedback", "description": "This endpoint allows the API user to submit feedback on\nan answer for a particular query. For example, the user\ncan send feedback on whether the answer was correct and\nwhether the right snippet was identified as the answer.\nInformation submitted through this endpoint is used to\ntrain the underlying QA model.", "operationId": "post_feedback_feedback_post", "requestBody": {"content": {"application/json": {"schema": {"$ref": "#/components/schemas/LabelSerialized"}}}, "required": true}, "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {}}}}, "422": {"description": "Validation Error", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/HTTPValidationError"}}}}}}}, "/eval-feedback": {"post": {"tags": ["feedback"], "summary": "Get Feedback Metrics", "description": "This endpoint returns basic accuracy metrics based on user feedback, \ne.g., the ratio of correct answers or correctly identified documents. \nYou can filter the output by document or label.\n\nExample:\n`curl --location --request POST 'http://127.0.0.1:8000/eval-doc-qa-feedback' --header 'Content-Type: application/json' --data-raw '{ \"filters\": {\"document_id\": [\"XRR3xnEBCYVTkbTystOB\"]} }'`", "operationId": "get_feedback_metrics_eval_feedback_post", "requestBody": {"content": {"application/json": {"schema": {"$ref": "#/components/schemas/FilterRequest"}}}}, "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {}}}}, "422": {"description": "Validation Error", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/HTTPValidationError"}}}}}}}, "/export-feedback": {"get": {"tags": ["feedback"], "summary": "Export Feedback", "description": "This endpoint returns JSON output in the SQuAD format for question/answer pairs\nthat were marked as \"relevant\" by user feedback through the `POST /feedback` endpoint.\n\nThe context_size param can be used to limit response size for large documents.", "operationId": "export_feedback_export_feedback_get", "parameters": [{"required": false, "schema": {"title": "Context Size", "type": "integer", "default": 100000}, "name": "context_size", "in": "query"}, {"required": false, "schema": {"title": "Full Document Context", "type": "boolean", "default": true}, "name": "full_document_context", "in": "query"}, {"required": false, "schema": {"title": "Only Positive Labels", "type": "boolean", "default": false}, "name": "only_positive_labels", "in": "query"}], "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {}}}}, "422": {"description": "Validation Error", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/HTTPValidationError"}}}}}}}, "/file-upload": {"post": {"tags": ["file-upload"], "summary": "Upload File", "description": "You can use this endpoint to upload a file for indexing\n(see [http://localhost:3000/guides/rest-api#indexing-documents-in-the-haystack-rest-api-document-store]).", "operationId": "upload_file_file_upload_post", "requestBody": {"content": {"multipart/form-data": {"schema": {"$ref": "#/components/schemas/Body_upload_file_file_upload_post"}}}, "required": true}, "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {}}}}, "422": {"description": "Validation Error", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/HTTPValidationError"}}}}}}}, "/documents/get_by_filters": {"post": {"tags": ["document"], "summary": "Get Documents", "description": "This endpoint allows you to retrieve documents contained in your document store.\nYou can filter the documents to delete by metadata (like the document's name),\nor provide an empty JSON object to clear the document store.\n\nExample of filters:\n`'{\"filters\": {{\"name\": [\"some\", \"more\"], \"category\": [\"only_one\"]}}'`\n\nTo get all documents you should provide an empty dict, like:\n`'{\"filters\": {}}'`", "operationId": "get_documents_documents_get_by_filters_post", "requestBody": {"content": {"application/json": {"schema": {"$ref": "#/components/schemas/FilterRequest"}}}, "required": true}, "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {"title": "Response Get Documents Documents Get By Filters Post", "type": "array", "items": {"$ref": "#/components/schemas/DocumentSerialized"}}}}}, "422": {"description": "Validation Error", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/HTTPValidationError"}}}}}}}, "/documents/delete_by_filters": {"post": {"tags": ["document"], "summary": "Delete Documents", "description": "This endpoint allows you to delete documents contained in your document store.\nYou can filter the documents to delete by metadata (like the document's name),\nor provide an empty JSON object to clear the document store.\n\nExample of filters:\n`'{\"filters\": {{\"name\": [\"some\", \"more\"], \"category\": [\"only_one\"]}}'`\n\nTo get all documents you should provide an empty dict, like:\n`'{\"filters\": {}}'`", "operationId": "delete_documents_documents_delete_by_filters_post", "requestBody": {"content": {"application/json": {"schema": {"$ref": "#/components/schemas/FilterRequest"}}}, "required": true}, "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {"title": "Response Delete Documents Documents Delete By Filters Post", "type": "boolean"}}}}, "422": {"description": "Validation Error", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/HTTPValidationError"}}}}}}}}, "components": {"schemas": {"AnswerSerialized": {"title": "AnswerSerialized", "required": ["answer"], "type": "object", "properties": {"answer": {"title": "Answer", "type": "string"}, "type": {"title": "Type", "enum": ["generative", "extractive", "other"], "type": "string", "default": "extractive"}, "score": {"title": "Score", "type": "number"}, "context": {"title": "Context", "type": "string"}, "offsets_in_document": {"title": "Offsets In Document", "type": "array", "items": {"$ref": "#/components/schemas/Span"}}, "offsets_in_context": {"title": "Offsets In Context", "type": "array", "items": {"$ref": "#/components/schemas/Span"}}, "document_id": {"title": "Document Id", "type": "string"}, "meta": {"title": "Meta", "type": "object"}}}, "Body_upload_file_file_upload_post": {"title": "Body_upload_file_file_upload_post", "required": ["files"], "type": "object", "properties": {"files": {"title": "Files", "type": "array", "items": {"type": "string", "format": "binary"}}, "meta": {"title": "Meta", "type": "string", "default": "null"}, "remove_numeric_tables": {"title": "Remove Numeric Tables"}, "valid_languages": {"title": "Valid Languages"}, "clean_whitespace": {"title": "Clean Whitespace"}, "clean_empty_lines": {"title": "Clean Empty Lines"}, "clean_header_footer": {"title": "Clean Header Footer"}, "split_by": {"title": "Split By"}, "split_length": {"title": "Split Length"}, "split_overlap": {"title": "Split Overlap"}, "split_respect_sentence_boundary": {"title": "Split Respect Sentence Boundary"}}}, "DocumentSerialized": {"title": "DocumentSerialized", "required": ["content", "content_type", "id", "meta"], "type": "object", "properties": {"content": {"title": "Content", "type": "string"}, "content_type": {"title": "Content Type", "enum": ["text", "table", "image"], "type": "string"}, "id": {"title": "Id", "type": "string"}, "meta": {"title": "Meta", "type": "object"}, "score": {"title": "Score", "type": "number"}, "embedding": {"title": "Embedding", "type": "array", "items": {"type": "number"}}, "id_hash_keys": {"title": "Id Hash Keys", "type": "array", "items": {"type": "string"}}}}, "FilterRequest": {"title": "FilterRequest", "type": "object", "properties": {"filters": {"title": "Filters", "type": "object", "additionalProperties": {"anyOf": [{"type": "string"}, {"type": "array", "items": {"type": "string"}}]}}}}, "HTTPValidationError": {"title": "HTTPValidationError", "type": "object", "properties": {"detail": {"title": "Detail", "type": "array", "items": {"$ref": "#/components/schemas/ValidationError"}}}}, "LabelSerialized": {"title": "LabelSerialized", "required": ["id", "query", "document", "is_correct_answer", "is_correct_document", "origin"], "type": "object", "properties": {"id": {"title": "Id", "type": "string"}, "query": {"title": "Query", "type": "string"}, "document": {"$ref": "#/components/schemas/DocumentSerialized"}, "is_correct_answer": {"title": "Is Correct Answer", "type": "boolean"}, "is_correct_document": {"title": "Is Correct Document", "type": "boolean"}, "origin": {"title": "Origin", "enum": ["user-feedback", "gold-label"], "type": "string"}, "answer": {"$ref": "#/components/schemas/AnswerSerialized"}, "no_answer": {"title": "No Answer", "type": "boolean"}, "pipeline_id": {"title": "Pipeline Id", "type": "string"}, "created_at": {"title": "Created At", "type": "string"}, "updated_at": {"title": "Updated At", "type": "string"}, "meta": {"title": "Meta", "type": "object"}, "filters": {"title": "Filters", "type": "object"}}}, "QueryRequest": {"title": "QueryRequest", "required": ["query"], "type": "object", "properties": {"query": {"title": "Query", "type": "string"}, "params": {"title": "Params", "type": "object"}, "debug": {"title": "Debug", "type": "boolean", "default": false}}, "additionalProperties": false}, "QueryResponse": {"title": "QueryResponse", "required": ["query", "answers"], "type": "object", "properties": {"query": {"title": "Query", "type": "string"}, "answers": {"title": "Answers", "type": "array", "items": {"$ref": "#/components/schemas/AnswerSerialized"}}, "documents": {"title": "Documents", "type": "array", "items": {"$ref": "#/components/schemas/DocumentSerialized"}}, "_debug": {"title": " Debug", "type": "object"}}}, "Span": {"title": "Span", "required": ["start", "end"], "type": "object", "properties": {"start": {"title": "Start", "type": "integer"}, "end": {"title": "End", "type": "integer"}}}, "ValidationError": {"title": "ValidationError", "required": ["loc", "msg", "type"], "type": "object", "properties": {"loc": {"title": "Location", "type": "array", "items": {"type": "string"}}, "msg": {"title": "Message", "type": "string"}, "type": {"title": "Error Type", "type": "string"}}}}}} \ No newline at end of file +{"openapi": "3.0.2", "info": {"title": "Haystack-API", "version": "1.0.0"}, "paths": {"/initialized": {"get": {"tags": ["search"], "summary": "Check Status", "description": "This endpoint can be used during startup to understand if the\nserver is ready to take any requests, or is still loading.\n\nThe recommended approach is to call this endpoint with a short timeout,\nlike 500ms, and in case of no reply, consider the server busy.", "operationId": "check_status_initialized_get", "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {}}}}}}}, "/hs_version": {"get": {"tags": ["search"], "summary": "Haystack Version", "description": "Get the running Haystack version.", "operationId": "haystack_version_hs_version_get", "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {}}}}}}}, "/query": {"post": {"tags": ["search"], "summary": "Query", "description": "This endpoint receives the question as a string and allows the requester to set\nadditional parameters that will be passed on to the Haystack pipeline.", "operationId": "query_query_post", "requestBody": {"content": {"application/json": {"schema": {"$ref": "#/components/schemas/QueryRequest"}}}, "required": true}, "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/QueryResponse"}}}}, "422": {"description": "Validation Error", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/HTTPValidationError"}}}}}}}, "/feedback": {"get": {"tags": ["feedback"], "summary": "Get Feedback", "description": "This endpoint allows the API user to retrieve all the\nfeedback that has been sumbitted through the\n`POST /feedback` endpoint", "operationId": "get_feedback_feedback_get", "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {}}}}}}, "post": {"tags": ["feedback"], "summary": "Post Feedback", "description": "This endpoint allows the API user to submit feedback on\nan answer for a particular query. For example, the user\ncan send feedback on whether the answer was correct and\nwhether the right snippet was identified as the answer.\nInformation submitted through this endpoint is used to\ntrain the underlying QA model.", "operationId": "post_feedback_feedback_post", "requestBody": {"content": {"application/json": {"schema": {"$ref": "#/components/schemas/LabelSerialized"}}}, "required": true}, "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {}}}}, "422": {"description": "Validation Error", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/HTTPValidationError"}}}}}}}, "/eval-feedback": {"post": {"tags": ["feedback"], "summary": "Get Feedback Metrics", "description": "This endpoint returns basic accuracy metrics based on user feedback,\ne.g., the ratio of correct answers or correctly identified documents.\nYou can filter the output by document or label.\n\nExample:\n`curl --location --request POST 'http://127.0.0.1:8000/eval-doc-qa-feedback' --header 'Content-Type: application/json' --data-raw '{ \"filters\": {\"document_id\": [\"XRR3xnEBCYVTkbTystOB\"]} }'`", "operationId": "get_feedback_metrics_eval_feedback_post", "requestBody": {"content": {"application/json": {"schema": {"$ref": "#/components/schemas/FilterRequest"}}}}, "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {}}}}, "422": {"description": "Validation Error", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/HTTPValidationError"}}}}}}}, "/export-feedback": {"get": {"tags": ["feedback"], "summary": "Export Feedback", "description": "This endpoint returns JSON output in the SQuAD format for question/answer pairs\nthat were marked as \"relevant\" by user feedback through the `POST /feedback` endpoint.\n\nThe context_size param can be used to limit response size for large documents.", "operationId": "export_feedback_export_feedback_get", "parameters": [{"required": false, "schema": {"title": "Context Size", "type": "integer", "default": 100000}, "name": "context_size", "in": "query"}, {"required": false, "schema": {"title": "Full Document Context", "type": "boolean", "default": true}, "name": "full_document_context", "in": "query"}, {"required": false, "schema": {"title": "Only Positive Labels", "type": "boolean", "default": false}, "name": "only_positive_labels", "in": "query"}], "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {}}}}, "422": {"description": "Validation Error", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/HTTPValidationError"}}}}}}}, "/file-upload": {"post": {"tags": ["file-upload"], "summary": "Upload File", "description": "You can use this endpoint to upload a file for indexing\n(see [http://localhost:3000/guides/rest-api#indexing-documents-in-the-haystack-rest-api-document-store]).", "operationId": "upload_file_file_upload_post", "requestBody": {"content": {"multipart/form-data": {"schema": {"$ref": "#/components/schemas/Body_upload_file_file_upload_post"}}}, "required": true}, "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {}}}}, "422": {"description": "Validation Error", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/HTTPValidationError"}}}}}}}, "/documents/get_by_filters": {"post": {"tags": ["document"], "summary": "Get Documents", "description": "This endpoint allows you to retrieve documents contained in your document store.\nYou can filter the documents to delete by metadata (like the document's name),\nor provide an empty JSON object to clear the document store.\n\nExample of filters:\n`'{\"filters\": {{\"name\": [\"some\", \"more\"], \"category\": [\"only_one\"]}}'`\n\nTo get all documents you should provide an empty dict, like:\n`'{\"filters\": {}}'`", "operationId": "get_documents_documents_get_by_filters_post", "requestBody": {"content": {"application/json": {"schema": {"$ref": "#/components/schemas/FilterRequest"}}}, "required": true}, "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {"title": "Response Get Documents Documents Get By Filters Post", "type": "array", "items": {"$ref": "#/components/schemas/DocumentSerialized"}}}}}, "422": {"description": "Validation Error", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/HTTPValidationError"}}}}}}}, "/documents/delete_by_filters": {"post": {"tags": ["document"], "summary": "Delete Documents", "description": "This endpoint allows you to delete documents contained in your document store.\nYou can filter the documents to delete by metadata (like the document's name),\nor provide an empty JSON object to clear the document store.\n\nExample of filters:\n`'{\"filters\": {{\"name\": [\"some\", \"more\"], \"category\": [\"only_one\"]}}'`\n\nTo get all documents you should provide an empty dict, like:\n`'{\"filters\": {}}'`", "operationId": "delete_documents_documents_delete_by_filters_post", "requestBody": {"content": {"application/json": {"schema": {"$ref": "#/components/schemas/FilterRequest"}}}, "required": true}, "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {"title": "Response Delete Documents Documents Delete By Filters Post", "type": "boolean"}}}}, "422": {"description": "Validation Error", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/HTTPValidationError"}}}}}}}}, "components": {"schemas": {"AnswerSerialized": {"title": "AnswerSerialized", "required": ["answer"], "type": "object", "properties": {"answer": {"title": "Answer", "type": "string"}, "type": {"title": "Type", "enum": ["generative", "extractive", "other"], "type": "string", "default": "extractive"}, "score": {"title": "Score", "type": "number"}, "context": {"title": "Context", "type": "string"}, "offsets_in_document": {"title": "Offsets In Document", "type": "array", "items": {"$ref": "#/components/schemas/Span"}}, "offsets_in_context": {"title": "Offsets In Context", "type": "array", "items": {"$ref": "#/components/schemas/Span"}}, "document_id": {"title": "Document Id", "type": "string"}, "meta": {"title": "Meta", "type": "object"}}}, "Body_upload_file_file_upload_post": {"title": "Body_upload_file_file_upload_post", "required": ["files"], "type": "object", "properties": {"files": {"title": "Files", "type": "array", "items": {"type": "string", "format": "binary"}}, "meta": {"title": "Meta", "type": "string", "default": "null"}, "remove_numeric_tables": {"title": "Remove Numeric Tables"}, "valid_languages": {"title": "Valid Languages"}, "clean_whitespace": {"title": "Clean Whitespace"}, "clean_empty_lines": {"title": "Clean Empty Lines"}, "clean_header_footer": {"title": "Clean Header Footer"}, "split_by": {"title": "Split By"}, "split_length": {"title": "Split Length"}, "split_overlap": {"title": "Split Overlap"}, "split_respect_sentence_boundary": {"title": "Split Respect Sentence Boundary"}}}, "DocumentSerialized": {"title": "DocumentSerialized", "required": ["content", "content_type", "id", "meta"], "type": "object", "properties": {"content": {"title": "Content", "type": "string"}, "content_type": {"title": "Content Type", "enum": ["text", "table", "image"], "type": "string"}, "id": {"title": "Id", "type": "string"}, "meta": {"title": "Meta", "type": "object"}, "score": {"title": "Score", "type": "number"}, "embedding": {"title": "Embedding", "type": "array", "items": {"type": "number"}}, "id_hash_keys": {"title": "Id Hash Keys", "type": "array", "items": {"type": "string"}}}}, "FilterRequest": {"title": "FilterRequest", "type": "object", "properties": {"filters": {"title": "Filters", "type": "object", "additionalProperties": {"anyOf": [{"type": "string"}, {"type": "array", "items": {"type": "string"}}]}}}}, "HTTPValidationError": {"title": "HTTPValidationError", "type": "object", "properties": {"detail": {"title": "Detail", "type": "array", "items": {"$ref": "#/components/schemas/ValidationError"}}}}, "LabelSerialized": {"title": "LabelSerialized", "required": ["id", "query", "document", "is_correct_answer", "is_correct_document", "origin"], "type": "object", "properties": {"id": {"title": "Id", "type": "string"}, "query": {"title": "Query", "type": "string"}, "document": {"$ref": "#/components/schemas/DocumentSerialized"}, "is_correct_answer": {"title": "Is Correct Answer", "type": "boolean"}, "is_correct_document": {"title": "Is Correct Document", "type": "boolean"}, "origin": {"title": "Origin", "enum": ["user-feedback", "gold-label"], "type": "string"}, "answer": {"$ref": "#/components/schemas/AnswerSerialized"}, "no_answer": {"title": "No Answer", "type": "boolean"}, "pipeline_id": {"title": "Pipeline Id", "type": "string"}, "created_at": {"title": "Created At", "type": "string"}, "updated_at": {"title": "Updated At", "type": "string"}, "meta": {"title": "Meta", "type": "object"}, "filters": {"title": "Filters", "type": "object"}}}, "QueryRequest": {"title": "QueryRequest", "required": ["query"], "type": "object", "properties": {"query": {"title": "Query", "type": "string"}, "params": {"title": "Params", "type": "object"}, "debug": {"title": "Debug", "type": "boolean", "default": false}}, "additionalProperties": false}, "QueryResponse": {"title": "QueryResponse", "required": ["query", "answers"], "type": "object", "properties": {"query": {"title": "Query", "type": "string"}, "answers": {"title": "Answers", "type": "array", "items": {"$ref": "#/components/schemas/AnswerSerialized"}}, "documents": {"title": "Documents", "type": "array", "items": {"$ref": "#/components/schemas/DocumentSerialized"}}, "_debug": {"title": " Debug", "type": "object"}}}, "Span": {"title": "Span", "required": ["start", "end"], "type": "object", "properties": {"start": {"title": "Start", "type": "integer"}, "end": {"title": "End", "type": "integer"}}}, "ValidationError": {"title": "ValidationError", "required": ["loc", "msg", "type"], "type": "object", "properties": {"loc": {"title": "Location", "type": "array", "items": {"type": "string"}}, "msg": {"title": "Message", "type": "string"}, "type": {"title": "Error Type", "type": "string"}}}}}} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 887862266..4d58f97ab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,93 @@ disable = [ "fixme", "protected-access", "too-few-public-methods", - "raise-missing-from" + "raise-missing-from", + + "invalid-name", + "logging-fstring-interpolation", + "wrong-import-position", + "too-many-locals", + "duplicate-code", + "too-many-arguments", + "arguments-differ", + "wrong-import-order", + "consider-using-f-string", + "no-else-return", + "unused-variable", + "attribute-defined-outside-init", + "too-many-instance-attributes", + "no-self-use", + "super-with-arguments", + "anomalous-backslash-in-string", + "redefined-builtin", + "logging-format-interpolation", + "f-string-without-interpolation", + "abstract-method", + "too-many-branches", + "trailing-whitespace", + "unspecified-encoding", + "unidiomatic-typecheck", + "no-name-in-module", + "dangerous-default-value", + "unused-import", + "consider-using-with", + "redefined-outer-name", + "cyclic-import", + "arguments-renamed", + "unnecessary-pass", + "ungrouped-imports", + "broad-except", + "unnecessary-comprehension", + "subprocess-run-check", + "singleton-comparison", + "no-else-raise", + "import-outside-toplevel", + "consider-iterating-dictionary", + "too-many-nested-blocks", + "undefined-loop-variable", + "too-many-statements", + "consider-using-in", + "bare-except", + "too-many-lines", + "unexpected-keyword-arg", + "simplifiable-if-expression", + "use-list-literal", + "reimported", + "no-else-continue", + "deprecated-method", + "consider-using-dict-items", + "use-a-generator", + "simplifiable-if-statement", + "import-error", + "consider-using-from-import", + "useless-object-inheritance", + "use-dict-literal", + "unsubscriptable-object", + "too-many-return-statements", + "superfluous-parens", + "no-value-for-parameter", + "no-else-break", + "inconsistent-return-statements", + "consider-using-set-comprehension", + "c-extension-no-member", + "useless-super-delegation", + "useless-else-on-loop", + "used-before-assignment", + "unsupported-membership-test", + "unneeded-not", + "unnecessary-lambda", + "trailing-newlines", + "too-many-boolean-expressions", + "super-init-not-called", + "pointless-string-statement", + "non-parent-init-called", + "invalid-sequence-index", + "import-self", + "deprecated-argument", + "access-member-before-definition", + + "invalid-envvar-default", + "logging-too-many-args", ] [tool.pylint.'DESIGN'] max-args=7 diff --git a/rest_api/application.py b/rest_api/application.py index 6eb0a20e5..4cc2267b5 100644 --- a/rest_api/application.py +++ b/rest_api/application.py @@ -42,11 +42,11 @@ def get_openapi_specs() -> dict: """ app = get_application() return get_openapi( - title=app.title if app.title else None, - version=app.version if app.version else None, - openapi_version=app.openapi_version if app.openapi_version else None, - description=app.description if app.description else None, - routes=app.routes if app.routes else None, + title=app.title, + version=app.version, + openapi_version=app.openapi_version, + description=app.description, + routes=app.routes, ) diff --git a/rest_api/controller/feedback.py b/rest_api/controller/feedback.py index f54a2e126..b2654b02b 100644 --- a/rest_api/controller/feedback.py +++ b/rest_api/controller/feedback.py @@ -1,3 +1,5 @@ +from typing import Dict, Union, Optional + import json import logging @@ -40,8 +42,8 @@ def get_feedback(): @router.post("/eval-feedback") def get_feedback_metrics(filters: FilterRequest = None): """ - This endpoint returns basic accuracy metrics based on user feedback, - e.g., the ratio of correct answers or correctly identified documents. + This endpoint returns basic accuracy metrics based on user feedback, + e.g., the ratio of correct answers or correctly identified documents. You can filter the output by document or label. Example: @@ -51,13 +53,14 @@ def get_feedback_metrics(filters: FilterRequest = None): """ if filters: - filters = filters.filters - filters["origin"] = ["user-feedback"] + filters_content = filters.filters or {} + filters_content["origin"] = ["user-feedback"] else: - filters = {"origin": ["user-feedback"]} + filters_content = {"origin": ["user-feedback"]} - labels = DOCUMENT_STORE.get_all_labels(filters=filters) + labels = DOCUMENT_STORE.get_all_labels(filters=filters_content) + res: Dict[str, Optional[Union[float, int]]] if len(labels) > 0: answer_feedback = [1 if l.is_correct_answer else 0 for l in labels] doc_feedback = [1 if l.is_correct_document else 0 for l in labels] diff --git a/rest_api/controller/file_upload.py b/rest_api/controller/file_upload.py index dd31634c0..a59add36a 100644 --- a/rest_api/controller/file_upload.py +++ b/rest_api/controller/file_upload.py @@ -1,10 +1,11 @@ +from typing import Optional, List, Union + import json import logging import os import shutil import uuid from pathlib import Path -from typing import Optional, List from fastapi import APIRouter, UploadFile, File, Form, HTTPException, Depends from pydantic import BaseModel @@ -48,7 +49,8 @@ except KeyError: logger.warning("Indexing Pipeline not found in the YAML configuration. File Upload API will not be available.") -os.makedirs(FILE_UPLOAD_PATH, exist_ok=True) # create directory for uploading files +# create directory for uploading files +os.makedirs(FILE_UPLOAD_PATH, exist_ok=True) @as_form @@ -75,9 +77,10 @@ class Response(BaseModel): @router.post("/file-upload") def upload_file( files: List[UploadFile] = File(...), - meta: Optional[str] = Form("null"), # JSON serialized string - fileconverter_params: FileConverterParams = Depends(FileConverterParams.as_form), - preprocessor_params: PreprocessorParams = Depends(PreprocessorParams.as_form), + # JSON serialized string + meta: Optional[str] = Form("null"), # type: ignore + fileconverter_params: FileConverterParams = Depends(FileConverterParams.as_form), # type: ignore + preprocessor_params: PreprocessorParams = Depends(PreprocessorParams.as_form), # type: ignore ): """ You can use this endpoint to upload a file for indexing @@ -88,7 +91,7 @@ def upload_file( file_paths: list = [] file_metas: list = [] - meta = json.loads(meta) or {} + meta_form = json.loads(meta) # type: ignore for file in files: try: @@ -97,8 +100,8 @@ def upload_file( shutil.copyfileobj(file.file, buffer) file_paths.append(file_path) - meta["name"] = file.filename - file_metas.append(meta) + meta_form["name"] = file.filename + file_metas.append(meta_form) finally: file.file.close() diff --git a/rest_api/controller/utils.py b/rest_api/controller/utils.py index bca3654bc..c80000bdc 100644 --- a/rest_api/controller/utils.py +++ b/rest_api/controller/utils.py @@ -44,6 +44,6 @@ def as_form(cls: Type[BaseModel]): sig = inspect.signature(_as_form) sig = sig.replace(parameters=new_params) - _as_form.__signature__ = sig + _as_form.__signature__ = sig # type: ignore setattr(cls, "as_form", _as_form) return cls diff --git a/rest_api/schema.py b/rest_api/schema.py index 4637c8143..92e3ff878 100644 --- a/rest_api/schema.py +++ b/rest_api/schema.py @@ -35,7 +35,7 @@ class AnswerSerialized(Answer): @pydantic_dataclass class DocumentSerialized(Document): content: str - embedding: Optional[List[float]] + embedding: Optional[List[float]] # type: ignore @pydantic_dataclass diff --git a/rest_api/setup.py b/rest_api/setup.py index 495270cf5..239f6ade6 100644 --- a/rest_api/setup.py +++ b/rest_api/setup.py @@ -3,7 +3,7 @@ import logging from pathlib import Path -VERSION = None +VERSION = "0.0.0" try: VERSION = open(Path(__file__).parent.parent / "VERSION.txt", "r").read() except Exception as e: diff --git a/rest_api/test/test_rest_api.py b/rest_api/test/test_rest_api.py index 70086d4c7..f0a66a6a8 100644 --- a/rest_api/test/test_rest_api.py +++ b/rest_api/test/test_rest_api.py @@ -42,7 +42,6 @@ def exclude_no_answer(responses): return responses -@pytest.mark.elasticsearch @pytest.fixture(scope="session") def client() -> TestClient: os.environ["PIPELINE_YAML_PATH"] = str( @@ -55,7 +54,6 @@ def client() -> TestClient: client.post(url="/documents/delete_by_filters", data='{"filters": {}}') -@pytest.mark.elasticsearch @pytest.fixture(scope="session") def populated_client(client: TestClient) -> TestClient: client.post(url="/documents/delete_by_filters", data='{"filters": {}}') diff --git a/setup.cfg b/setup.cfg index 312727731..a63d5c710 100644 --- a/setup.cfg +++ b/setup.cfg @@ -154,9 +154,7 @@ colab = dev = # Type check mypy - types-Markdown - types-requests - types-PyYAML + typing_extensions; python_version < '3.8' # Test pytest responses diff --git a/ui/setup.py b/ui/setup.py index 36946b82e..8c18e281c 100644 --- a/ui/setup.py +++ b/ui/setup.py @@ -3,7 +3,7 @@ import logging from pathlib import Path -VERSION = None +VERSION = "0.0.0" try: # After git clone, VERSION.txt is in the root folder VERSION = open(Path(__file__).parent.parent / "VERSION.txt", "r").read() diff --git a/ui/utils.py b/ui/utils.py index 2dd6a3506..f1bb72c05 100644 --- a/ui/utils.py +++ b/ui/utils.py @@ -1,4 +1,4 @@ -from typing import List, Dict, Any, Tuple +from typing import List, Dict, Any, Tuple, Optional import os import logging @@ -112,7 +112,7 @@ def upload_doc(file): return response -def get_backlink(result) -> Tuple[str, str]: +def get_backlink(result) -> Tuple[Optional[str], Optional[str]]: if result.get("document", None): doc = result["document"] if isinstance(doc, dict):