# haystack/test/test_rest_api.py
from pathlib import Path
import pytest
from fastapi.testclient import TestClient
def get_test_client_and_override_dependencies():
    """Point the REST API at the test pipeline YAML and return a TestClient.

    The environment variables must be set *before* ``rest_api.application``
    is imported, because the app reads them at import time — hence the
    deferred import inside this function.
    """
    import os

    os.environ.update(
        {
            "PIPELINE_YAML_PATH": "samples/pipeline/test_pipeline.yaml",
            "QUERY_PIPELINE_NAME": "query_pipeline",
            "INDEXING_PIPELINE_NAME": "indexing_pipeline",
        }
    )

    from rest_api.application import app

    return TestClient(app)
@pytest.mark.slow
@pytest.mark.elasticsearch
@pytest.mark.parametrize("reader", ["farm"], indirect=True)
@pytest.mark.parametrize("document_store", ["elasticsearch"], indirect=True)
def test_api(reader, document_store):
    """End-to-end test of the REST API.

    Exercises, in order: file upload, query (no filter, single-value filter,
    list-valued filter, non-matching filter), feedback write, and feedback
    export. Requires a running Elasticsearch instance (fixture-provided).
    """
    client = get_test_client_and_override_dependencies()

    # test file upload API — use a context manager so the file handle is
    # closed even if the request raises (the original leaked the handle)
    with Path("samples/pdf/sample_pdf_1.pdf").open("rb") as pdf_file:
        response = client.post(
            url="/file-upload",
            files={"files": pdf_file},
            data={"meta": '{"meta_key": "meta_value"}'},
        )
    assert 200 == response.status_code

    # test query API without filters
    query_with_no_filter_value = {"query": "Who made the PDF specification?"}
    response = client.post(url="/query", json=query_with_no_filter_value)
    assert 200 == response.status_code
    response_json = response.json()
    assert response_json["answers"][0]["answer"] == "Adobe Systems"
    # remember the answering document for the feedback round-trip below
    document_id = response_json["answers"][0]["document_id"]

    # test query API with a single-value metadata filter
    query_with_filter = {"query": "Who made the PDF specification?", "params": {"filters": {"meta_key": "meta_value"}}}
    response = client.post(url="/query", json=query_with_filter)
    assert 200 == response.status_code
    response_json = response.json()
    assert response_json["answers"][0]["answer"] == "Adobe Systems"

    # test query API with a list-valued metadata filter (any value matches)
    query_with_filter_list = {
        "query": "Who made the PDF specification?",
        "params": {"filters": {"meta_key": ["meta_value", "another_value"]}}
    }
    response = client.post(url="/query", json=query_with_filter_list)
    assert 200 == response.status_code
    response_json = response.json()
    assert response_json["answers"][0]["answer"] == "Adobe Systems"

    # a filter that matches no document must yield an empty answer list
    query_with_invalid_filter = {
        "query": "Who made the PDF specification?", "params": {"filters": {"meta_key": "invalid_value"}}
    }
    response = client.post(url="/query", json=query_with_invalid_filter)
    assert 200 == response.status_code
    response_json = response.json()
    assert len(response_json["answers"]) == 0

    # test write feedback
    feedback = {
        "question": "Who made the PDF specification?",
        "is_correct_answer": True,
        "document_id": document_id,
        "is_correct_document": True,
        "answer": "Adobe Systems",
        "offset_start_in_doc": 60
    }
    response = client.post(url="/feedback", json=feedback)
    assert 200 == response.status_code

    # test export feedback: the reported answer offsets must slice the
    # returned context back to the answer text, for every context mode
    feedback_urls = [
        "/export-feedback?full_document_context=true",
        "/export-feedback?full_document_context=false&context_size=50",
        "/export-feedback?full_document_context=false&context_size=50000",
    ]
    for url in feedback_urls:
        response = client.get(url=url, json=feedback)
        # the original never checked the export status code
        assert 200 == response.status_code
        response_json = response.json()
        context = response_json["data"][0]["paragraphs"][0]["context"]
        answer_start = response_json["data"][0]["paragraphs"][0]["qas"][0]["answers"][0]["answer_start"]
        answer = response_json["data"][0]["paragraphs"][0]["qas"][0]["answers"][0]["text"]
        assert context[answer_start:answer_start + len(answer)] == answer