# haystack/test/test_elastic_dsl_convertor.py

import pytest

from rest_api.controller.request import Question
from rest_api.controller.response import Answer, AnswersToIndividualQuestion


def test_query_dsl_with_without_valid_query_field():
    """Conversion must raise for a bool query with short-form ``match`` clauses.

    Both ``match`` clauses map a field directly to its text, so the request
    contains no nested ``query`` key anywhere below the top-level one. The
    test only pins that ``Question.from_elastic_query_dsl`` raises; the
    concrete exception type is not visible from this file, hence the broad
    ``Exception``.
    """
    must_clauses = [
        {"match": {"title": "Search"}},
        {"match": {"content": "Elasticsearch"}},
    ]
    filter_clauses = [
        {"term": {"status": "published"}},
        {"range": {"publish_date": {"gte": "2015-01-01"}}},
    ]
    dsl_request = {
        "query": {
            "bool": {"must": must_clauses, "filter": filter_clauses},
        },
    }

    with pytest.raises(Exception):
        Question.from_elastic_query_dsl(dsl_request)


def test_query_dsl_with_without_multiple_query_field():
    """Conversion must raise when the request carries two ``query`` strings.

    The ``should`` list holds two ``match`` clauses, each with its own nested
    ``query`` value ("shay" and "banon"). The test only pins that
    ``Question.from_elastic_query_dsl`` raises for this request; the concrete
    exception type is not visible from this file, hence the broad
    ``Exception``.
    """
    should_clauses = [
        {"match": {"name.first": {"query": "shay", "_name": "first"}}},
        {"match": {"name.last": {"query": "banon", "_name": "last"}}},
    ]
    terms_filter = {
        "terms": {
            "name.last": ["banon", "kimchy"],
            "_name": "test",
        },
    }
    dsl_request = {
        "query": {
            "bool": {"should": should_clauses, "filter": terms_filter},
        },
    }

    with pytest.raises(Exception):
        Question.from_elastic_query_dsl(dsl_request)


def test_query_dsl_with_single_query():
    """A plain ``match`` query yields exactly one question and no filters.

    The request contains a single nested ``query`` string and no ``filter``
    section; the converted ``Question`` is expected to hold that string as
    its only question and to have ``filters`` set to ``None``.
    """
    query = {
        "query": {
            "match": {
                "message": {
                    "query": "this is a test"
                }
            }
        }
    }
    question = Question.from_elastic_query_dsl(query)
    assert len(question.questions) == 1
    # Use the membership operator rather than calling ``__contains__``
    # directly: it is the idiomatic form and pytest can introspect it.
    assert "this is a test" in question.questions
    assert question.filters is None


def test_query_dsl_with_filter():
    """A bool query with one ``match`` and a ``terms`` filter is converted.

    The test expects the single nested ``query`` string ("shay") to become
    the only question, and the resulting ``filters`` mapping to contain
    exactly one entry, keyed ``_name`` with value ``"test"``.
    """
    query = {
        "query": {
            "bool": {
                "should": [
                    {"match": {"name.first": {"query": "shay", "_name": "first"}}}
                ],
                "filter": {
                    "terms": {
                        "name.last": ["banon", "kimchy"],
                        "_name": "test"
                    }
                }
            }
        }
    }
    question = Question.from_elastic_query_dsl(query)
    assert len(question.questions) == 1
    # Use the membership operator rather than calling ``__contains__``
    # directly: it is the idiomatic form and pytest can introspect it.
    assert "shay" in question.questions
    assert len(question.filters) == 1
    assert question.filters["_name"] == "test"


def test_query_dsl_with_complex_query():
    """A ``multi_match`` query with several filters and a ``size`` is converted.

    Expectations pinned by this test:

    * the ``multi_match`` query string becomes the only question;
    * the two ``terms`` filters (``year`` and ``quarter``) end up in
      ``filters``, which has exactly two entries (so the ``range`` clause
      contributes none);
    * the top-level ``size`` (17) is exposed as ``top_k_retriever``;
    * the second positional argument (7) is exposed as ``top_k_reader``.
    """
    query = {
        "size": 17,
        "query": {
            "bool": {
                "should": [
                    {
                        "multi_match": {
                            "query": "I am test1",
                            "type": "most_fields",
                            "fields": ["text", "title"]
                        }
                    }
                ],
                "filter": [
                    {
                        "terms": {
                            "year": "2020"
                        }
                    },
                    {
                        "terms": {
                            "quarter": "1"
                        }
                    },
                    {
                        "range": {
                            "date": {
                                "gte": "12-12-12"
                            }
                        }
                    }
                ]
            }
        }
    }
    top_k_reader = 7
    question = Question.from_elastic_query_dsl(query, top_k_reader)
    assert len(question.questions) == 1
    # Use the membership operator rather than calling ``__contains__``
    # directly: it is the idiomatic form and pytest can introspect it.
    assert "I am test1" in question.questions
    assert len(question.filters) == 2
    assert question.filters["year"] == "2020"
    assert question.filters["quarter"] == "1"
    assert question.top_k_retriever == 17
    assert question.top_k_reader == 7


def test_response_dsl_with_empty_answers():
    """An answer set with no answers converts to a response with zero hits.

    The instance's attribute dict is passed to
    ``AnswersToIndividualQuestion.to_elastic_response_dsl``; the result is
    expected to report a total of 0 and an empty hit list.
    """
    no_answers = AnswersToIndividualQuestion(question="test question", answers=[])
    response = AnswersToIndividualQuestion.to_elastic_response_dsl(vars(no_answers))

    hits_section = response['hits']
    assert hits_section['total']['value'] == 0
    assert len(hits_section['hits']) == 0


def test_response_dsl_with_answers():
    """Two answers convert to two hits with the expected field placement.

    The first answer populates every field; the second sets only
    ``offset_start`` and ``offset_end`` and leaves everything else ``None``.
    For each hit the test expects ``score`` under ``_score``, ``document_id``
    under ``_id``, and the remaining fields under ``_source``.
    """
    populated = Answer(
        answer="answer",
        question="question",
        score=0.1234,
        probability=0.5678,
        context="context",
        offset_start=200,
        offset_end=300,
        offset_start_in_doc=2000,
        offset_end_in_doc=2100,
        document_id="id_1",
        meta={
            "meta1": "meta_value"
        }
    )
    sparse = Answer(
        answer=None,
        question=None,
        score=None,
        probability=None,
        context=None,
        offset_start=250,
        offset_end=350,
        offset_start_in_doc=None,
        offset_end_in_doc=None,
        document_id=None,
        meta=None
    )
    payload = AnswersToIndividualQuestion(question="test question", answers=[populated, sparse])
    response = AnswersToIndividualQuestion.to_elastic_response_dsl(vars(payload))

    hits_section = response['hits']

    # Number of returned answers
    assert hits_section['total']['value'] == 2

    # Converted answers
    hits = hits_section['hits']
    assert len(hits) == 2
    first_hit, second_hit = hits[0], hits[1]

    # Fully populated answer: every value must round-trip unchanged.
    assert first_hit["_score"] == 0.1234
    assert first_hit["_id"] == "id_1"
    expected_source = {
        "answer": "answer",
        "question": "question",
        "context": "context",
        "probability": 0.5678,
        "offset_start": 200,
        "offset_end": 300,
        "offset_start_in_doc": 2000,
        "offset_end_in_doc": 2100,
        "meta": {"meta1": "meta_value"},
    }
    first_source = first_hit["_source"]
    for field, expected in expected_source.items():
        assert first_source[field] == expected, field

    # Sparse answer: unset fields stay None, the two offsets are preserved.
    assert second_hit["_score"] is None
    assert second_hit["_id"] is None
    second_source = second_hit["_source"]
    unset_fields = (
        "answer",
        "question",
        "context",
        "probability",
        "offset_start_in_doc",
        "offset_end_in_doc",
        "meta",
    )
    for field in unset_fields:
        assert second_source[field] is None, field
    assert second_source["offset_start"] == 250
    assert second_source["offset_end"] == 350