import pytest @pytest.mark.parametrize("multiprocessing_chunksize", [None, 2]) @pytest.mark.parametrize("num_processes", [2, 0, None], scope="module") def test_qa_format_and_results(adaptive_model_qa, multiprocessing_chunksize): qa_inputs_dicts = [ { "questions": ["In what country is Normandy"], "text": "The Normans are an ethnic group that arose in Normandy, a northern region " "of France, from contact between Viking settlers and indigenous Franks and Gallo-Romans", }, { "questions": ["Who counted the game among the best ever made?"], "text": "Twilight Princess was released to universal critical acclaim and commercial success. It received " "perfect scores from major publications such as 1UP.com, Computer and Video Games, Electronic " "Gaming Monthly, Game Informer, GamesRadar, and GameSpy. On the review aggregators GameRankings " "and Metacritic, Twilight Princess has average scores of 95% and 95 for the Wii version and scores " "of 95% and 96 for the GameCube version. GameTrailers in their review called it one of the " "greatest games ever created.", }, ] ground_truths = ["France", "GameTrailers"] results = adaptive_model_qa.inference_from_dicts( dicts=qa_inputs_dicts, multiprocessing_chunksize=multiprocessing_chunksize ) # sample results # [ # { # "task": "qa", # "predictions": [ # { # "question": "In what country is Normandy", # "question_id": "None", # "ground_truth": None, # "answers": [ # { # "score": 1.1272038221359253, # "probability": -1, # "answer": "France", # "offset_answer_start": 54, # "offset_answer_end": 60, # "context": "The Normans gave their name to Normandy, a region in France.", # "offset_context_start": 0, # "offset_context_end": 60, # "document_id": None, # } # ] # } # ], # } # ] predictions = list(results)[0]["predictions"] for prediction, ground_truth, qa_input_dict in zip(predictions, ground_truths, qa_inputs_dicts): assert prediction["question"] == qa_input_dict["questions"][0] answer = prediction["answers"][0] assert answer["answer"] in answer["context"] assert answer["answer"] == ground_truth assert { "answer", "score", "probability", "offset_answer_start", "offset_answer_end", "context", "offset_context_start", "offset_context_end", "document_id", } == answer.keys() if __name__ == "__main__": test_qa_format_and_results()