2020-07-14 18:53:15 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								import  math  
						 
					
						
							
								
									
										
										
										
											2020-07-10 10:54:56 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2020-10-26 19:19:10 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								import  pytest  
						 
					
						
							
								
									
										
										
										
											2022-04-21 11:24:39 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								from  haystack . modeling . data_handler . inputs  import  QAInput ,  Question  
						 
					
						
							
								
									
										
										
										
											2020-10-26 19:19:10 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-10-25 15:50:23 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								from  haystack . schema  import  Document ,  Answer  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								from  haystack . nodes . reader . base  import  BaseReader  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								from  haystack . nodes . reader . farm  import  FARMReader  
						 
					
						
							
								
									
										
										
										
											2020-07-14 18:53:15 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2020-07-10 10:54:56 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  test_reader_basic ( reader ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  reader  is  not  None 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  isinstance ( reader ,  BaseReader ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2020-07-14 18:53:15 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								def  test_output ( prediction ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  prediction  is  not  None 
							 
						 
					
						
							
								
									
										
										
										
											2020-11-30 17:50:04 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    assert  prediction [ " query " ]  ==  " Who lives in Berlin? " 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-13 14:23:23 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    assert  prediction [ " answers " ] [ 0 ] . answer  ==  " Carla " 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  prediction [ " answers " ] [ 0 ] . offsets_in_context [ 0 ] . start  ==  11 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-03 13:43:18 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    assert  prediction [ " answers " ] [ 0 ] . offsets_in_context [ 0 ] . end  ==  16 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-13 14:23:23 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    assert  prediction [ " answers " ] [ 0 ] . score  < =  1 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  prediction [ " answers " ] [ 0 ] . score  > =  0 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  prediction [ " answers " ] [ 0 ] . context  ==  " My name is Carla and I live in Berlin " 
							 
						 
					
						
							
								
									
										
										
										
											2020-07-14 18:53:15 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    assert  len ( prediction [ " answers " ] )  ==  5 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-05-11 11:11:00 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								def  test_output_batch_single_query_single_doc_list ( batch_prediction_single_query_single_doc_list ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    prediction  =  batch_prediction_single_query_single_doc_list 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  prediction  is  not  None 
							 
						 
					
						
							
								
									
										
										
										
											2022-05-24 12:33:45 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    assert  prediction [ " queries " ]  ==  [ " Who lives in Berlin? " ] 
							 
						 
					
						
							
								
									
										
										
										
											2022-05-11 11:11:00 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    # Expected output: List of lists of answers 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  isinstance ( prediction [ " answers " ] ,  list ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  isinstance ( prediction [ " answers " ] [ 0 ] ,  list ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  isinstance ( prediction [ " answers " ] [ 0 ] [ 0 ] ,  Answer ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  len ( prediction [ " answers " ] )  ==  5   # Predictions for 5 docs 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  test_output_batch_single_query_multiple_doc_lists ( batch_prediction_single_query_multiple_doc_lists ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    prediction  =  batch_prediction_single_query_multiple_doc_lists 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  prediction  is  not  None 
							 
						 
					
						
							
								
									
										
										
										
											2022-05-24 12:33:45 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    assert  prediction [ " queries " ]  ==  [ " Who lives in Berlin? " ] 
							 
						 
					
						
							
								
									
										
										
										
											2022-05-11 11:11:00 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    # Expected output: List of lists of answers 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  isinstance ( prediction [ " answers " ] ,  list ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  isinstance ( prediction [ " answers " ] [ 0 ] ,  list ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  isinstance ( prediction [ " answers " ] [ 0 ] [ 0 ] ,  Answer ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  len ( prediction [ " answers " ] )  ==  2   # Predictions for 2 collection of docs 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  len ( prediction [ " answers " ] [ 0 ] )  ==  5   # top-k of 5 per collection of docs 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  test_output_batch_multiple_queries_single_doc_list ( batch_prediction_multiple_queries_single_doc_list ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    prediction  =  batch_prediction_multiple_queries_single_doc_list 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  prediction  is  not  None 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  prediction [ " queries " ]  ==  [ " Who lives in Berlin? " ,  " Who lives in New York? " ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    # Expected output: List of lists of lists of answers 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  isinstance ( prediction [ " answers " ] ,  list ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  isinstance ( prediction [ " answers " ] [ 0 ] ,  list ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  isinstance ( prediction [ " answers " ] [ 0 ] [ 0 ] ,  list ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  isinstance ( prediction [ " answers " ] [ 0 ] [ 0 ] [ 0 ] ,  Answer ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  len ( prediction [ " answers " ] )  ==  2   # Predictions for 2 queries 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  len ( prediction [ " answers " ] [ 0 ] )  ==  5   # Predictions for 5 documents 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  test_output_batch_multiple_queries_multiple_doc_lists ( batch_prediction_multiple_queries_multiple_doc_lists ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    prediction  =  batch_prediction_multiple_queries_multiple_doc_lists 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  prediction  is  not  None 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  prediction [ " queries " ]  ==  [ " Who lives in Berlin? " ,  " Who lives in New York? " ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    # Expected output: List of lists answers 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  isinstance ( prediction [ " answers " ] ,  list ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  isinstance ( prediction [ " answers " ] [ 0 ] ,  list ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  isinstance ( prediction [ " answers " ] [ 0 ] [ 0 ] ,  Answer ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  len ( prediction [ " answers " ] )  ==  2   # Predictions for 2 collections of documents 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  len ( prediction [ " answers " ] [ 0 ] )  ==  5   # top-k of 5 for collection of docs 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-06-07 09:23:03 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								@pytest.mark.integration  
						 
					
						
							
								
									
										
										
										
											2020-07-14 18:53:15 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								def  test_no_answer_output ( no_answer_prediction ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  no_answer_prediction  is  not  None 
							 
						 
					
						
							
								
									
										
										
										
											2020-11-30 17:50:04 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    assert  no_answer_prediction [ " query " ]  ==  " What is the meaning of life? " 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-04 13:43:12 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    assert  math . isclose ( no_answer_prediction [ " no_ans_gap " ] ,  - 11.847594738006592 ,  rel_tol = 0.0001 ) 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-13 14:23:23 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    assert  no_answer_prediction [ " answers " ] [ 0 ] . answer  ==  " " 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  no_answer_prediction [ " answers " ] [ 0 ] . offsets_in_context [ 0 ] . start  ==  0 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  no_answer_prediction [ " answers " ] [ 0 ] . offsets_in_context [ 0 ] . end  ==  0 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  no_answer_prediction [ " answers " ] [ 0 ] . score  < =  1 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  no_answer_prediction [ " answers " ] [ 0 ] . score  > =  0 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  no_answer_prediction [ " answers " ] [ 0 ] . context  ==  None 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  no_answer_prediction [ " answers " ] [ 0 ] . document_id  ==  None 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    answers  =  [ x . answer  for  x  in  no_answer_prediction [ " answers " ] ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  answers . count ( " " )  ==  1 
							 
						 
					
						
							
								
									
										
										
										
											2020-07-14 18:53:15 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    assert  len ( no_answer_prediction [ " answers " ] )  ==  5 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2020-10-26 19:19:10 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2020-07-14 18:53:15 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# TODO Directly compare farm and transformers reader outputs  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
# TODO Add checks that the model is responsive to input arguments, e.g. context_window_size and top_k
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2020-10-26 19:19:10 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-06-07 09:23:03 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								@pytest.mark.integration  
						 
					
						
							
								
									
										
										
										
											2020-07-14 18:53:15 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								def  test_prediction_attributes ( prediction ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    # TODO FARM's prediction also has no_ans_gap 
							 
						 
					
						
							
								
									
										
										
										
											2020-11-30 17:50:04 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    attributes_gold  =  [ " query " ,  " answers " ] 
							 
						 
					
						
							
								
									
										
										
										
											2020-07-14 18:53:15 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    for  ag  in  attributes_gold : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        assert  ag  in  prediction 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-02-03 13:43:18 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-06-07 09:23:03 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								@pytest.mark.integration  
						 
					
						
							
								
									
										
										
										
											2021-10-18 15:47:36 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								def  test_model_download_options ( ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    # download disabled and model is not cached locally 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    with  pytest . raises ( OSError ) : 
							 
						 
					
						
							
								
									
										
										
										
											2021-12-22 17:20:23 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        impossible_reader  =  FARMReader ( " mfeb/albert-xxlarge-v2-squad2 " ,  local_files_only = True ,  num_processes = 0 ) 
							 
						 
					
						
							
								
									
										
										
										
											2020-07-14 18:53:15 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-02-03 13:43:18 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2020-07-14 18:53:15 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								def  test_answer_attributes ( prediction ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    # TODO Transformers answer also has meta key 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    answer  =  prediction [ " answers " ] [ 0 ] 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-13 14:23:23 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    assert  type ( answer )  ==  Answer 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-03 13:43:18 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    attributes_gold  =  [ " answer " ,  " score " ,  " context " ,  " offsets_in_context " ,  " offsets_in_document " ,  " type " ] 
							 
						 
					
						
							
								
									
										
										
										
											2020-07-14 18:53:15 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    for  ag  in  attributes_gold : 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-03 13:43:18 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        assert  getattr ( answer ,  ag ,  None )  is  not  None 
							 
						 
					
						
							
								
									
										
										
										
											2020-07-14 18:53:15 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-06-07 09:23:03 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								@pytest.mark.integration  
						 
					
						
							
								
									
										
										
										
											2020-10-26 19:19:10 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								@pytest.mark.parametrize ( " reader " ,  [ " farm " ] ,  indirect = True )  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								@pytest.mark.parametrize ( " window_size " ,  [ 10 ,  15 ,  20 ] )  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  test_context_window_size ( reader ,  test_docs_xs ,  window_size ) :  
						 
					
						
							
								
									
										
										
										
											2020-07-31 11:34:06 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    docs  =  [ Document . from_dict ( d )  if  isinstance ( d ,  dict )  else  d  for  d  in  test_docs_xs ] 
							 
						 
					
						
							
								
									
										
										
										
											2020-10-26 19:19:10 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2020-10-30 18:06:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    assert  isinstance ( reader ,  FARMReader ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    old_window_size  =  reader . inferencer . model . prediction_heads [ 0 ] . context_window_size 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    reader . inferencer . model . prediction_heads [ 0 ] . context_window_size  =  window_size 
							 
						 
					
						
							
								
									
										
										
										
											2020-10-26 19:19:10 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2020-11-30 17:50:04 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    prediction  =  reader . predict ( query = " Who lives in Berlin? " ,  documents = docs ,  top_k = 5 ) 
							 
						 
					
						
							
								
									
										
										
										
											2020-10-26 19:19:10 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    for  answer  in  prediction [ " answers " ] : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        # If the extracted answer is larger than the context window, the context window is expanded. 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        # If the extracted answer is odd in length, the resulting context window is one less than context_window_size 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        # due to rounding (See FARM's QACandidate) 
							 
						 
					
						
							
								
									
										
										
										
											2020-07-14 18:53:15 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        # TODO Currently the behaviour of context_window_size in FARMReader and TransformerReader is different 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-13 14:23:23 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        if  len ( answer . answer )  < =  window_size : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            assert  len ( answer . context )  in  [ window_size ,  window_size  -  1 ] 
							 
						 
					
						
							
								
									
										
										
										
											2020-10-26 19:19:10 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        else : 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-04 13:43:12 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								            # If the extracted answer is larger than the context window and is odd in length, 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            # the resulting context window is one more than the answer length 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            assert  len ( answer . context )  in  [ len ( answer . answer ) ,  len ( answer . answer )  +  1 ] 
							 
						 
					
						
							
								
									
										
										
										
											2020-07-14 18:53:15 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2020-10-30 18:06:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    reader . inferencer . model . prediction_heads [ 0 ] . context_window_size  =  old_window_size 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2020-10-26 19:19:10 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    # TODO Need to test transformers reader 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    # TODO Currently the behaviour of context_window_size in FARMReader and TransformerReader is different 
							 
						 
					
						
							
								
									
										
										
										
											2020-07-14 18:53:15 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2020-07-31 11:34:06 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2020-10-26 19:19:10 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								@pytest.mark.parametrize ( " reader " ,  [ " farm " ] ,  indirect = True )  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								@pytest.mark.parametrize ( " top_k " ,  [ 2 ,  5 ,  10 ] )  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  test_top_k ( reader ,  test_docs_xs ,  top_k ) :  
						 
					
						
							
								
									
										
										
										
											2020-07-31 11:34:06 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    docs  =  [ Document . from_dict ( d )  if  isinstance ( d ,  dict )  else  d  for  d  in  test_docs_xs ] 
							 
						 
					
						
							
								
									
										
										
										
											2020-07-14 18:53:15 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2020-10-30 18:06:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    assert  isinstance ( reader ,  FARMReader ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    old_top_k_per_candidate  =  reader . top_k_per_candidate 
							 
						 
					
						
							
								
									
										
										
										
											2020-10-26 19:19:10 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    reader . top_k_per_candidate  =  4 
							 
						 
					
						
							
								
									
										
										
										
											2020-10-30 18:06:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    reader . inferencer . model . prediction_heads [ 0 ] . n_best  =  reader . top_k_per_candidate  +  1 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    try : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        old_top_k_per_sample  =  reader . inferencer . model . prediction_heads [ 0 ] . n_best_per_sample 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        reader . inferencer . model . prediction_heads [ 0 ] . n_best_per_sample  =  4 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    except : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        print ( " WARNING: Could not set `top_k_per_sample` in FARM. Please update FARM version. " ) 
							 
						 
					
						
							
								
									
										
										
										
											2020-07-14 18:53:15 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2020-11-30 17:50:04 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    prediction  =  reader . predict ( query = " Who lives in Berlin? " ,  documents = docs ,  top_k = top_k ) 
							 
						 
					
						
							
								
									
										
										
										
											2020-10-26 19:19:10 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    assert  len ( prediction [ " answers " ] )  ==  top_k 
							 
						 
					
						
							
								
									
										
										
										
											2020-10-30 18:06:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    reader . top_k_per_candidate  =  old_top_k_per_candidate 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    reader . inferencer . model . prediction_heads [ 0 ] . n_best  =  reader . top_k_per_candidate  +  1 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    try : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        reader . inferencer . model . prediction_heads [ 0 ] . n_best_per_sample  =  old_top_k_per_sample 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    except : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        print ( " WARNING: Could not set `top_k_per_sample` in FARM. Please update FARM version. " ) 
							 
						 
					
						
							
								
									
										
										
										
											2020-12-07 14:07:20 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  test_farm_reader_update_params ( test_docs_xs ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    reader  =  FARMReader ( 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-03 13:43:18 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        model_name_or_path = " deepset/roberta-base-squad2 " ,  use_gpu = False ,  no_ans_boost = 0 ,  num_processes = 0 
							 
						 
					
						
							
								
									
										
										
										
											2020-12-07 14:07:20 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    docs  =  [ Document . from_dict ( d )  if  isinstance ( d ,  dict )  else  d  for  d  in  test_docs_xs ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    # original reader 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    prediction  =  reader . predict ( query = " Who lives in Berlin? " ,  documents = docs ,  top_k = 3 ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  len ( prediction [ " answers " ] )  ==  3 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-13 14:23:23 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    assert  prediction [ " answers " ] [ 0 ] . answer  ==  " Carla " 
							 
						 
					
						
							
								
									
										
										
										
											2020-12-07 14:07:20 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    # update no_ans_boost 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    reader . update_parameters ( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        context_window_size = 100 ,  no_ans_boost = 100 ,  return_no_answer = True ,  max_seq_len = 384 ,  doc_stride = 128 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    prediction  =  reader . predict ( query = " Who lives in Berlin? " ,  documents = docs ,  top_k = 3 ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  len ( prediction [ " answers " ] )  ==  3 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-13 14:23:23 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    assert  prediction [ " answers " ] [ 0 ] . answer  ==  " " 
							 
						 
					
						
							
								
									
										
										
										
											2020-12-07 14:07:20 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    # update no_ans_boost 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    reader . update_parameters ( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        context_window_size = 100 ,  no_ans_boost = 0 ,  return_no_answer = False ,  max_seq_len = 384 ,  doc_stride = 128 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    prediction  =  reader . predict ( query = " Who lives in Berlin? " ,  documents = docs ,  top_k = 3 ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  len ( prediction [ " answers " ] )  ==  3 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-13 14:23:23 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    assert  None  not  in  [ ans . answer  for  ans  in  prediction [ " answers " ] ] 
							 
						 
					
						
							
								
									
										
										
										
											2020-12-07 14:07:20 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    # update context_window_size 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    reader . update_parameters ( context_window_size = 6 ,  no_ans_boost = - 10 ,  max_seq_len = 384 ,  doc_stride = 128 ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    prediction  =  reader . predict ( query = " Who lives in Berlin? " ,  documents = docs ,  top_k = 3 ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  len ( prediction [ " answers " ] )  ==  3 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-13 14:23:23 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    assert  len ( prediction [ " answers " ] [ 0 ] . context )  ==  6 
							 
						 
					
						
							
								
									
										
										
										
											2020-12-07 14:07:20 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    # update doc_stride with invalid value 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    with  pytest . raises ( Exception ) : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        reader . update_parameters ( context_window_size = 100 ,  no_ans_boost = - 10 ,  max_seq_len = 384 ,  doc_stride = 999 ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        reader . predict ( query = " Who lives in Berlin? " ,  documents = docs ,  top_k = 3 ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    # update max_seq_len with invalid value 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    with  pytest . raises ( Exception ) : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        reader . update_parameters ( context_window_size = 6 ,  no_ans_boost = - 10 ,  max_seq_len = 99 ,  doc_stride = 128 ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        reader . predict ( query = " Who lives in Berlin? " ,  documents = docs ,  top_k = 3 ) 
							 
						 
					
						
							
								
									
										
										
										
											2022-04-21 11:24:39 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								@pytest.mark.parametrize ( " use_confidence_scores " ,  [ True ,  False ] )  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  test_farm_reader_uses_same_sorting_as_QAPredictionHead ( use_confidence_scores ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    reader  =  FARMReader ( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        model_name_or_path = " deepset/roberta-base-squad2 " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        use_gpu = False , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        num_processes = 0 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        return_no_answer = True , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        use_confidence_scores = use_confidence_scores , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    text  =  """ Beer is one of the oldest[1][2][3] and most widely consumed[4] alcoholic drinks in the world, and the third most popular drink overall after water and tea.[5] It is produced by the brewing and fermentation of starches, mainly derived from cereal grains—most commonly from malted barley, though wheat, maize (corn), rice, and oats are also used. During the brewing process, fermentation of the starch sugars in the wort produces ethanol and carbonation in the resulting beer.[6] Most modern beer is brewed with hops, which add bitterness and other flavours and act as a natural preservative and stabilizing agent. Other flavouring agents such as gruit, herbs, or fruits may be included or used instead of hops. In commercial brewing, the natural carbonation effect is often removed during processing and replaced with forced carbonation.[7] 
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Some  of  humanity ' s earliest known writings refer to the production and distribution of beer: the Code of Hammurabi included laws regulating beer and beer parlours,[8] and  " The Hymn to Ninkasi " , a prayer to the Mesopotamian goddess of beer, served as both a prayer and as a method of remembering the recipe for beer in a culture with few literate people.[9][10]  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Beer  is  distributed  in  bottles  and  cans  and  is  also  commonly  available  on  draught ,  particularly  in  pubs  and  bars .  The  brewing  industry  is  a  global  business ,  consisting  of  several  dominant  multinational  companies  and  many  thousands  of  smaller  producers  ranging  from  brewpubs  to  regional  breweries .  The  strength  of  modern  beer  is  usually  around  4 %  to  6 %  alcohol  by  volume  ( ABV ) ,  although  it  may  vary  between  0.5 %  and  20 % ,  with  some  breweries  creating  examples  of  40 %  ABV  and  above . [ 11 ]  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Beer  forms  part  of  the  culture  of  many  nations  and  is  associated  with  social  traditions  such  as  beer  festivals ,  as  well  as  a  rich  pub  culture  involving  activities  like  pub  crawling ,  pub  quizzes  and  pub  games .  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								When  beer  is  distilled ,  the  resulting  liquor  is  a  form  of  whisky . [ 12 ]  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								""" 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    docs  =  [ Document ( text ) ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    query  =  " What is the third most popular drink? " 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    reader_predictions  =  reader . predict ( query = query ,  documents = docs ,  top_k = 5 ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    farm_input  =  [ QAInput ( doc_text = d . content ,  questions = Question ( query ) )  for  d  in  docs ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    inferencer_predictions  =  reader . inferencer . inference_from_objects ( farm_input ,  return_json = False ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    for  answer ,  qa_cand  in  zip ( reader_predictions [ " answers " ] ,  inferencer_predictions [ 0 ] . prediction ) : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        assert  answer . answer  ==  ( " "  if  qa_cand . answer_type  ==  " no_answer "  else  qa_cand . answer ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        assert  answer . offsets_in_document [ 0 ] . start  ==  qa_cand . offset_answer_start 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        assert  answer . offsets_in_document [ 0 ] . end  ==  qa_cand . offset_answer_end 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        if  use_confidence_scores : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            assert  answer . score  ==  qa_cand . confidence 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        else : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            assert  answer . score  ==  qa_cand . score