2021-09-22 16:56:51 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								import  logging  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import  pytest  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								from  math  import  isclose  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import  numpy  as  np  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								from  haystack . modeling . infer  import  QAInferencer  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								from  haystack . modeling . data_handler . inputs  import  QAInput ,  Question  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-11-08 09:26:47 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								DOC_TEXT  =  """ Twilight Princess was released to universal critical acclaim and commercial success.  \
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								It  received  perfect  scores  from  major  publications  such  as  1 UP . com ,  Computer  and  Video  Games ,  \ 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Electronic  Gaming  Monthly ,  Game  Informer ,  GamesRadar ,  and  GameSpy .  On  the  review  aggregators  \ 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								GameRankings  and  Metacritic ,  Twilight  Princess  has  average  scores  of  95 %  and  95  for  the  Wii  \ 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								version  and  scores  of  95 %  and  96  for  the  GameCube  version .  GameTrailers  in  their  review  called  \ 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								it  one  of  the  greatest  games  ever  created . """ 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-04-28 17:08:41 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								@pytest.fixture  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  bert_base_squad2 ( request ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    model  =  QAInferencer . load ( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " deepset/minilm-uncased-squad2 " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        task_type = " question_answering " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        batch_size = 4 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        num_processes = 0 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        multithreading_rust = False , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    return  model 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-09-22 16:56:51 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								@pytest.fixture ( )  
						 
					
						
							
								
									
										
										
										
											2023-04-28 17:08:41 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								def  span_inference_result ( bert_base_squad2 ) :  
						 
					
						
							
								
									
										
										
										
											2022-02-03 13:43:18 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    obj_input  =  [ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        QAInput ( 
							 
						 
					
						
							
								
									
										
										
										
											2022-11-08 09:26:47 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								            doc_text = DOC_TEXT ,  questions = Question ( " Who counted the game among the best ever made? " ,  uid = " best_id_ever " ) 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-03 13:43:18 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    ] 
							 
						 
					
						
							
								
									
										
										
										
											2021-09-22 16:56:51 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								    result  =  bert_base_squad2 . inference_from_objects ( obj_input ,  return_json = False ) [ 0 ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    return  result 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								@pytest.fixture ( )  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  no_answer_inference_result ( bert_base_squad2 ,  caplog = None ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    if  caplog : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        caplog . set_level ( logging . CRITICAL ) 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-03 13:43:18 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    obj_input  =  [ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        QAInput ( 
							 
						 
					
						
							
								
									
										
										
										
											2022-11-08 09:26:47 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								            doc_text = """ \
 
							 
						 
					
						
							
								
									
										
										
										
											2023-04-28 17:08:41 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								                The  majority  of  the  forest  is  contained  within  Brazil ,  with  60 %  of  the  rainforest ,  followed  by 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                Peru  with  13 % ,  Colombia  with  10 % ,  and  with  minor  amounts  in  Venezuela ,  Ecuador ,  Bolivia ,  Guyana , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                Suriname  and  French  Guiana .  States  or  departments  in  four  nations  contain  " Amazonas "  in  their  names . 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                The  Amazon  represents  over  half  of  the  planet \' s remaining rainforests, and comprises the largest 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                and  most  biodiverse  tract  of  tropical  rainforest  in  the  world ,  with  an  estimated  390  billion  individual 
							 
						 
					
						
							
								
									
										
										
										
											2022-11-08 09:26:47 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								                trees  divided  into  16 , 000  species . """ , 
 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-03 13:43:18 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								            questions = Question ( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                " The Amazon represents less than half of the planets remaining what? " ,  uid = " best_id_ever " 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            ) , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    ] 
							 
						 
					
						
							
								
									
										
										
										
											2021-09-22 16:56:51 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								    result  =  bert_base_squad2 . inference_from_objects ( obj_input ,  return_json = False ) [ 0 ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    return  result 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  test_inference_different_inputs ( bert_base_squad2 ) :  
						 
					
						
							
								
									
										
										
										
											2022-11-08 09:26:47 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    qa_format_1  =  [ { " questions " :  [ " Who counted the game among the best ever made? " ] ,  " text " :  DOC_TEXT } ] 
							 
						 
					
						
							
								
									
										
										
										
											2021-09-22 16:56:51 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								    q  =  Question ( text = " Who counted the game among the best ever made? " ) 
							 
						 
					
						
							
								
									
										
										
										
											2022-11-08 09:26:47 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    qa_format_2  =  QAInput ( questions = [ q ] ,  doc_text = DOC_TEXT ) 
							 
						 
					
						
							
								
									
										
										
										
											2021-09-22 16:56:51 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    result1  =  bert_base_squad2 . inference_from_dicts ( dicts = qa_format_1 ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    result2  =  bert_base_squad2 . inference_from_objects ( objects = [ qa_format_2 ] ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  result1  ==  result2 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  test_span_inference_result_ranking_by_confidence ( bert_base_squad2 ,  caplog = None ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    if  caplog : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        caplog . set_level ( logging . CRITICAL ) 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-03 13:43:18 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    obj_input  =  [ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        QAInput ( 
							 
						 
					
						
							
								
									
										
										
										
											2022-11-08 09:26:47 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								            doc_text = DOC_TEXT ,  questions = Question ( " Who counted the game among the best ever made? " ,  uid = " best_id_ever " ) 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-03 13:43:18 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								        ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    ] 
							 
						 
					
						
							
								
									
										
										
										
											2021-09-22 16:56:51 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-04-21 11:24:39 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    # by default, result is sorted by confidence and not by score 
							 
						 
					
						
							
								
									
										
										
										
											2021-09-22 16:56:51 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								    result_ranked_by_confidence  =  bert_base_squad2 . inference_from_objects ( obj_input ,  return_json = False ) [ 0 ] 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-03 13:43:18 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    assert  all ( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        result_ranked_by_confidence . prediction [ i ] . confidence  > =  result_ranked_by_confidence . prediction [ i  +  1 ] . confidence 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        for  i  in  range ( len ( result_ranked_by_confidence . prediction )  -  1 ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  not  all ( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        result_ranked_by_confidence . prediction [ i ] . score  > =  result_ranked_by_confidence . prediction [ i  +  1 ] . score 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        for  i  in  range ( len ( result_ranked_by_confidence . prediction )  -  1 ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    ) 
							 
						 
					
						
							
								
									
										
										
										
											2021-09-22 16:56:51 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-04-21 11:24:39 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    # ranking can be adjusted so that result is sorted by score 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    bert_base_squad2 . model . prediction_heads [ 0 ] . use_confidence_scores_for_ranking  =  False 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    result_ranked_by_score  =  bert_base_squad2 . inference_from_objects ( obj_input ,  return_json = False ) [ 0 ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  all ( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        result_ranked_by_score . prediction [ i ] . score  > =  result_ranked_by_score . prediction [ i  +  1 ] . score 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        for  i  in  range ( len ( result_ranked_by_score . prediction )  -  1 ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  not  all ( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        result_ranked_by_score . prediction [ i ] . confidence  > =  result_ranked_by_score . prediction [ i  +  1 ] . confidence 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        for  i  in  range ( len ( result_ranked_by_score . prediction )  -  1 ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-09-22 16:56:51 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  test_inference_objs ( span_inference_result ,  caplog = None ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    if  caplog : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        caplog . set_level ( logging . CRITICAL ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  span_inference_result 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  test_span_performance ( span_inference_result ,  caplog = None ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    if  caplog : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        caplog . set_level ( logging . CRITICAL ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    best_pred  =  span_inference_result . prediction [ 0 ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  best_pred . answer  ==  " GameTrailers " 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    best_score_gold  =  13.4205 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    best_score  =  best_pred . score 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  isclose ( best_score ,  best_score_gold ,  rel_tol = 0.001 ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    no_answer_gap_gold  =  13.9827 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    no_answer_gap  =  span_inference_result . no_answer_gap 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  isclose ( no_answer_gap ,  no_answer_gap_gold ,  rel_tol = 0.001 ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  test_no_answer_performance ( no_answer_inference_result ,  caplog = None ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    if  caplog : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        caplog . set_level ( logging . CRITICAL ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    best_pred  =  no_answer_inference_result . prediction [ 0 ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  best_pred . answer  ==  " no_answer " 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    best_score_gold  =  12.1445 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    best_score  =  best_pred . score 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  isclose ( best_score ,  best_score_gold ,  rel_tol = 0.001 ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    no_answer_gap_gold  =  - 14.4646 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    no_answer_gap  =  no_answer_inference_result . no_answer_gap 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  isclose ( no_answer_gap ,  no_answer_gap_gold ,  rel_tol = 0.001 ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  test_qa_pred_attributes ( span_inference_result ,  caplog = None ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    if  caplog : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        caplog . set_level ( logging . CRITICAL ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    qa_pred  =  span_inference_result 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-03 13:43:18 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    attributes_gold  =  [ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " aggregation_level " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " answer_types " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " context " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " context_window_size " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " ground_truth_answer " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " id " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " n_passages " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " no_answer_gap " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " prediction " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " question " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " to_json " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " to_squad_eval " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " token_offsets " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    ] 
							 
						 
					
						
							
								
									
										
										
										
											2021-09-22 16:56:51 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    for  ag  in  attributes_gold : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        assert  ag  in  dir ( qa_pred ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  test_qa_candidate_attributes ( span_inference_result ,  caplog = None ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    if  caplog : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        caplog . set_level ( logging . CRITICAL ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    qa_candidate  =  span_inference_result . prediction [ 0 ] 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-03 13:43:18 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    attributes_gold  =  [ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " aggregation_level " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " answer " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " answer_support " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " answer_type " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " context_window " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " n_passages_in_doc " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " offset_answer_end " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " offset_answer_start " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " offset_answer_support_end " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " offset_answer_support_start " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " offset_context_window_end " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " offset_context_window_start " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " offset_unit " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " passage_id " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " probability " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " score " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " set_answer_string " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " set_context_window " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " to_doc_level " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " to_list " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    ] 
							 
						 
					
						
							
								
									
										
										
										
											2021-09-22 16:56:51 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    for  ag  in  attributes_gold : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        assert  ag  in  dir ( qa_candidate ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  test_id ( span_inference_result ,  no_answer_inference_result ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  span_inference_result . id  ==  " best_id_ever " 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  no_answer_inference_result . id  ==  " best_id_ever " 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  test_duplicate_answer_filtering ( bert_base_squad2 ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    qa_input  =  [ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            " questions " :  [ " “In what country lies the Normandy?” " ] , 
							 
						 
					
						
							
								
									
										
										
										
											2023-04-28 17:08:41 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								            " text " :  """ The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ( \" Norman \"  comes from  \" Norseman \" ) 
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                raiders  and  pirates  from  Denmark ,  Iceland  and  Norway  who ,  under  their  leader  Rollo ,  agreed  to  swear  fealty  to  King  Charles  III  of  West  Francia .  Through  generations  of  assimilation  and  mixing  with  the  native  Frankish  and  Roman - Gaulish  populations ,  their  descendants  would  gradually  merge  with  the  Carolingian - based  cultures  of  West  Francia . 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-03 13:43:18 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								                The  distinct  cultural  and  ethnic  identity  of  the  Normans  emerged  initially  in  the  first  half  of  the  10 th  century ,  and  it  continued  to  evolve  over  the  succeeding  centuries .  Weird  things  happen  in  Normandy ,  France . """ , 
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    ] 
							 
						 
					
						
							
								
									
										
										
										
											2021-09-22 16:56:51 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    bert_base_squad2 . model . prediction_heads [ 0 ] . n_best  =  5 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    bert_base_squad2 . model . prediction_heads [ 0 ] . n_best_per_sample  =  5 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    bert_base_squad2 . model . prediction_heads [ 0 ] . duplicate_filtering  =  0 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    result  =  bert_base_squad2 . inference_from_dicts ( dicts = qa_input ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    offset_answer_starts  =  [ ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    offset_answer_ends  =  [ ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    for  answer  in  result [ 0 ] [ " predictions " ] [ 0 ] [ " answers " ] : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        offset_answer_starts . append ( answer [ " offset_answer_start " ] ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        offset_answer_ends . append ( answer [ " offset_answer_end " ] ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  len ( offset_answer_starts )  ==  len ( set ( offset_answer_starts ) ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  len ( offset_answer_ends )  ==  len ( set ( offset_answer_ends ) ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  test_no_duplicate_answer_filtering ( bert_base_squad2 ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    qa_input  =  [ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            " questions " :  [ " “In what country lies the Normandy?” " ] , 
							 
						 
					
						
							
								
									
										
										
										
											2023-04-28 17:08:41 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								            " text " :  """ The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ( \" Norman \"  comes from  \" Norseman \" ) 
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                    raiders  and  pirates  from  Denmark ,  Iceland  and  Norway  who ,  under  their  leader  Rollo ,  agreed  to  swear  fealty  to  King  Charles  III  of  West  Francia .  Through  generations  of  assimilation  and  mixing  with  the  native  Frankish  and  Roman - Gaulish  populations ,  their  descendants  would  gradually  merge  with  the  Carolingian - based  cultures  of  West  Francia . 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-03 13:43:18 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								                    The  distinct  cultural  and  ethnic  identity  of  the  Normans  emerged  initially  in  the  first  half  of  the  10 th  century ,  and  it  continued  to  evolve  over  the  succeeding  centuries .  Weird  things  happen  in  Normandy ,  France . """ , 
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    ] 
							 
						 
					
						
							
								
									
										
										
										
											2021-09-22 16:56:51 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    bert_base_squad2 . model . prediction_heads [ 0 ] . n_best  =  5 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    bert_base_squad2 . model . prediction_heads [ 0 ] . n_best_per_sample  =  5 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    bert_base_squad2 . model . prediction_heads [ 0 ] . duplicate_filtering  =  - 1 
							 
						 
					
						
							
								
									
										
										
										
											2022-04-21 11:24:39 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    bert_base_squad2 . model . prediction_heads [ 0 ] . no_ans_boost  =  - 100.0 
							 
						 
					
						
							
								
									
										
										
										
											2021-09-22 16:56:51 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    result  =  bert_base_squad2 . inference_from_dicts ( dicts = qa_input ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    offset_answer_starts  =  [ ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    offset_answer_ends  =  [ ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    for  answer  in  result [ 0 ] [ " predictions " ] [ 0 ] [ " answers " ] : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        offset_answer_starts . append ( answer [ " offset_answer_start " ] ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        offset_answer_ends . append ( answer [ " offset_answer_end " ] ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  len ( offset_answer_starts )  !=  len ( set ( offset_answer_starts ) ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  len ( offset_answer_ends )  !=  len ( set ( offset_answer_ends ) ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  test_range_duplicate_answer_filtering ( bert_base_squad2 ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    qa_input  =  [ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            " questions " :  [ " “In what country lies the Normandy?” " ] , 
							 
						 
					
						
							
								
									
										
										
										
											2023-04-28 17:08:41 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								            " text " :  """ The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ( \" Norman \"  comes from  \" Norseman \" ) 
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								                    raiders  and  pirates  from  Denmark ,  Iceland  and  Norway  who ,  under  their  leader  Rollo ,  agreed  to  swear  fealty  to  King  Charles  III  of  West  Francia .  Through  generations  of  assimilation  and  mixing  with  the  native  Frankish  and  Roman - Gaulish  populations ,  their  descendants  would  gradually  merge  with  the  Carolingian - based  cultures  of  West  Francia . 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-03 13:43:18 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								                    The  distinct  cultural  and  ethnic  identity  of  the  Normans  emerged  initially  in  the  first  half  of  the  10 th  century ,  and  it  continued  to  evolve  over  the  succeeding  centuries .  Weird  things  happen  in  Normandy ,  France . """ , 
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    ] 
							 
						 
					
						
							
								
									
										
										
										
											2021-09-22 16:56:51 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    bert_base_squad2 . model . prediction_heads [ 0 ] . n_best  =  5 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    bert_base_squad2 . model . prediction_heads [ 0 ] . n_best_per_sample  =  5 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    bert_base_squad2 . model . prediction_heads [ 0 ] . duplicate_filtering  =  5 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    result  =  bert_base_squad2 . inference_from_dicts ( dicts = qa_input ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    offset_answer_starts  =  [ ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    offset_answer_ends  =  [ ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    for  answer  in  result [ 0 ] [ " predictions " ] [ 0 ] [ " answers " ] : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        offset_answer_starts . append ( answer [ " offset_answer_start " ] ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        offset_answer_ends . append ( answer [ " offset_answer_end " ] ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    offset_answer_starts . sort ( ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    offset_answer_starts . remove ( 0 ) 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-03 13:43:18 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    distances_answer_starts  =  [ j  -  i  for  i ,  j  in  zip ( offset_answer_starts [ : - 1 ] ,  offset_answer_starts [ 1 : ] ) ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  all ( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        distance  >  bert_base_squad2 . model . prediction_heads [ 0 ] . duplicate_filtering 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        for  distance  in  distances_answer_starts 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    ) 
							 
						 
					
						
							
								
									
										
										
										
											2021-09-22 16:56:51 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    offset_answer_ends . sort ( ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    offset_answer_ends . remove ( 0 ) 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-03 13:43:18 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    distances_answer_ends  =  [ j  -  i  for  i ,  j  in  zip ( offset_answer_ends [ : - 1 ] ,  offset_answer_ends [ 1 : ] ) ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  all ( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        distance  >  bert_base_squad2 . model . prediction_heads [ 0 ] . duplicate_filtering  for  distance  in  distances_answer_ends 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    ) 
							 
						 
					
						
							
								
									
										
										
										
											2021-09-22 16:56:51 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								def  test_qa_confidence ( ) :  
						 
					
						
							
								
									
										
										
										
											2022-02-03 13:43:18 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    inferencer  =  QAInferencer . load ( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        " deepset/roberta-base-squad2 " ,  task_type = " question_answering " ,  batch_size = 40 ,  gpu = True 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    ) 
							 
						 
					
						
							
								
									
										
										
										
											2021-09-22 16:56:51 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								    QA_input  =  [ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            " questions " :  [ " Who counted the game among the best ever made? " ] , 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-03 13:43:18 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								            " text " :  " Twilight Princess was released to universal critical acclaim and commercial success. It received perfect scores from major publications such as 1UP.com, Computer and Video Games, Electronic Gaming Monthly, Game Informer, GamesRadar, and GameSpy. On the review aggregators GameRankings and Metacritic, Twilight Princess has average scores of 95 % a nd 95 for the Wii version and scores of 95 % a nd 96 for the GameCube version. GameTrailers in their review called it one of the greatest games ever created. " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    ] 
							 
						 
					
						
							
								
									
										
										
										
											2021-09-22 16:56:51 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								    result  =  inferencer . inference_from_dicts ( dicts = QA_input ,  return_json = False ) [ 0 ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  np . isclose ( result . prediction [ 0 ] . confidence ,  0.990427553653717 ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    assert  result . prediction [ 0 ] . answer  ==  " GameTrailers "