Address reviewer comments

This commit is contained in:
Branden Chan 2020-10-27 12:41:11 +01:00
parent d5cb227909
commit 7c81dfdc3a
3 changed files with 12 additions and 7 deletions

View File

@ -10,10 +10,10 @@
"Speed (passages/sec)" "Speed (passages/sec)"
], ],
"data": [ "data": [
{ {"F1": 80.67985794671885, "Model": "RoBERTa", "Speed": 92.3039712094936},
"F1": 0.7384645069741224, {"F1": 78.23306265318686, "Model": "MiniLM", "Speed": 98.62387044489223},
"Speed": 4239.284314014953, {"F1": 74.90271600053505, "Model": "BERT base", "Speed": 99.92750782409666},
"Model": "MiniLM" {"F1": 82.64545708097472, "Model": "BERT large", "Speed": 39.529824033964466},
} {"F1": 85.26275190954586, "Model": "XLM-RoBERTa", "Speed": 39.29142006004379}
] ]
} }

View File

@ -19,12 +19,13 @@ from haystack.file_converter.txt import TextConverter
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def eval_data_from_file(filename: str, n_docs: Union[int, bool]=None) -> Tuple[List[Document], List[Label]]: def eval_data_from_file(filename: str, max_docs: Union[int, bool]=None) -> Tuple[List[Document], List[Label]]:
""" """
Read Documents + Labels from a SQuAD-style file. Read Documents + Labels from a SQuAD-style file.
Document and Labels can then be indexed to the DocumentStore and be used for evaluation. Document and Labels can then be indexed to the DocumentStore and be used for evaluation.
:param filename: Path to file in SQuAD format :param filename: Path to file in SQuAD format
:param max_docs: This sets the number of documents that will be loaded. By default, this is set to None, thus reading in all available eval documents.
:return: (List of Documents, List of Labels) :return: (List of Documents, List of Labels)
""" """
docs = [] docs = []

View File

@ -1,2 +1,6 @@
,EM,f1,top_n_accuracy,top_n,reader_time,seconds_per_query,passages_per_second,reader,error ,EM,f1,top_n_accuracy,top_n,reader_time,seconds_per_query,passages_per_second,reader,error
0,0.7067307692307693,0.7384645069741224,0.9567307692307693,5,2.9132275839983777,0.014005901846146047,4239.2843140149525,deepset/minilm-uncased-squad2, 0,0.7589752233271532,0.8067985794671885,0.9671329849991572,5,133.79706027999998,0.011275666634080564,92.30397120949361,deepset/roberta-base-squad2,
1,0.7359683128265633,0.7823306265318686,0.9714309792684982,5,125.22323393199997,0.010553112584864317,98.62387044489225,deepset/minilm-uncased-squad2,
2,0.700825889094893,0.7490271600053505,0.9585369964604753,5,123.58959278499992,0.010415438461570867,99.92750782409666,deepset/bert-base-cased-squad2,
3,0.7821506826226192,0.8264545708097472,0.9762346199224675,5,312.42233685099995,0.026329204184308102,39.529824033964466,deepset/bert-large-uncased-whole-word-masking-squad2,
4,0.8099612337771785,0.8526275190954586,0.9772459126917242,5,314.3179854819998,0.026488958830439897,39.29142006004379,deepset/xlm-roberta-large-squad2,
1 EM f1 top_n_accuracy top_n reader_time seconds_per_query passages_per_second reader error
2 0 0.7067307692307693 0.7589752233271532 0.7384645069741224 0.8067985794671885 0.9567307692307693 0.9671329849991572 5 2.9132275839983777 133.79706027999998 0.014005901846146047 0.011275666634080564 4239.2843140149525 92.30397120949361 deepset/minilm-uncased-squad2 deepset/roberta-base-squad2
3 1 0.7359683128265633 0.7823306265318686 0.9714309792684982 5 125.22323393199997 0.010553112584864317 98.62387044489225 deepset/minilm-uncased-squad2
4 2 0.700825889094893 0.7490271600053505 0.9585369964604753 5 123.58959278499992 0.010415438461570867 99.92750782409666 deepset/bert-base-cased-squad2
5 3 0.7821506826226192 0.8264545708097472 0.9762346199224675 5 312.42233685099995 0.026329204184308102 39.529824033964466 deepset/bert-large-uncased-whole-word-masking-squad2
6 4 0.8099612337771785 0.8526275190954586 0.9772459126917242 5 314.3179854819998 0.026488958830439897 39.29142006004379 deepset/xlm-roberta-large-squad2