Address reviewer comments

This commit is contained in:
Branden Chan 2020-10-27 12:41:11 +01:00
parent d5cb227909
commit 7c81dfdc3a
3 changed files with 12 additions and 7 deletions

View File

@ -10,10 +10,10 @@
"Speed (passages/sec)"
],
"data": [
{
"F1": 0.7384645069741224,
"Speed": 4239.284314014953,
"Model": "MiniLM"
}
{"F1": 80.67985794671885, "Model": "RoBERTa", "Speed": 92.3039712094936},
{"F1": 78.23306265318686, "Model": "MiniLM", "Speed": 98.62387044489223},
{"F1": 74.90271600053505, "Model": "BERT base", "Speed": 99.92750782409666},
{"F1": 82.64545708097472, "Model": "BERT large", "Speed": 39.529824033964466},
{"F1": 85.26275190954586, "Model": "XLM-RoBERTa", "Speed": 39.29142006004379}
]
}

View File

@ -19,12 +19,13 @@ from haystack.file_converter.txt import TextConverter
logger = logging.getLogger(__name__)
def eval_data_from_file(filename: str, n_docs: Union[int, bool]=None) -> Tuple[List[Document], List[Label]]:
def eval_data_from_file(filename: str, max_docs: Union[int, bool]=None) -> Tuple[List[Document], List[Label]]:
"""
Read Documents + Labels from a SQuAD-style file.
Documents and Labels can then be indexed to the DocumentStore and be used for evaluation.
:param filename: Path to file in SQuAD format
:param max_docs: This sets the number of documents that will be loaded. By default, this is set to None, thus reading in all available eval documents.
:return: (List of Documents, List of Labels)
"""
docs = []

View File

@ -1,2 +1,6 @@
,EM,f1,top_n_accuracy,top_n,reader_time,seconds_per_query,passages_per_second,reader,error
0,0.7067307692307693,0.7384645069741224,0.9567307692307693,5,2.9132275839983777,0.014005901846146047,4239.2843140149525,deepset/minilm-uncased-squad2,
0,0.7589752233271532,0.8067985794671885,0.9671329849991572,5,133.79706027999998,0.011275666634080564,92.30397120949361,deepset/roberta-base-squad2,
1,0.7359683128265633,0.7823306265318686,0.9714309792684982,5,125.22323393199997,0.010553112584864317,98.62387044489225,deepset/minilm-uncased-squad2,
2,0.700825889094893,0.7490271600053505,0.9585369964604753,5,123.58959278499992,0.010415438461570867,99.92750782409666,deepset/bert-base-cased-squad2,
3,0.7821506826226192,0.8264545708097472,0.9762346199224675,5,312.42233685099995,0.026329204184308102,39.529824033964466,deepset/bert-large-uncased-whole-word-masking-squad2,
4,0.8099612337771785,0.8526275190954586,0.9772459126917242,5,314.3179854819998,0.026488958830439897,39.29142006004379,deepset/xlm-roberta-large-squad2,
1 EM f1 top_n_accuracy top_n reader_time seconds_per_query passages_per_second reader error
2 0 0.7067307692307693 0.7589752233271532 0.7384645069741224 0.8067985794671885 0.9567307692307693 0.9671329849991572 5 2.9132275839983777 133.79706027999998 0.014005901846146047 0.011275666634080564 4239.2843140149525 92.30397120949361 deepset/minilm-uncased-squad2 deepset/roberta-base-squad2
3 1 0.7359683128265633 0.7823306265318686 0.9714309792684982 5 125.22323393199997 0.010553112584864317 98.62387044489225 deepset/minilm-uncased-squad2
4 2 0.700825889094893 0.7490271600053505 0.9585369964604753 5 123.58959278499992 0.010415438461570867 99.92750782409666 deepset/bert-base-cased-squad2
5 3 0.7821506826226192 0.8264545708097472 0.9762346199224675 5 312.42233685099995 0.026329204184308102 39.529824033964466 deepset/bert-large-uncased-whole-word-masking-squad2
6 4 0.8099612337771785 0.8526275190954586 0.9772459126917242 5 314.3179854819998 0.026488958830439897 39.29142006004379 deepset/xlm-roberta-large-squad2