From 7c81dfdc3af276d9472bca761acf697cf1daed66 Mon Sep 17 00:00:00 2001 From: Branden Chan Date: Tue, 27 Oct 2020 12:41:11 +0100 Subject: [PATCH] Address reviewer comments --- docs/_src/benchmarks/reader_performance.json | 10 +++++----- haystack/preprocessor/utils.py | 3 ++- test/benchmarks/reader_results.csv | 6 +++++- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/docs/_src/benchmarks/reader_performance.json b/docs/_src/benchmarks/reader_performance.json index fc8d6fea5..41595406f 100644 --- a/docs/_src/benchmarks/reader_performance.json +++ b/docs/_src/benchmarks/reader_performance.json @@ -10,10 +10,10 @@ "Speed (passages/sec)" ], "data": [ - { - "F1": 0.7384645069741224, - "Speed": 4239.284314014953, - "Model": "MiniLM" - } + {"F1": 80.67985794671885, "Model": "RoBERTa", "Speed": 92.3039712094936}, + {"F1": 78.23306265318686, "Model": "MiniLM", "Speed": 98.62387044489223}, + {"F1": 74.90271600053505, "Model": "BERT base", "Speed": 99.92750782409666}, + {"F1": 82.64545708097472, "Model": "BERT large", "Speed": 39.529824033964466}, + {"F1": 85.26275190954586, "Model": "XLM-RoBERTa", "Speed": 39.29142006004379} ] } \ No newline at end of file diff --git a/haystack/preprocessor/utils.py b/haystack/preprocessor/utils.py index 9f4c6851c..06a2ad407 100644 --- a/haystack/preprocessor/utils.py +++ b/haystack/preprocessor/utils.py @@ -19,12 +19,13 @@ from haystack.file_converter.txt import TextConverter logger = logging.getLogger(__name__) -def eval_data_from_file(filename: str, n_docs: Union[int, bool]=None) -> Tuple[List[Document], List[Label]]: +def eval_data_from_file(filename: str, max_docs: Union[int, bool]=None) -> Tuple[List[Document], List[Label]]: """ Read Documents + Labels from a SQuAD-style file. Document and Labels can then be indexed to the DocumentStore and be used for evaluation. :param filename: Path to file in SQuAD format + :param max_docs: This sets the number of documents that will be loaded. By default, this is set to None, thus reading in all available eval documents. :return: (List of Documents, List of Labels) """ docs = [] diff --git a/test/benchmarks/reader_results.csv b/test/benchmarks/reader_results.csv index 5a03edcc1..5fc081050 100644 --- a/test/benchmarks/reader_results.csv +++ b/test/benchmarks/reader_results.csv @@ -1,2 +1,6 @@ ,EM,f1,top_n_accuracy,top_n,reader_time,seconds_per_query,passages_per_second,reader,error -0,0.7067307692307693,0.7384645069741224,0.9567307692307693,5,2.9132275839983777,0.014005901846146047,4239.2843140149525,deepset/minilm-uncased-squad2, +0,0.7589752233271532,0.8067985794671885,0.9671329849991572,5,133.79706027999998,0.011275666634080564,92.30397120949361,deepset/roberta-base-squad2, +1,0.7359683128265633,0.7823306265318686,0.9714309792684982,5,125.22323393199997,0.010553112584864317,98.62387044489225,deepset/minilm-uncased-squad2, +2,0.700825889094893,0.7490271600053505,0.9585369964604753,5,123.58959278499992,0.010415438461570867,99.92750782409666,deepset/bert-base-cased-squad2, +3,0.7821506826226192,0.8264545708097472,0.9762346199224675,5,312.42233685099995,0.026329204184308102,39.529824033964466,deepset/bert-large-uncased-whole-word-masking-squad2, +4,0.8099612337771785,0.8526275190954586,0.9772459126917242,5,314.3179854819998,0.026488958830439897,39.29142006004379,deepset/xlm-roberta-large-squad2, \ No newline at end of file