From 029d1b75f2cb0706e9aa1a32b792ecf3106f53a9 Mon Sep 17 00:00:00 2001 From: Malte Pietsch Date: Fri, 2 Oct 2020 13:41:33 +0200 Subject: [PATCH] Update docstring in DPR for embed_title (#459) --- haystack/retriever/dense.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/haystack/retriever/dense.py b/haystack/retriever/dense.py index 64471f07d..410f33349 100644 --- a/haystack/retriever/dense.py +++ b/haystack/retriever/dense.py @@ -50,7 +50,12 @@ class DensePassageRetriever(BaseRetriever): :param max_seq_len: Longest length of each sequence :param use_gpu: Whether to use gpu or not :param batch_size: Number of questions or passages to encode at once - :param embed_title: Whether to concatenate title and passage to a text pair that is then used to create the embedding + :param embed_title: Whether to concatenate title and passage to a text pair that is then used to create the embedding. + This is the approach used in the original paper and is likely to improve performance if your + titles contain meaningful information for retrieval (topic, entities, etc.). + The title is expected to be present in doc.meta["name"] and can be supplied in the documents + before writing them to the DocumentStore like this: + {"text": "my text", "meta": {"name": "my title"}}. :param remove_sep_tok_from_untitled_passages: If embed_title is ``True``, there are different strategies to deal with documents that don't have a title. If this param is ``True`` => Embed passage as single text, similar to embed_title = False (i.e [CLS] passage_tok1 ... [SEP]). If this param is ``False`` => Embed passage as text pair with empty title (i.e. [CLS] [SEP] passage_tok1 ... [SEP])