mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-11-02 02:39:51 +00:00
Fix docstring examples (#604)
* Fix docstring examples * Unify code example format * Add md files
This commit is contained in:
parent
3dee284f20
commit
ae530c3a41
@ -62,6 +62,7 @@ more performant with DPR embeddings. 'cosine' is recommended if you are using a
|
||||
#### write\_documents
|
||||
|
||||
```python
|
||||
| @abstractmethod
|
||||
| write_documents(documents: Union[List[dict], List[Document]], index: Optional[str] = None)
|
||||
```
|
||||
|
||||
@ -277,7 +278,7 @@ Indexes documents for later queries.
|
||||
- `documents`: a list of Python dictionaries or a list of Haystack Document objects.
|
||||
For documents as dictionaries, the format is {"text": "<the-actual-text>"}.
|
||||
Optionally: Include meta data via {"text": "<the-actual-text>",
|
||||
"meta":{"name": "<some-document-name>", "author": "somebody", ...}}
|
||||
"meta": {"name": "<some-document-name>", "author": "somebody", ...}}
|
||||
It can be used for filtering and is accessible in the responses of the Finder.
|
||||
- `index`: add an optional index attribute to documents. It can be later used for filtering. For instance,
|
||||
documents for evaluation can be indexed in a separate index than the documents for search.
|
||||
|
||||
@ -5,14 +5,16 @@
|
||||
## TextConverter Objects
|
||||
|
||||
```python
|
||||
class TextConverter(BaseConverter)
|
||||
class BaseConverter()
|
||||
```
|
||||
|
||||
<a name="txt.TextConverter.__init__"></a>
|
||||
Base class for implementing file converters to transform input documents to text format for ingestion in DocumentStore.
|
||||
|
||||
<a name="base.BaseConverter.__init__"></a>
|
||||
#### \_\_init\_\_
|
||||
|
||||
```python
|
||||
| __init__(remove_numeric_tables: Optional[bool] = False, valid_languages: Optional[List[str]] = None)
|
||||
| __init__(remove_numeric_tables: Optional[bool] = None, valid_languages: Optional[List[str]] = None)
|
||||
```
|
||||
|
||||
**Arguments**:
|
||||
@ -28,24 +30,57 @@ This option can be used to add test for encoding errors. If the extracted text i
|
||||
not one of the valid languages, then it is likely an encoding error resulting
|
||||
in garbled text.
|
||||
|
||||
<a name="txt.TextConverter.convert"></a>
|
||||
<a name="base.BaseConverter.convert"></a>
|
||||
#### convert
|
||||
|
||||
```python
|
||||
| convert(file_path: Path, meta: Optional[Dict[str, str]] = None, encoding: str = "utf-8") -> Dict[str, Any]
|
||||
| @abstractmethod
|
||||
| convert(file_path: Path, meta: Optional[Dict[str, str]]) -> Dict[str, Any]
|
||||
```
|
||||
|
||||
Reads text from a txt file and executes optional preprocessing steps.
|
||||
Convert a file to a dictionary containing the text and any associated meta data.
|
||||
|
||||
File converters may extract file meta like name or size. In addition to it, user
|
||||
supplied meta data like author, url, external IDs can be supplied as a dictionary.
|
||||
|
||||
**Arguments**:
|
||||
|
||||
- `file_path`: Path of the file to convert
|
||||
- `meta`: Optional meta data that should be associated with the document (e.g. name)
|
||||
- `encoding`: Encoding of the file
|
||||
- `file_path`: path of the file to convert
|
||||
- `meta`: dictionary of meta data key-value pairs to append in the returned document.
|
||||
|
||||
**Returns**:
|
||||
<a name="base.BaseConverter.validate_language"></a>
|
||||
#### validate\_language
|
||||
|
||||
Dict of format {"text": "The text from file", "meta": meta}
|
||||
```python
|
||||
| validate_language(text: str) -> bool
|
||||
```
|
||||
|
||||
Validate if the language of the text is one of valid languages.
|
||||
|
||||
<a name="docx"></a>
|
||||
# docx
|
||||
|
||||
<a name="docx.DocxToTextConverter"></a>
|
||||
## DocxToTextConverter
|
||||
|
||||
```python
|
||||
class DocxToTextConverter(BaseConverter)
|
||||
```
|
||||
|
||||
<a name="docx.DocxToTextConverter.convert"></a>
|
||||
#### convert
|
||||
|
||||
```python
|
||||
| convert(file_path: Path, meta: Optional[Dict[str, str]] = None) -> Dict[str, Any]
|
||||
```
|
||||
|
||||
Extract text from a .docx file.
|
||||
Note: As docx doesn't contain "page" information, we actually extract and return a list of paragraphs here.
|
||||
For compliance with other converters we nevertheless opted for keeping the methods name.
|
||||
|
||||
**Arguments**:
|
||||
|
||||
- `file_path`: Path to the .docx file you want to convert
|
||||
|
||||
<a name="docx"></a>
|
||||
# Module docx
|
||||
@ -125,16 +160,14 @@ a list of pages and the extracted meta data of the file.
|
||||
## BaseConverter Objects
|
||||
|
||||
```python
|
||||
class BaseConverter()
|
||||
class TextConverter(BaseConverter)
|
||||
```
|
||||
|
||||
Base class for implementing file converters to transform input documents to text format for ingestion in DocumentStore.
|
||||
|
||||
<a name="base.BaseConverter.__init__"></a>
|
||||
<a name="txt.TextConverter.__init__"></a>
|
||||
#### \_\_init\_\_
|
||||
|
||||
```python
|
||||
| __init__(remove_numeric_tables: Optional[bool] = None, valid_languages: Optional[List[str]] = None)
|
||||
| __init__(remove_numeric_tables: Optional[bool] = False, valid_languages: Optional[List[str]] = None)
|
||||
```
|
||||
|
||||
**Arguments**:
|
||||
@ -150,32 +183,24 @@ This option can be used to add test for encoding errors. If the extracted text i
|
||||
not one of the valid languages, then it is likely an encoding error resulting
|
||||
in garbled text.
|
||||
|
||||
<a name="base.BaseConverter.convert"></a>
|
||||
<a name="txt.TextConverter.convert"></a>
|
||||
#### convert
|
||||
|
||||
```python
|
||||
| @abstractmethod
|
||||
| convert(file_path: Path, meta: Optional[Dict[str, str]]) -> Dict[str, Any]
|
||||
| convert(file_path: Path, meta: Optional[Dict[str, str]] = None, encoding: str = "utf-8") -> Dict[str, Any]
|
||||
```
|
||||
|
||||
Convert a file to a dictionary containing the text and any associated meta data.
|
||||
|
||||
File converters may extract file meta like name or size. In addition to it, user
|
||||
supplied meta data like author, url, external IDs can be supplied as a dictionary.
|
||||
Reads text from a txt file and executes optional preprocessing steps.
|
||||
|
||||
**Arguments**:
|
||||
|
||||
- `file_path`: path of the file to convert
|
||||
- `meta`: dictionary of meta data key-value pairs to append in the returned document.
|
||||
- `file_path`: Path of the file to convert
|
||||
- `meta`: Optional meta data that should be associated with the document (e.g. name)
|
||||
- `encoding`: Encoding of the file
|
||||
|
||||
<a name="base.BaseConverter.validate_language"></a>
|
||||
#### validate\_language
|
||||
**Returns**:
|
||||
|
||||
```python
|
||||
| validate_language(text: str) -> bool
|
||||
```
|
||||
|
||||
Validate if the language of the text is one of valid languages.
|
||||
Dict of format {"text": "The text from file", "meta": meta}
|
||||
|
||||
<a name="pdf"></a>
|
||||
# Module pdf
|
||||
|
||||
@ -1,3 +1,6 @@
|
||||
<a name="base"></a>
|
||||
# base
|
||||
|
||||
<a name="farm"></a>
|
||||
# Module farm
|
||||
|
||||
@ -146,20 +149,21 @@ Use loaded QA model to find answers for a question in the supplied list of Docum
|
||||
|
||||
Returns dictionaries containing answers sorted by (desc.) probability.
|
||||
Example:
|
||||
|
||||
{'question': 'Who is the father of Arya Stark?',
|
||||
'answers': [
|
||||
{'answer': 'Eddard,',
|
||||
'context': " She travels with her father, Eddard, to King's Landing when he is ",
|
||||
'offset_answer_start': 147,
|
||||
'offset_answer_end': 154,
|
||||
'probability': 0.9787139466668613,
|
||||
'score': None,
|
||||
'document_id': '1337'
|
||||
},
|
||||
...
|
||||
]
|
||||
}
|
||||
```python
|
||||
|{
|
||||
| 'question': 'Who is the father of Arya Stark?',
|
||||
| 'answers':[
|
||||
| {'answer': 'Eddard,',
|
||||
| 'context': " She travels with her father, Eddard, to King's Landing when he is ",
|
||||
| 'offset_answer_start': 147,
|
||||
| 'offset_answer_end': 154,
|
||||
| 'probability': 0.9787139466668613,
|
||||
| 'score': None,
|
||||
| 'document_id': '1337'
|
||||
| },...
|
||||
| ]
|
||||
|}
|
||||
```
|
||||
|
||||
**Arguments**:
|
||||
|
||||
@ -223,20 +227,21 @@ Returns a dict containing the following metrics:
|
||||
Use loaded QA model to find answers for a question in the supplied list of Document.
|
||||
Returns dictionaries containing answers sorted by (desc.) probability.
|
||||
Example:
|
||||
|
||||
{
|
||||
'question': 'Who is the father of Arya Stark?',
|
||||
'answers':[
|
||||
{'answer': 'Eddard,',
|
||||
'context': " She travels with her father, Eddard, to King's Landing when he is ",
|
||||
'offset_answer_start': 147,
|
||||
'offset_answer_end': 154,
|
||||
'probability': 0.9787139466668613,
|
||||
'score': None,
|
||||
'document_id': '1337'
|
||||
},...
|
||||
]
|
||||
}
|
||||
```python
|
||||
|{
|
||||
| 'question': 'Who is the father of Arya Stark?',
|
||||
| 'answers':[
|
||||
| {'answer': 'Eddard,',
|
||||
| 'context': " She travels with her father, Eddard, to King's Landing when he is ",
|
||||
| 'offset_answer_start': 147,
|
||||
| 'offset_answer_end': 154,
|
||||
| 'probability': 0.9787139466668613,
|
||||
| 'score': None,
|
||||
| 'document_id': '1337'
|
||||
| },...
|
||||
| ]
|
||||
|}
|
||||
```
|
||||
|
||||
**Arguments**:
|
||||
|
||||
@ -343,19 +348,21 @@ Use loaded QA model to find answers for a question in the supplied list of Docum
|
||||
Returns dictionaries containing answers sorted by (desc.) probability.
|
||||
Example:
|
||||
|
||||
{'question': 'Who is the father of Arya Stark?',
|
||||
'answers': [
|
||||
{'answer': 'Eddard,',
|
||||
'context': " She travels with her father, Eddard, to King's Landing when he is ",
|
||||
'offset_answer_start': 147,
|
||||
'offset_answer_end': 154,
|
||||
'probability': 0.9787139466668613,
|
||||
'score': None,
|
||||
'document_id': '1337'
|
||||
},
|
||||
...
|
||||
]
|
||||
}
|
||||
```python
|
||||
|{
|
||||
| 'question': 'Who is the father of Arya Stark?',
|
||||
| 'answers':[
|
||||
| {'answer': 'Eddard,',
|
||||
| 'context': " She travels with her father, Eddard, to King's Landing when he is ",
|
||||
| 'offset_answer_start': 147,
|
||||
| 'offset_answer_end': 154,
|
||||
| 'probability': 0.9787139466668613,
|
||||
| 'score': None,
|
||||
| 'document_id': '1337'
|
||||
| },...
|
||||
| ]
|
||||
|}
|
||||
```
|
||||
|
||||
**Arguments**:
|
||||
|
||||
|
||||
@ -1,3 +1,65 @@
|
||||
<a name="base"></a>
|
||||
# base
|
||||
|
||||
<a name="base.BaseRetriever"></a>
|
||||
## BaseRetriever
|
||||
|
||||
```python
|
||||
class BaseRetriever(ABC)
|
||||
```
|
||||
|
||||
<a name="base.BaseRetriever.retrieve"></a>
|
||||
#### retrieve
|
||||
|
||||
```python
|
||||
| @abstractmethod
|
||||
| retrieve(query: str, filters: dict = None, top_k: int = 10, index: str = None) -> List[Document]
|
||||
```
|
||||
|
||||
Scan through documents in DocumentStore and return a small number of documents
|
||||
that are most relevant to the query.
|
||||
|
||||
**Arguments**:
|
||||
|
||||
- `query`: The query
|
||||
- `filters`: A dictionary where the keys specify a metadata field and the value is a list of accepted values for that field
|
||||
- `top_k`: How many documents to return per query.
|
||||
- `index`: The name of the index in the DocumentStore from which to retrieve documents
|
||||
|
||||
<a name="base.BaseRetriever.eval"></a>
|
||||
#### eval
|
||||
|
||||
```python
|
||||
| eval(label_index: str = "label", doc_index: str = "eval_document", label_origin: str = "gold_label", top_k: int = 10, open_domain: bool = False, return_preds: bool = False) -> dict
|
||||
```
|
||||
|
||||
Performs evaluation on the Retriever.
|
||||
Retriever is evaluated based on whether it finds the correct document given the question string and at which
|
||||
position in the ranking of documents the correct document is.
|
||||
|
||||
| Returns a dict containing the following metrics:
|
||||
|
||||
- "recall": Proportion of questions for which correct document is among retrieved documents
|
||||
- "mrr": Mean of reciprocal rank. Rewards retrievers that give relevant documents a higher rank.
|
||||
Only considers the highest ranked relevant document.
|
||||
- "map": Mean of average precision for each question. Rewards retrievers that give relevant
|
||||
documents a higher rank. Considers all retrieved relevant documents. If ``open_domain=True``,
|
||||
average precision is normalized by the number of retrieved relevant documents per query.
|
||||
If ``open_domain=False``, average precision is normalized by the number of all relevant documents
|
||||
per query.
|
||||
|
||||
**Arguments**:
|
||||
|
||||
- `label_index`: Index/Table in DocumentStore where labeled questions are stored
|
||||
- `doc_index`: Index/Table in DocumentStore where documents that are used for evaluation are stored
|
||||
- `top_k`: How many documents to return per question
|
||||
- `open_domain`: If ``True``, retrieval will be evaluated by checking if the answer string to a question is
|
||||
contained in the retrieved docs (common approach in open-domain QA).
|
||||
If ``False``, retrieval uses a stricter evaluation that checks if the retrieved document ids
|
||||
are within ids explicitly stated in the labels.
|
||||
- `return_preds`: Whether to add predictions in the returned dictionary. If True, the returned dictionary
|
||||
contains the keys "predictions" and "metrics".
|
||||
|
||||
<a name="sparse"></a>
|
||||
# Module sparse
|
||||
|
||||
@ -27,28 +89,28 @@ names must match with the filters dict supplied in self.retrieve().
|
||||
|
||||
**An example custom_query:**
|
||||
```python
|
||||
{
|
||||
> "size": 10,
|
||||
> "query": {
|
||||
> "bool": {
|
||||
> "should": [{"multi_match": {
|
||||
> "query": "${question}", // mandatory $question placeholder
|
||||
> "type": "most_fields",
|
||||
> "fields": ["text", "title"]}}],
|
||||
> "filter": [ // optional custom filters
|
||||
> {"terms": {"year": "${years}"}},
|
||||
> {"terms": {"quarter": "${quarters}"}},
|
||||
> {"range": {"date": {"gte": "${date}"}}}
|
||||
> ],
|
||||
> }
|
||||
> },
|
||||
}
|
||||
| {
|
||||
| "size": 10,
|
||||
| "query": {
|
||||
| "bool": {
|
||||
| "should": [{"multi_match": {
|
||||
| "query": "${question}", // mandatory $question placeholder
|
||||
| "type": "most_fields",
|
||||
| "fields": ["text", "title"]}}],
|
||||
| "filter": [ // optional custom filters
|
||||
| {"terms": {"year": "${years}"}},
|
||||
| {"terms": {"quarter": "${quarters}"}},
|
||||
| {"range": {"date": {"gte": "${date}"}}}
|
||||
| ],
|
||||
| }
|
||||
| },
|
||||
| }
|
||||
```
|
||||
|
||||
**For this custom_query, a sample retrieve() could be:**
|
||||
```python
|
||||
self.retrieve(query="Why did the revenue increase?",
|
||||
> filters={"years": ["2019"], "quarters": ["Q1", "Q2"]})
|
||||
| self.retrieve(query="Why did the revenue increase?",
|
||||
| filters={"years": ["2019"], "quarters": ["Q1", "Q2"]})
|
||||
```
|
||||
|
||||
<a name="sparse.ElasticsearchFilterOnlyRetriever"></a>
|
||||
@ -103,14 +165,14 @@ The checkpoint format matches huggingface transformers' model format
|
||||
**Example:**
|
||||
|
||||
```python
|
||||
# remote model from FAIR
|
||||
DensePassageRetriever(document_store=your_doc_store,
|
||||
> query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
|
||||
> passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base")
|
||||
# or from local path
|
||||
DensePassageRetriever(document_store=your_doc_store,
|
||||
> query_embedding_model="model_directory/question-encoder",
|
||||
> passage_embedding_model="model_directory/context-encoder")
|
||||
| # remote model from FAIR
|
||||
| DensePassageRetriever(document_store=your_doc_store,
|
||||
| query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
|
||||
| passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base")
|
||||
| # or from local path
|
||||
| DensePassageRetriever(document_store=your_doc_store,
|
||||
| query_embedding_model="model_directory/question-encoder",
|
||||
| passage_embedding_model="model_directory/context-encoder")
|
||||
```
|
||||
|
||||
**Arguments**:
|
||||
|
||||
@ -34,27 +34,30 @@ class RAGenerator(BaseGenerator):
|
||||
**Example**
|
||||
|
||||
```python
|
||||
> question = "who got the first nobel prize in physics?"
|
||||
|
||||
# Retrieve related documents from retriever
|
||||
> retrieved_docs = retriever.retrieve(query=question)
|
||||
|
||||
> # Now generate answer from question and retrieved documents
|
||||
> generator.predict(
|
||||
> question=question,
|
||||
> documents=retrieved_docs,
|
||||
> top_k=1
|
||||
> )
|
||||
{'question': 'who got the first nobel prize in physics',
|
||||
'answers':
|
||||
[{'question': 'who got the first nobel prize in physics',
|
||||
'answer': ' albert einstein',
|
||||
'meta': { 'doc_ids': [...],
|
||||
'doc_scores': [80.42758 ...],
|
||||
'doc_probabilities': [40.71379089355469, ...
|
||||
'texts': ['Albert Einstein was a ...]
|
||||
'titles': ['"Albert Einstein"', ...]
|
||||
}}]}
|
||||
| question = "who got the first nobel prize in physics?"
|
||||
|
|
||||
| # Retrieve related documents from retriever
|
||||
| retrieved_docs = retriever.retrieve(query=question)
|
||||
|
|
||||
| # Now generate answer from question and retrieved documents
|
||||
| generator.predict(
|
||||
| question=question,
|
||||
| documents=retrieved_docs,
|
||||
| top_k=1
|
||||
| )
|
||||
|
|
||||
| # Answer
|
||||
|
|
||||
| {'question': 'who got the first nobel prize in physics',
|
||||
| 'answers':
|
||||
| [{'question': 'who got the first nobel prize in physics',
|
||||
| 'answer': ' albert einstein',
|
||||
| 'meta': { 'doc_ids': [...],
|
||||
| 'doc_scores': [80.42758 ...],
|
||||
| 'doc_probabilities': [40.71379089355469, ...
|
||||
| 'texts': ['Albert Einstein was a ...]
|
||||
| 'titles': ['"Albert Einstein"', ...]
|
||||
| }}]}
|
||||
```
|
||||
"""
|
||||
|
||||
@ -191,16 +194,16 @@ class RAGenerator(BaseGenerator):
|
||||
:return: Generated answers plus additional infos in a dict like this:
|
||||
|
||||
```python
|
||||
> {'question': 'who got the first nobel prize in physics',
|
||||
> 'answers':
|
||||
> [{'question': 'who got the first nobel prize in physics',
|
||||
> 'answer': ' albert einstein',
|
||||
> 'meta': { 'doc_ids': [...],
|
||||
> 'doc_scores': [80.42758 ...],
|
||||
> 'doc_probabilities': [40.71379089355469, ...
|
||||
> 'texts': ['Albert Einstein was a ...]
|
||||
> 'titles': ['"Albert Einstein"', ...]
|
||||
> }}]}
|
||||
| {'question': 'who got the first nobel prize in physics',
|
||||
| 'answers':
|
||||
| [{'question': 'who got the first nobel prize in physics',
|
||||
| 'answer': ' albert einstein',
|
||||
| 'meta': { 'doc_ids': [...],
|
||||
| 'doc_scores': [80.42758 ...],
|
||||
| 'doc_probabilities': [40.71379089355469, ...
|
||||
| 'texts': ['Albert Einstein was a ...]
|
||||
| 'titles': ['"Albert Einstein"', ...]
|
||||
| }}]}
|
||||
```
|
||||
"""
|
||||
if len(documents) == 0:
|
||||
|
||||
@ -308,20 +308,21 @@ class FARMReader(BaseReader):
|
||||
|
||||
Returns dictionaries containing answers sorted by (desc.) probability.
|
||||
Example:
|
||||
|
||||
{'question': 'Who is the father of Arya Stark?',
|
||||
'answers': [
|
||||
{'answer': 'Eddard,',
|
||||
'context': " She travels with her father, Eddard, to King's Landing when he is ",
|
||||
'offset_answer_start': 147,
|
||||
'offset_answer_end': 154,
|
||||
'probability': 0.9787139466668613,
|
||||
'score': None,
|
||||
'document_id': '1337'
|
||||
},
|
||||
...
|
||||
]
|
||||
}
|
||||
```python
|
||||
|{
|
||||
| 'question': 'Who is the father of Arya Stark?',
|
||||
| 'answers':[
|
||||
| {'answer': 'Eddard,',
|
||||
| 'context': " She travels with her father, Eddard, to King's Landing when he is ",
|
||||
| 'offset_answer_start': 147,
|
||||
| 'offset_answer_end': 154,
|
||||
| 'probability': 0.9787139466668613,
|
||||
| 'score': None,
|
||||
| 'document_id': '1337'
|
||||
| },...
|
||||
| ]
|
||||
|}
|
||||
```
|
||||
|
||||
:param question: Question string
|
||||
:param documents: List of Document in which to search for the answer
|
||||
@ -571,20 +572,21 @@ class FARMReader(BaseReader):
|
||||
Use loaded QA model to find answers for a question in the supplied list of Document.
|
||||
Returns dictionaries containing answers sorted by (desc.) probability.
|
||||
Example:
|
||||
|
||||
{
|
||||
'question': 'Who is the father of Arya Stark?',
|
||||
'answers':[
|
||||
{'answer': 'Eddard,',
|
||||
'context': " She travels with her father, Eddard, to King's Landing when he is ",
|
||||
'offset_answer_start': 147,
|
||||
'offset_answer_end': 154,
|
||||
'probability': 0.9787139466668613,
|
||||
'score': None,
|
||||
'document_id': '1337'
|
||||
},...
|
||||
]
|
||||
}
|
||||
```python
|
||||
|{
|
||||
| 'question': 'Who is the father of Arya Stark?',
|
||||
| 'answers':[
|
||||
| {'answer': 'Eddard,',
|
||||
| 'context': " She travels with her father, Eddard, to King's Landing when he is ",
|
||||
| 'offset_answer_start': 147,
|
||||
| 'offset_answer_end': 154,
|
||||
| 'probability': 0.9787139466668613,
|
||||
| 'score': None,
|
||||
| 'document_id': '1337'
|
||||
| },...
|
||||
| ]
|
||||
|}
|
||||
```
|
||||
|
||||
:param question: Question string
|
||||
:param documents: List of documents as string type
|
||||
|
||||
@ -72,19 +72,21 @@ class TransformersReader(BaseReader):
|
||||
Returns dictionaries containing answers sorted by (desc.) probability.
|
||||
Example:
|
||||
|
||||
{'question': 'Who is the father of Arya Stark?',
|
||||
'answers': [
|
||||
{'answer': 'Eddard,',
|
||||
'context': " She travels with her father, Eddard, to King's Landing when he is ",
|
||||
'offset_answer_start': 147,
|
||||
'offset_answer_end': 154,
|
||||
'probability': 0.9787139466668613,
|
||||
'score': None,
|
||||
'document_id': '1337'
|
||||
},
|
||||
...
|
||||
]
|
||||
}
|
||||
```python
|
||||
|{
|
||||
| 'question': 'Who is the father of Arya Stark?',
|
||||
| 'answers':[
|
||||
| {'answer': 'Eddard,',
|
||||
| 'context': " She travels with her father, Eddard, to King's Landing when he is ",
|
||||
| 'offset_answer_start': 147,
|
||||
| 'offset_answer_end': 154,
|
||||
| 'probability': 0.9787139466668613,
|
||||
| 'score': None,
|
||||
| 'document_id': '1337'
|
||||
| },...
|
||||
| ]
|
||||
|}
|
||||
```
|
||||
|
||||
:param question: Question string
|
||||
:param documents: List of Document in which to search for the answer
|
||||
|
||||
@ -52,14 +52,14 @@ class DensePassageRetriever(BaseRetriever):
|
||||
**Example:**
|
||||
|
||||
```python
|
||||
# remote model from FAIR
|
||||
DensePassageRetriever(document_store=your_doc_store,
|
||||
> query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
|
||||
> passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base")
|
||||
# or from local path
|
||||
DensePassageRetriever(document_store=your_doc_store,
|
||||
> query_embedding_model="model_directory/question-encoder",
|
||||
> passage_embedding_model="model_directory/context-encoder")
|
||||
| # remote model from FAIR
|
||||
| DensePassageRetriever(document_store=your_doc_store,
|
||||
| query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
|
||||
| passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base")
|
||||
| # or from local path
|
||||
| DensePassageRetriever(document_store=your_doc_store,
|
||||
| query_embedding_model="model_directory/question-encoder",
|
||||
| passage_embedding_model="model_directory/context-encoder")
|
||||
```
|
||||
|
||||
:param document_store: An instance of DocumentStore from which to retrieve documents.
|
||||
@ -150,6 +150,8 @@ class DensePassageRetriever(BaseRetriever):
|
||||
"external_id": '19930582'}, ...]
|
||||
:return: dictionary of embeddings for "passages" and "query"
|
||||
"""
|
||||
|
||||
|
||||
dataset, tensor_names, baskets = self.processor.dataset_from_dicts(
|
||||
dicts, indices=[i for i in range(len(dicts))], return_baskets=True
|
||||
)
|
||||
|
||||
@ -27,28 +27,28 @@ class ElasticsearchRetriever(BaseRetriever):
|
||||
|
||||
**An example custom_query:**
|
||||
```python
|
||||
{
|
||||
> "size": 10,
|
||||
> "query": {
|
||||
> "bool": {
|
||||
> "should": [{"multi_match": {
|
||||
> "query": "${question}", // mandatory $question placeholder
|
||||
> "type": "most_fields",
|
||||
> "fields": ["text", "title"]}}],
|
||||
> "filter": [ // optional custom filters
|
||||
> {"terms": {"year": "${years}"}},
|
||||
> {"terms": {"quarter": "${quarters}"}},
|
||||
> {"range": {"date": {"gte": "${date}"}}}
|
||||
> ],
|
||||
> }
|
||||
> },
|
||||
}
|
||||
| {
|
||||
| "size": 10,
|
||||
| "query": {
|
||||
| "bool": {
|
||||
| "should": [{"multi_match": {
|
||||
| "query": "${question}", // mandatory $question placeholder
|
||||
| "type": "most_fields",
|
||||
| "fields": ["text", "title"]}}],
|
||||
| "filter": [ // optional custom filters
|
||||
| {"terms": {"year": "${years}"}},
|
||||
| {"terms": {"quarter": "${quarters}"}},
|
||||
| {"range": {"date": {"gte": "${date}"}}}
|
||||
| ],
|
||||
| }
|
||||
| },
|
||||
| }
|
||||
```
|
||||
|
||||
**For this custom_query, a sample retrieve() could be:**
|
||||
```python
|
||||
self.retrieve(query="Why did the revenue increase?",
|
||||
> filters={"years": ["2019"], "quarters": ["Q1", "Q2"]})
|
||||
| self.retrieve(query="Why did the revenue increase?",
|
||||
| filters={"years": ["2019"], "quarters": ["Q1", "Q2"]})
|
||||
```
|
||||
"""
|
||||
self.document_store: ElasticsearchDocumentStore = document_store
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user