mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-11-16 10:03:44 +00:00
Fix docstring examples (#604)
* Fix docstring examples * Unify code example format * Add md files
This commit is contained in:
parent
3dee284f20
commit
ae530c3a41
@ -62,6 +62,7 @@ more performant with DPR embeddings. 'cosine' is recommended if you are using a
|
|||||||
#### write\_documents
|
#### write\_documents
|
||||||
|
|
||||||
```python
|
```python
|
||||||
|
| @abstractmethod
|
||||||
| write_documents(documents: Union[List[dict], List[Document]], index: Optional[str] = None)
|
| write_documents(documents: Union[List[dict], List[Document]], index: Optional[str] = None)
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -277,7 +278,7 @@ Indexes documents for later queries.
|
|||||||
- `documents`: a list of Python dictionaries or a list of Haystack Document objects.
|
- `documents`: a list of Python dictionaries or a list of Haystack Document objects.
|
||||||
For documents as dictionaries, the format is {"text": "<the-actual-text>"}.
|
For documents as dictionaries, the format is {"text": "<the-actual-text>"}.
|
||||||
Optionally: Include meta data via {"text": "<the-actual-text>",
|
Optionally: Include meta data via {"text": "<the-actual-text>",
|
||||||
"meta":{"name": "<some-document-name>, "author": "somebody", ...}}
|
"meta": {"name": "<some-document-name>, "author": "somebody", ...}}
|
||||||
It can be used for filtering and is accessible in the responses of the Finder.
|
It can be used for filtering and is accessible in the responses of the Finder.
|
||||||
- `index`: add an optional index attribute to documents. It can be later used for filtering. For instance,
|
- `index`: add an optional index attribute to documents. It can be later used for filtering. For instance,
|
||||||
documents for evaluation can be indexed in a separate index than the documents for search.
|
documents for evaluation can be indexed in a separate index than the documents for search.
|
||||||
|
|||||||
@ -5,14 +5,16 @@
|
|||||||
## TextConverter Objects
|
## TextConverter Objects
|
||||||
|
|
||||||
```python
|
```python
|
||||||
class TextConverter(BaseConverter)
|
class BaseConverter()
|
||||||
```
|
```
|
||||||
|
|
||||||
<a name="txt.TextConverter.__init__"></a>
|
Base class for implementing file converters to transform input documents to text format for ingestion in DocumentStore.
|
||||||
|
|
||||||
|
<a name="base.BaseConverter.__init__"></a>
|
||||||
#### \_\_init\_\_
|
#### \_\_init\_\_
|
||||||
|
|
||||||
```python
|
```python
|
||||||
| __init__(remove_numeric_tables: Optional[bool] = False, valid_languages: Optional[List[str]] = None)
|
| __init__(remove_numeric_tables: Optional[bool] = None, valid_languages: Optional[List[str]] = None)
|
||||||
```
|
```
|
||||||
|
|
||||||
**Arguments**:
|
**Arguments**:
|
||||||
@ -28,24 +30,57 @@ This option can be used to add test for encoding errors. If the extracted text i
|
|||||||
not one of the valid languages, then it might likely be encoding error resulting
|
not one of the valid languages, then it might likely be encoding error resulting
|
||||||
in garbled text.
|
in garbled text.
|
||||||
|
|
||||||
<a name="txt.TextConverter.convert"></a>
|
<a name="base.BaseConverter.convert"></a>
|
||||||
#### convert
|
#### convert
|
||||||
|
|
||||||
```python
|
```python
|
||||||
| convert(file_path: Path, meta: Optional[Dict[str, str]] = None, encoding: str = "utf-8") -> Dict[str, Any]
|
| @abstractmethod
|
||||||
|
| convert(file_path: Path, meta: Optional[Dict[str, str]]) -> Dict[str, Any]
|
||||||
```
|
```
|
||||||
|
|
||||||
Reads text from a txt file and executes optional preprocessing steps.
|
Convert a file to a dictionary containing the text and any associated meta data.
|
||||||
|
|
||||||
|
File converters may extract file meta like name or size. In addition to it, user
|
||||||
|
supplied meta data like author, url, external IDs can be supplied as a dictionary.
|
||||||
|
|
||||||
**Arguments**:
|
**Arguments**:
|
||||||
|
|
||||||
- `file_path`: Path of the file to convert
|
- `file_path`: path of the file to convert
|
||||||
- `meta`: Optional meta data that should be associated with the the document (e.g. name)
|
- `meta`: dictionary of meta data key-value pairs to append in the returned document.
|
||||||
- `encoding`: Encoding of the file
|
|
||||||
|
|
||||||
**Returns**:
|
<a name="base.BaseConverter.validate_language"></a>
|
||||||
|
#### validate\_language
|
||||||
|
|
||||||
Dict of format {"text": "The text from file", "meta": meta}}
|
```python
|
||||||
|
| validate_language(text: str) -> bool
|
||||||
|
```
|
||||||
|
|
||||||
|
Validate if the language of the text is one of valid languages.
|
||||||
|
|
||||||
|
<a name="docx"></a>
|
||||||
|
# docx
|
||||||
|
|
||||||
|
<a name="docx.DocxToTextConverter"></a>
|
||||||
|
## DocxToTextConverter
|
||||||
|
|
||||||
|
```python
|
||||||
|
class DocxToTextConverter(BaseConverter)
|
||||||
|
```
|
||||||
|
|
||||||
|
<a name="docx.DocxToTextConverter.convert"></a>
|
||||||
|
#### convert
|
||||||
|
|
||||||
|
```python
|
||||||
|
| convert(file_path: Path, meta: Optional[Dict[str, str]] = None) -> Dict[str, Any]
|
||||||
|
```
|
||||||
|
|
||||||
|
Extract text from a .docx file.
|
||||||
|
Note: As docx doesn't contain "page" information, we actually extract and return a list of paragraphs here.
|
||||||
|
For compliance with other converters we nevertheless opted for keeping the methods name.
|
||||||
|
|
||||||
|
**Arguments**:
|
||||||
|
|
||||||
|
- `file_path`: Path to the .docx file you want to convert
|
||||||
|
|
||||||
<a name="docx"></a>
|
<a name="docx"></a>
|
||||||
# Module docx
|
# Module docx
|
||||||
@ -125,16 +160,14 @@ a list of pages and the extracted meta data of the file.
|
|||||||
## BaseConverter Objects
|
## BaseConverter Objects
|
||||||
|
|
||||||
```python
|
```python
|
||||||
class BaseConverter()
|
class TextConverter(BaseConverter)
|
||||||
```
|
```
|
||||||
|
|
||||||
Base class for implementing file converts to transform input documents to text format for ingestion in DocumentStore.
|
<a name="txt.TextConverter.__init__"></a>
|
||||||
|
|
||||||
<a name="base.BaseConverter.__init__"></a>
|
|
||||||
#### \_\_init\_\_
|
#### \_\_init\_\_
|
||||||
|
|
||||||
```python
|
```python
|
||||||
| __init__(remove_numeric_tables: Optional[bool] = None, valid_languages: Optional[List[str]] = None)
|
| __init__(remove_numeric_tables: Optional[bool] = False, valid_languages: Optional[List[str]] = None)
|
||||||
```
|
```
|
||||||
|
|
||||||
**Arguments**:
|
**Arguments**:
|
||||||
@ -150,32 +183,24 @@ This option can be used to add test for encoding errors. If the extracted text i
|
|||||||
not one of the valid languages, then it might likely be encoding error resulting
|
not one of the valid languages, then it might likely be encoding error resulting
|
||||||
in garbled text.
|
in garbled text.
|
||||||
|
|
||||||
<a name="base.BaseConverter.convert"></a>
|
<a name="txt.TextConverter.convert"></a>
|
||||||
#### convert
|
#### convert
|
||||||
|
|
||||||
```python
|
```python
|
||||||
| @abstractmethod
|
| convert(file_path: Path, meta: Optional[Dict[str, str]] = None, encoding: str = "utf-8") -> Dict[str, Any]
|
||||||
| convert(file_path: Path, meta: Optional[Dict[str, str]]) -> Dict[str, Any]
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Convert a file to a dictionary containing the text and any associated meta data.
|
Reads text from a txt file and executes optional preprocessing steps.
|
||||||
|
|
||||||
File converters may extract file meta like name or size. In addition to it, user
|
|
||||||
supplied meta data like author, url, external IDs can be supplied as a dictionary.
|
|
||||||
|
|
||||||
**Arguments**:
|
**Arguments**:
|
||||||
|
|
||||||
- `file_path`: path of the file to convert
|
- `file_path`: Path of the file to convert
|
||||||
- `meta`: dictionary of meta data key-value pairs to append in the returned document.
|
- `meta`: Optional meta data that should be associated with the document (e.g. name)
|
||||||
|
- `encoding`: Encoding of the file
|
||||||
|
|
||||||
<a name="base.BaseConverter.validate_language"></a>
|
**Returns**:
|
||||||
#### validate\_language
|
|
||||||
|
|
||||||
```python
|
Dict of format {"text": "The text from file", "meta": meta}
|
||||||
| validate_language(text: str) -> bool
|
|
||||||
```
|
|
||||||
|
|
||||||
Validate if the language of the text is one of valid languages.
|
|
||||||
|
|
||||||
<a name="pdf"></a>
|
<a name="pdf"></a>
|
||||||
# Module pdf
|
# Module pdf
|
||||||
|
|||||||
@ -1,3 +1,6 @@
|
|||||||
|
<a name="base"></a>
|
||||||
|
# base
|
||||||
|
|
||||||
<a name="farm"></a>
|
<a name="farm"></a>
|
||||||
# Module farm
|
# Module farm
|
||||||
|
|
||||||
@ -146,20 +149,21 @@ Use loaded QA model to find answers for a question in the supplied list of Docum
|
|||||||
|
|
||||||
Returns dictionaries containing answers sorted by (desc.) probability.
|
Returns dictionaries containing answers sorted by (desc.) probability.
|
||||||
Example:
|
Example:
|
||||||
|
```python
|
||||||
{'question': 'Who is the father of Arya Stark?',
|
|{
|
||||||
'answers': [
|
| 'question': 'Who is the father of Arya Stark?',
|
||||||
{'answer': 'Eddard,',
|
| 'answers':[
|
||||||
'context': " She travels with her father, Eddard, to King's Landing when he is ",
|
| {'answer': 'Eddard,',
|
||||||
'offset_answer_start': 147,
|
| 'context': " She travels with her father, Eddard, to King's Landing when he is ",
|
||||||
'offset_answer_end': 154,
|
| 'offset_answer_start': 147,
|
||||||
'probability': 0.9787139466668613,
|
| 'offset_answer_end': 154,
|
||||||
'score': None,
|
| 'probability': 0.9787139466668613,
|
||||||
'document_id': '1337'
|
| 'score': None,
|
||||||
},
|
| 'document_id': '1337'
|
||||||
...
|
| },...
|
||||||
]
|
| ]
|
||||||
}
|
|}
|
||||||
|
```
|
||||||
|
|
||||||
**Arguments**:
|
**Arguments**:
|
||||||
|
|
||||||
@ -223,20 +227,21 @@ Returns a dict containing the following metrics:
|
|||||||
Use loaded QA model to find answers for a question in the supplied list of Document.
|
Use loaded QA model to find answers for a question in the supplied list of Document.
|
||||||
Returns dictionaries containing answers sorted by (desc.) probability.
|
Returns dictionaries containing answers sorted by (desc.) probability.
|
||||||
Example:
|
Example:
|
||||||
|
```python
|
||||||
{
|
|{
|
||||||
'question': 'Who is the father of Arya Stark?',
|
| 'question': 'Who is the father of Arya Stark?',
|
||||||
'answers':[
|
| 'answers':[
|
||||||
{'answer': 'Eddard,',
|
| {'answer': 'Eddard,',
|
||||||
'context': " She travels with her father, Eddard, to King's Landing when he is ",
|
| 'context': " She travels with her father, Eddard, to King's Landing when he is ",
|
||||||
'offset_answer_start': 147,
|
| 'offset_answer_start': 147,
|
||||||
'offset_answer_end': 154,
|
| 'offset_answer_end': 154,
|
||||||
'probability': 0.9787139466668613,
|
| 'probability': 0.9787139466668613,
|
||||||
'score': None,
|
| 'score': None,
|
||||||
'document_id': '1337'
|
| 'document_id': '1337'
|
||||||
},...
|
| },...
|
||||||
]
|
| ]
|
||||||
}
|
|}
|
||||||
|
```
|
||||||
|
|
||||||
**Arguments**:
|
**Arguments**:
|
||||||
|
|
||||||
@ -343,19 +348,21 @@ Use loaded QA model to find answers for a question in the supplied list of Docum
|
|||||||
Returns dictionaries containing answers sorted by (desc.) probability.
|
Returns dictionaries containing answers sorted by (desc.) probability.
|
||||||
Example:
|
Example:
|
||||||
|
|
||||||
{'question': 'Who is the father of Arya Stark?',
|
```python
|
||||||
'answers': [
|
|{
|
||||||
{'answer': 'Eddard,',
|
| 'question': 'Who is the father of Arya Stark?',
|
||||||
'context': " She travels with her father, Eddard, to King's Landing when he is ",
|
| 'answers':[
|
||||||
'offset_answer_start': 147,
|
| {'answer': 'Eddard,',
|
||||||
'offset_answer_end': 154,
|
| 'context': " She travels with her father, Eddard, to King's Landing when he is ",
|
||||||
'probability': 0.9787139466668613,
|
| 'offset_answer_start': 147,
|
||||||
'score': None,
|
| 'offset_answer_end': 154,
|
||||||
'document_id': '1337'
|
| 'probability': 0.9787139466668613,
|
||||||
},
|
| 'score': None,
|
||||||
...
|
| 'document_id': '1337'
|
||||||
]
|
| },...
|
||||||
}
|
| ]
|
||||||
|
|}
|
||||||
|
```
|
||||||
|
|
||||||
**Arguments**:
|
**Arguments**:
|
||||||
|
|
||||||
|
|||||||
@ -1,3 +1,65 @@
|
|||||||
|
<a name="base"></a>
|
||||||
|
# base
|
||||||
|
|
||||||
|
<a name="base.BaseRetriever"></a>
|
||||||
|
## BaseRetriever
|
||||||
|
|
||||||
|
```python
|
||||||
|
class BaseRetriever(ABC)
|
||||||
|
```
|
||||||
|
|
||||||
|
<a name="base.BaseRetriever.retrieve"></a>
|
||||||
|
#### retrieve
|
||||||
|
|
||||||
|
```python
|
||||||
|
| @abstractmethod
|
||||||
|
| retrieve(query: str, filters: dict = None, top_k: int = 10, index: str = None) -> List[Document]
|
||||||
|
```
|
||||||
|
|
||||||
|
Scan through documents in DocumentStore and return a small number of documents
|
||||||
|
that are most relevant to the query.
|
||||||
|
|
||||||
|
**Arguments**:
|
||||||
|
|
||||||
|
- `query`: The query
|
||||||
|
- `filters`: A dictionary where the keys specify a metadata field and the value is a list of accepted values for that field
|
||||||
|
- `top_k`: How many documents to return per query.
|
||||||
|
- `index`: The name of the index in the DocumentStore from which to retrieve documents
|
||||||
|
|
||||||
|
<a name="base.BaseRetriever.eval"></a>
|
||||||
|
#### eval
|
||||||
|
|
||||||
|
```python
|
||||||
|
| eval(label_index: str = "label", doc_index: str = "eval_document", label_origin: str = "gold_label", top_k: int = 10, open_domain: bool = False, return_preds: bool = False) -> dict
|
||||||
|
```
|
||||||
|
|
||||||
|
Performs evaluation on the Retriever.
|
||||||
|
Retriever is evaluated based on whether it finds the correct document given the question string and at which
|
||||||
|
position in the ranking of documents the correct document is.
|
||||||
|
|
||||||
|
| Returns a dict containing the following metrics:
|
||||||
|
|
||||||
|
- "recall": Proportion of questions for which correct document is among retrieved documents
|
||||||
|
- "mrr": Mean of reciprocal rank. Rewards retrievers that give relevant documents a higher rank.
|
||||||
|
Only considers the highest ranked relevant document.
|
||||||
|
- "map": Mean of average precision for each question. Rewards retrievers that give relevant
|
||||||
|
documents a higher rank. Considers all retrieved relevant documents. If ``open_domain=True``,
|
||||||
|
average precision is normalized by the number of retrieved relevant documents per query.
|
||||||
|
If ``open_domain=False``, average precision is normalized by the number of all relevant documents
|
||||||
|
per query.
|
||||||
|
|
||||||
|
**Arguments**:
|
||||||
|
|
||||||
|
- `label_index`: Index/Table in DocumentStore where labeled questions are stored
|
||||||
|
- `doc_index`: Index/Table in DocumentStore where documents that are used for evaluation are stored
|
||||||
|
- `top_k`: How many documents to return per question
|
||||||
|
- `open_domain`: If ``True``, retrieval will be evaluated by checking if the answer string to a question is
|
||||||
|
contained in the retrieved docs (common approach in open-domain QA).
|
||||||
|
If ``False``, retrieval uses a stricter evaluation that checks if the retrieved document ids
|
||||||
|
are within ids explicitly stated in the labels.
|
||||||
|
- `return_preds`: Whether to add predictions in the returned dictionary. If True, the returned dictionary
|
||||||
|
contains the keys "predictions" and "metrics".
|
||||||
|
|
||||||
<a name="sparse"></a>
|
<a name="sparse"></a>
|
||||||
# Module sparse
|
# Module sparse
|
||||||
|
|
||||||
@ -27,28 +89,28 @@ names must match with the filters dict supplied in self.retrieve().
|
|||||||
|
|
||||||
**An example custom_query:**
|
**An example custom_query:**
|
||||||
```python
|
```python
|
||||||
{
|
| {
|
||||||
> "size": 10,
|
| "size": 10,
|
||||||
> "query": {
|
| "query": {
|
||||||
> "bool": {
|
| "bool": {
|
||||||
> "should": [{"multi_match": {
|
| "should": [{"multi_match": {
|
||||||
> "query": "${question}", // mandatory $question placeholder
|
| "query": "${question}", // mandatory $question placeholder
|
||||||
> "type": "most_fields",
|
| "type": "most_fields",
|
||||||
> "fields": ["text", "title"]}}],
|
| "fields": ["text", "title"]}}],
|
||||||
> "filter": [ // optional custom filters
|
| "filter": [ // optional custom filters
|
||||||
> {"terms": {"year": "${years}"}},
|
| {"terms": {"year": "${years}"}},
|
||||||
> {"terms": {"quarter": "${quarters}"}},
|
| {"terms": {"quarter": "${quarters}"}},
|
||||||
> {"range": {"date": {"gte": "${date}"}}}
|
| {"range": {"date": {"gte": "${date}"}}}
|
||||||
> ],
|
| ],
|
||||||
> }
|
| }
|
||||||
> },
|
| },
|
||||||
}
|
| }
|
||||||
```
|
```
|
||||||
|
|
||||||
**For this custom_query, a sample retrieve() could be:**
|
**For this custom_query, a sample retrieve() could be:**
|
||||||
```python
|
```python
|
||||||
self.retrieve(query="Why did the revenue increase?",
|
| self.retrieve(query="Why did the revenue increase?",
|
||||||
> filters={"years": ["2019"], "quarters": ["Q1", "Q2"]})
|
| filters={"years": ["2019"], "quarters": ["Q1", "Q2"]})
|
||||||
```
|
```
|
||||||
|
|
||||||
<a name="sparse.ElasticsearchFilterOnlyRetriever"></a>
|
<a name="sparse.ElasticsearchFilterOnlyRetriever"></a>
|
||||||
@ -103,14 +165,14 @@ The checkpoint format matches huggingface transformers' model format
|
|||||||
**Example:**
|
**Example:**
|
||||||
|
|
||||||
```python
|
```python
|
||||||
# remote model from FAIR
|
| # remote model from FAIR
|
||||||
DensePassageRetriever(document_store=your_doc_store,
|
| DensePassageRetriever(document_store=your_doc_store,
|
||||||
> query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
|
| query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
|
||||||
> passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base")
|
| passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base")
|
||||||
# or from local path
|
| # or from local path
|
||||||
DensePassageRetriever(document_store=your_doc_store,
|
| DensePassageRetriever(document_store=your_doc_store,
|
||||||
> query_embedding_model="model_directory/question-encoder",
|
| query_embedding_model="model_directory/question-encoder",
|
||||||
> passage_embedding_model="model_directory/context-encoder")
|
| passage_embedding_model="model_directory/context-encoder")
|
||||||
```
|
```
|
||||||
|
|
||||||
**Arguments**:
|
**Arguments**:
|
||||||
|
|||||||
@ -34,27 +34,30 @@ class RAGenerator(BaseGenerator):
|
|||||||
**Example**
|
**Example**
|
||||||
|
|
||||||
```python
|
```python
|
||||||
> question = "who got the first nobel prize in physics?"
|
| question = "who got the first nobel prize in physics?"
|
||||||
|
|
|
||||||
# Retrieve related documents from retriever
|
| # Retrieve related documents from retriever
|
||||||
> retrieved_docs = retriever.retrieve(query=question)
|
| retrieved_docs = retriever.retrieve(query=question)
|
||||||
|
|
|
||||||
> # Now generate answer from question and retrieved documents
|
| # Now generate answer from question and retrieved documents
|
||||||
> generator.predict(
|
| generator.predict(
|
||||||
> question=question,
|
| question=question,
|
||||||
> documents=retrieved_docs,
|
| documents=retrieved_docs,
|
||||||
> top_k=1
|
| top_k=1
|
||||||
> )
|
| )
|
||||||
{'question': 'who got the first nobel prize in physics',
|
|
|
||||||
'answers':
|
| # Answer
|
||||||
[{'question': 'who got the first nobel prize in physics',
|
|
|
||||||
'answer': ' albert einstein',
|
| {'question': 'who got the first nobel prize in physics',
|
||||||
'meta': { 'doc_ids': [...],
|
| 'answers':
|
||||||
'doc_scores': [80.42758 ...],
|
| [{'question': 'who got the first nobel prize in physics',
|
||||||
'doc_probabilities': [40.71379089355469, ...
|
| 'answer': ' albert einstein',
|
||||||
'texts': ['Albert Einstein was a ...]
|
| 'meta': { 'doc_ids': [...],
|
||||||
'titles': ['"Albert Einstein"', ...]
|
| 'doc_scores': [80.42758 ...],
|
||||||
}}]}
|
| 'doc_probabilities': [40.71379089355469, ...
|
||||||
|
| 'texts': ['Albert Einstein was a ...]
|
||||||
|
| 'titles': ['"Albert Einstein"', ...]
|
||||||
|
| }}]}
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@ -191,16 +194,16 @@ class RAGenerator(BaseGenerator):
|
|||||||
:return: Generated answers plus additional infos in a dict like this:
|
:return: Generated answers plus additional infos in a dict like this:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
> {'question': 'who got the first nobel prize in physics',
|
| {'question': 'who got the first nobel prize in physics',
|
||||||
> 'answers':
|
| 'answers':
|
||||||
> [{'question': 'who got the first nobel prize in physics',
|
| [{'question': 'who got the first nobel prize in physics',
|
||||||
> 'answer': ' albert einstein',
|
| 'answer': ' albert einstein',
|
||||||
> 'meta': { 'doc_ids': [...],
|
| 'meta': { 'doc_ids': [...],
|
||||||
> 'doc_scores': [80.42758 ...],
|
| 'doc_scores': [80.42758 ...],
|
||||||
> 'doc_probabilities': [40.71379089355469, ...
|
| 'doc_probabilities': [40.71379089355469, ...
|
||||||
> 'texts': ['Albert Einstein was a ...]
|
| 'texts': ['Albert Einstein was a ...]
|
||||||
> 'titles': ['"Albert Einstein"', ...]
|
| 'titles': ['"Albert Einstein"', ...]
|
||||||
> }}]}
|
| }}]}
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
if len(documents) == 0:
|
if len(documents) == 0:
|
||||||
|
|||||||
@ -308,20 +308,21 @@ class FARMReader(BaseReader):
|
|||||||
|
|
||||||
Returns dictionaries containing answers sorted by (desc.) probability.
|
Returns dictionaries containing answers sorted by (desc.) probability.
|
||||||
Example:
|
Example:
|
||||||
|
```python
|
||||||
{'question': 'Who is the father of Arya Stark?',
|
|{
|
||||||
'answers': [
|
| 'question': 'Who is the father of Arya Stark?',
|
||||||
{'answer': 'Eddard,',
|
| 'answers':[
|
||||||
'context': " She travels with her father, Eddard, to King's Landing when he is ",
|
| {'answer': 'Eddard,',
|
||||||
'offset_answer_start': 147,
|
| 'context': " She travels with her father, Eddard, to King's Landing when he is ",
|
||||||
'offset_answer_end': 154,
|
| 'offset_answer_start': 147,
|
||||||
'probability': 0.9787139466668613,
|
| 'offset_answer_end': 154,
|
||||||
'score': None,
|
| 'probability': 0.9787139466668613,
|
||||||
'document_id': '1337'
|
| 'score': None,
|
||||||
},
|
| 'document_id': '1337'
|
||||||
...
|
| },...
|
||||||
]
|
| ]
|
||||||
}
|
|}
|
||||||
|
```
|
||||||
|
|
||||||
:param question: Question string
|
:param question: Question string
|
||||||
:param documents: List of Document in which to search for the answer
|
:param documents: List of Document in which to search for the answer
|
||||||
@ -571,20 +572,21 @@ class FARMReader(BaseReader):
|
|||||||
Use loaded QA model to find answers for a question in the supplied list of Document.
|
Use loaded QA model to find answers for a question in the supplied list of Document.
|
||||||
Returns dictionaries containing answers sorted by (desc.) probability.
|
Returns dictionaries containing answers sorted by (desc.) probability.
|
||||||
Example:
|
Example:
|
||||||
|
```python
|
||||||
{
|
|{
|
||||||
'question': 'Who is the father of Arya Stark?',
|
| 'question': 'Who is the father of Arya Stark?',
|
||||||
'answers':[
|
| 'answers':[
|
||||||
{'answer': 'Eddard,',
|
| {'answer': 'Eddard,',
|
||||||
'context': " She travels with her father, Eddard, to King's Landing when he is ",
|
| 'context': " She travels with her father, Eddard, to King's Landing when he is ",
|
||||||
'offset_answer_start': 147,
|
| 'offset_answer_start': 147,
|
||||||
'offset_answer_end': 154,
|
| 'offset_answer_end': 154,
|
||||||
'probability': 0.9787139466668613,
|
| 'probability': 0.9787139466668613,
|
||||||
'score': None,
|
| 'score': None,
|
||||||
'document_id': '1337'
|
| 'document_id': '1337'
|
||||||
},...
|
| },...
|
||||||
]
|
| ]
|
||||||
}
|
|}
|
||||||
|
```
|
||||||
|
|
||||||
:param question: Question string
|
:param question: Question string
|
||||||
:param documents: List of documents as string type
|
:param documents: List of documents as string type
|
||||||
|
|||||||
@ -72,19 +72,21 @@ class TransformersReader(BaseReader):
|
|||||||
Returns dictionaries containing answers sorted by (desc.) probability.
|
Returns dictionaries containing answers sorted by (desc.) probability.
|
||||||
Example:
|
Example:
|
||||||
|
|
||||||
{'question': 'Who is the father of Arya Stark?',
|
```python
|
||||||
'answers': [
|
|{
|
||||||
{'answer': 'Eddard,',
|
| 'question': 'Who is the father of Arya Stark?',
|
||||||
'context': " She travels with her father, Eddard, to King's Landing when he is ",
|
| 'answers':[
|
||||||
'offset_answer_start': 147,
|
| {'answer': 'Eddard,',
|
||||||
'offset_answer_end': 154,
|
| 'context': " She travels with her father, Eddard, to King's Landing when he is ",
|
||||||
'probability': 0.9787139466668613,
|
| 'offset_answer_start': 147,
|
||||||
'score': None,
|
| 'offset_answer_end': 154,
|
||||||
'document_id': '1337'
|
| 'probability': 0.9787139466668613,
|
||||||
},
|
| 'score': None,
|
||||||
...
|
| 'document_id': '1337'
|
||||||
]
|
| },...
|
||||||
}
|
| ]
|
||||||
|
|}
|
||||||
|
```
|
||||||
|
|
||||||
:param question: Question string
|
:param question: Question string
|
||||||
:param documents: List of Document in which to search for the answer
|
:param documents: List of Document in which to search for the answer
|
||||||
|
|||||||
@ -52,14 +52,14 @@ class DensePassageRetriever(BaseRetriever):
|
|||||||
**Example:**
|
**Example:**
|
||||||
|
|
||||||
```python
|
```python
|
||||||
# remote model from FAIR
|
| # remote model from FAIR
|
||||||
DensePassageRetriever(document_store=your_doc_store,
|
| DensePassageRetriever(document_store=your_doc_store,
|
||||||
> query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
|
| query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
|
||||||
> passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base")
|
| passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base")
|
||||||
# or from local path
|
| # or from local path
|
||||||
DensePassageRetriever(document_store=your_doc_store,
|
| DensePassageRetriever(document_store=your_doc_store,
|
||||||
> query_embedding_model="model_directory/question-encoder",
|
| query_embedding_model="model_directory/question-encoder",
|
||||||
> passage_embedding_model="model_directory/context-encoder")
|
| passage_embedding_model="model_directory/context-encoder")
|
||||||
```
|
```
|
||||||
|
|
||||||
:param document_store: An instance of DocumentStore from which to retrieve documents.
|
:param document_store: An instance of DocumentStore from which to retrieve documents.
|
||||||
@ -150,6 +150,8 @@ class DensePassageRetriever(BaseRetriever):
|
|||||||
"external_id": '19930582'}, ...]
|
"external_id": '19930582'}, ...]
|
||||||
:return: dictionary of embeddings for "passages" and "query"
|
:return: dictionary of embeddings for "passages" and "query"
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
dataset, tensor_names, baskets = self.processor.dataset_from_dicts(
|
dataset, tensor_names, baskets = self.processor.dataset_from_dicts(
|
||||||
dicts, indices=[i for i in range(len(dicts))], return_baskets=True
|
dicts, indices=[i for i in range(len(dicts))], return_baskets=True
|
||||||
)
|
)
|
||||||
|
|||||||
@ -27,28 +27,28 @@ class ElasticsearchRetriever(BaseRetriever):
|
|||||||
|
|
||||||
**An example custom_query:**
|
**An example custom_query:**
|
||||||
```python
|
```python
|
||||||
{
|
| {
|
||||||
> "size": 10,
|
| "size": 10,
|
||||||
> "query": {
|
| "query": {
|
||||||
> "bool": {
|
| "bool": {
|
||||||
> "should": [{"multi_match": {
|
| "should": [{"multi_match": {
|
||||||
> "query": "${question}", // mandatory $question placeholder
|
| "query": "${question}", // mandatory $question placeholder
|
||||||
> "type": "most_fields",
|
| "type": "most_fields",
|
||||||
> "fields": ["text", "title"]}}],
|
| "fields": ["text", "title"]}}],
|
||||||
> "filter": [ // optional custom filters
|
| "filter": [ // optional custom filters
|
||||||
> {"terms": {"year": "${years}"}},
|
| {"terms": {"year": "${years}"}},
|
||||||
> {"terms": {"quarter": "${quarters}"}},
|
| {"terms": {"quarter": "${quarters}"}},
|
||||||
> {"range": {"date": {"gte": "${date}"}}}
|
| {"range": {"date": {"gte": "${date}"}}}
|
||||||
> ],
|
| ],
|
||||||
> }
|
| }
|
||||||
> },
|
| },
|
||||||
}
|
| }
|
||||||
```
|
```
|
||||||
|
|
||||||
**For this custom_query, a sample retrieve() could be:**
|
**For this custom_query, a sample retrieve() could be:**
|
||||||
```python
|
```python
|
||||||
self.retrieve(query="Why did the revenue increase?",
|
| self.retrieve(query="Why did the revenue increase?",
|
||||||
> filters={"years": ["2019"], "quarters": ["Q1", "Q2"]})
|
| filters={"years": ["2019"], "quarters": ["Q1", "Q2"]})
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
self.document_store: ElasticsearchDocumentStore = document_store
|
self.document_store: ElasticsearchDocumentStore = document_store
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user