diff --git a/.github/workflows/deploy_website.yml b/.github/workflows/deploy_website.yml
index 1a40435c6..b7593a3ba 100644
--- a/.github/workflows/deploy_website.yml
+++ b/.github/workflows/deploy_website.yml
@@ -13,28 +13,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
-
- - name: Set up Python 3.7
- uses: actions/setup-python@v2
- with:
- python-version: 3.7
-
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- pip install 'pydoc-markdown>=3.0.0,<4.0.0'
- pip install mkdocs
- pip install jupytercontrib
-
- # Generates the docstrings and tutorials so that we have the latest for the deployment
- - name: Generate Docstrings and Tutorials
- run: |
- cd docs/_src/api/api/
- ./generate_docstrings.sh
- cd ../../tutorials/tutorials/
- python3 convert_ipynb.py
-
+
# Creates dispatch event for haystack-website repo
- name: Repository Dispatch
uses: peter-evans/repository-dispatch@v1
diff --git a/.github/workflows/deploy_website_staging.yml b/.github/workflows/deploy_website_staging.yml
new file mode 100644
index 000000000..8b939669b
--- /dev/null
+++ b/.github/workflows/deploy_website_staging.yml
@@ -0,0 +1,26 @@
+name: Deploy website staging
+
+# Controls when the action will run. Triggers the workflow on push
+# events for any branch except master and benchmarks
+on:
+ push:
+ branches-ignore:
+ - master
+ - benchmarks
+
+jobs:
+ # This workflow contains a single job called "build"
+ build:
+ # The type of runner that the job will run on
+ runs-on: ubuntu-latest
+
+ steps:
+
+ # Creates dispatch event for haystack-website repo
+ - name: Repository Dispatch
+ uses: peter-evans/repository-dispatch@v1
+ with:
+ token: ${{ secrets.PUBLIC_REPO_ACCESS_TOKEN }}
+ repository: deepset-ai/haystack-website
+ event-type: deploy-website-staging
+ client-payload: '{"ref": "${{ github.ref }}"}'
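+
+# Note: a sketch of the receiving side, not part of this repository. The
+# haystack-website repo is assumed to listen for this event with a trigger like:
+#   on:
+#     repository_dispatch:
+#       types: [deploy-website-staging]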
diff --git a/.github/workflows/update_docs.yml b/.github/workflows/update_docs.yml
new file mode 100644
index 000000000..2fc455efe
--- /dev/null
+++ b/.github/workflows/update_docs.yml
@@ -0,0 +1,55 @@
+name: Update Docstrings and Tutorials
+
+# Controls when the action will run. Triggers the workflow on push
+# events for any branch except master and benchmarks
+on:
+ push:
+ branches-ignore:
+ - master
+ - benchmarks
+
+jobs:
+ # This workflow contains a single job called "build"
+ build:
+ # The type of runner that the job will run on
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v2
+ with:
+ persist-credentials: false # otherwise, the token used is the GITHUB_TOKEN, instead of your personal token
+          fetch-depth: 0 # otherwise, you will fail to push refs to the destination repo
+
+ - name: Set up Python 3.7
+ uses: actions/setup-python@v2
+ with:
+ python-version: 3.7
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install 'pydoc-markdown>=3.0.0,<4.0.0'
+ pip install mkdocs
+ pip install jupytercontrib
+
+ # Generates the docstrings and tutorials so that we have the latest for the deployment
+ - name: Generate Docstrings and Tutorials
+ run: |
+ cd docs/_src/api/api/
+ ./generate_docstrings.sh
+ cd ../../tutorials/tutorials/
+ python3 convert_ipynb.py
+ cd ../../../../
+ git status
+
+ - name: Commit files
+ run: |
+ git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
+ git config --local user.name "github-actions[bot]"
+ git add .
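+        # `|| echo` below keeps this step from failing the workflow when the
+        # generated files are unchanged and there is nothing to commit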
+ git commit -m "Add latest docstring and tutorial changes" -a || echo "No changes to commit"
+ - name: Push changes
+ uses: ad-m/github-push-action@master
+ with:
+ github_token: ${{ secrets.GITHUB_TOKEN }}
+ branch: ${{ github.ref }}
diff --git a/docs/_src/api/api/file_converter.md b/docs/_src/api/api/file_converter.md
index b8387da33..43c9c7e79 100644
--- a/docs/_src/api/api/file_converter.md
+++ b/docs/_src/api/api/file_converter.md
@@ -1,3 +1,62 @@
+
+# Module base
+
+
+## BaseConverter Objects
+
+```python
+class BaseConverter()
+```
+
+Base class for implementing file converters that transform input documents into text format for ingestion into a DocumentStore.
+
+
+#### \_\_init\_\_
+
+```python
+ | __init__(remove_numeric_tables: Optional[bool] = None, valid_languages: Optional[List[str]] = None)
+```
+
+**Arguments**:
+
+- `remove_numeric_tables`: This option uses heuristics to remove numeric rows from tables.
+Tabular structures in documents might be noise for the reader model if it
+does not have table-parsing capability for finding answers. However, tables
+may also contain long strings that are possible candidates for answers.
+Rows containing strings are therefore retained when this option is enabled.
+- `valid_languages`: Validate languages from a list of languages specified in the ISO 639-1
+(https://en.wikipedia.org/wiki/ISO_639-1) format.
+This option can be used to add a test for encoding errors. If the extracted text is
+not in one of the valid languages, it is likely the result of an encoding error that
+produced garbled text.
+
+
+#### convert
+
+```python
+ | @abstractmethod
+ | convert(file_path: Path, meta: Optional[Dict[str, str]]) -> Dict[str, Any]
+```
+
+Convert a file to a dictionary containing the text and any associated metadata.
+
+File converters may extract file metadata such as name or size. In addition,
+user-supplied metadata such as author, URL, or external IDs can be passed as a dictionary.
+
+**Arguments**:
+
+- `file_path`: Path of the file to convert
+- `meta`: Dictionary of metadata key-value pairs to attach to the returned document.
+
+
+#### validate\_language
+
+```python
+ | validate_language(text: str) -> bool
+```
+
+Validate that the language of the text is one of the valid languages.
+
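+To illustrate the interface, here is a minimal hypothetical subclass. The class
+name `PlainTextConverter` is illustrative only, and the import path is an
+assumption based on this module layout:
+
+```python
+from pathlib import Path
+from typing import Any, Dict, Optional
+
+from haystack.file_converter.base import BaseConverter  # assumed import path
+
+
+class PlainTextConverter(BaseConverter):
+    """Illustrative converter that reads a file verbatim as UTF-8 text."""
+
+    def convert(self, file_path: Path, meta: Optional[Dict[str, str]]) -> Dict[str, Any]:
+        # Read the raw file and wrap it in the documented dict shape
+        text = Path(file_path).read_text(encoding="utf-8")
+        return {"text": text, "meta": meta or {}}
+```
+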
# Module txt
@@ -118,65 +177,6 @@ in garbled text.
a list of pages and the extracted meta data of the file.
-
-# Module base
-
-
-## BaseConverter Objects
-
-```python
-class BaseConverter()
-```
-
-Base class for implementing file converts to transform input documents to text format for ingestion in DocumentStore.
-
-
-#### \_\_init\_\_
-
-```python
- | __init__(remove_numeric_tables: Optional[bool] = None, valid_languages: Optional[List[str]] = None)
-```
-
-**Arguments**:
-
-- `remove_numeric_tables`: This option uses heuristics to remove numeric rows from the tables.
-The tabular structures in documents might be noise for the reader model if it
-does not have table parsing capability for finding answers. However, tables
-may also have long strings that could possible candidate for searching answers.
-The rows containing strings are thus retained in this option.
-- `valid_languages`: validate languages from a list of languages specified in the ISO 639-1
-(https://en.wikipedia.org/wiki/ISO_639-1) format.
-This option can be used to add test for encoding errors. If the extracted text is
-not one of the valid languages, then it might likely be encoding error resulting
-in garbled text.
-
-
-#### convert
-
-```python
- | @abstractmethod
- | convert(file_path: Path, meta: Optional[Dict[str, str]]) -> Dict[str, Any]
-```
-
-Convert a file to a dictionary containing the text and any associated meta data.
-
-File converters may extract file meta like name or size. In addition to it, user
-supplied meta data like author, url, external IDs can be supplied as a dictionary.
-
-**Arguments**:
-
-- `file_path`: path of the file to convert
-- `meta`: dictionary of meta data key-value pairs to append in the returned document.
-
-
-#### validate\_language
-
-```python
- | validate_language(text: str) -> bool
-```
-
-Validate if the language of the text is one of valid languages.
-
# Module pdf
diff --git a/docs/_src/api/api/generator.md b/docs/_src/api/api/generator.md
index 25baee4ad..0004b6eba 100644
--- a/docs/_src/api/api/generator.md
+++ b/docs/_src/api/api/generator.md
@@ -1,3 +1,35 @@
+
+# Module base
+
+
+## BaseGenerator Objects
+
+```python
+class BaseGenerator(ABC)
+```
+
+Abstract class for Generators
+
+
+#### predict
+
+```python
+ | @abstractmethod
+ | predict(query: str, documents: List[Document], top_k: Optional[int]) -> Dict
+```
+
+Abstract method to generate answers.
+
+**Arguments**:
+
+- `query`: Query
+- `documents`: Related documents (e.g. coming from a retriever) that the answer shall be conditioned on.
+- `top_k`: Number of returned answers
+
+**Returns**:
+
+Generated answers plus additional info in a dict
+
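+As a usage sketch (the `Document` import path is an assumption, and `generator`
+stands for any concrete subclass, e.g. the transformers-based generator
+documented below, configured elsewhere):
+
+```python
+from haystack import Document  # assumed top-level export
+
+# `generator` is assumed to be a configured BaseGenerator subclass
+docs = [Document(text="Johann Wolfgang von Goethe wrote Faust.")]
+result = generator.predict(query="Who wrote Faust?", documents=docs, top_k=1)
+print(result)  # generated answers plus additional info in a dict
+```
+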
# Module transformers
@@ -106,35 +138,3 @@ Generated answers plus additional infos in a dict like this:
| }}]}
```
-
-# Module base
-
-
-## BaseGenerator Objects
-
-```python
-class BaseGenerator(ABC)
-```
-
-Abstract class for Generators
-
-
-#### predict
-
-```python
- | @abstractmethod
- | predict(query: str, documents: List[Document], top_k: Optional[int]) -> Dict
-```
-
-Abstract method to generate answers.
-
-**Arguments**:
-
-- `query`: Query
-- `documents`: Related documents (e.g. coming from a retriever) that the answer shall be conditioned on.
-- `top_k`: Number of returned answers
-
-**Returns**:
-
-Generated answers plus additional infos in a dict
-
diff --git a/docs/_src/api/api/pipelines.md b/docs/_src/api/api/pipelines.md
index f2e834a4e..dbd32e25c 100644
--- a/docs/_src/api/api/pipelines.md
+++ b/docs/_src/api/api/pipelines.md
@@ -207,6 +207,44 @@ Initialize a Pipeline for Generative Question Answering.
- `generator`: Generator instance
- `retriever`: Retriever instance
+
+## SearchSummarizationPipeline Objects
+
+```python
+class SearchSummarizationPipeline(BaseStandardPipeline)
+```
+
+
+#### \_\_init\_\_
+
+```python
+ | __init__(summarizer: BaseSummarizer, retriever: BaseRetriever)
+```
+
+Initialize a Pipeline that retrieves documents for a query and then summarizes those documents.
+
+**Arguments**:
+
+- `summarizer`: Summarizer instance
+- `retriever`: Retriever instance
+
+
+#### run
+
+```python
+ | run(query: str, filters: Optional[Dict] = None, top_k_retriever: int = 10, generate_single_summary: bool = False, return_in_answer_format=False)
+```
+
+**Arguments**:
+
+- `query`: Your search query
+- `filters`: A dictionary where the keys specify a metadata field and the value is a list of accepted values for that field
+- `top_k_retriever`: Number of top docs the retriever should pass to the summarizer.
+The higher this value, the slower your pipeline.
+- `generate_single_summary`: Whether to generate a single summary from all retrieved docs (True) or one per doc (False).
+- `return_in_answer_format`: Whether the results should be returned as documents (False) or in the answer format used in other QA pipelines (True).
+With the latter, you can use this pipeline as a "drop-in replacement" for other QA pipelines.
+
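+A usage sketch (import paths and the summarizer model are assumptions; the
+retriever is assumed to be an existing `BaseRetriever` bound to a document store):
+
+```python
+from haystack.pipeline import SearchSummarizationPipeline  # assumed import path
+from haystack.summarizer import TransformersSummarizer     # assumed import path
+
+summarizer = TransformersSummarizer(model_name_or_path="google/pegasus-xsum")
+pipeline = SearchSummarizationPipeline(summarizer=summarizer, retriever=retriever)
+# one summary over all retrieved docs instead of one summary per doc
+result = pipeline.run(query="climate policy", top_k_retriever=5,
+                      generate_single_summary=True)
+```
+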
## FAQPipeline Objects
diff --git a/docs/_src/api/api/reader.md b/docs/_src/api/api/reader.md
index afa6fc428..162cf4cae 100644
--- a/docs/_src/api/api/reader.md
+++ b/docs/_src/api/api/reader.md
@@ -1,3 +1,6 @@
+
+# Module base
+
# Module farm
@@ -378,6 +381,3 @@ Example:
Dict containing query and answers
-
-# Module base
-
diff --git a/docs/_src/api/api/retriever.md b/docs/_src/api/api/retriever.md
index 7930b5fac..3b7708d6b 100644
--- a/docs/_src/api/api/retriever.md
+++ b/docs/_src/api/api/retriever.md
@@ -1,3 +1,74 @@
+
+# Module base
+
+
+## BaseRetriever Objects
+
+```python
+class BaseRetriever(ABC)
+```
+
+
+#### retrieve
+
+```python
+ | @abstractmethod
+ | retrieve(query: str, filters: dict = None, top_k: int = 10, index: str = None) -> List[Document]
+```
+
+Scan through the documents in a DocumentStore and return a small number of documents
+that are most relevant to the query.
+
+**Arguments**:
+
+- `query`: The query
+- `filters`: A dictionary where the keys specify a metadata field and the value is a list of accepted values for that field
+- `top_k`: How many documents to return per query.
+- `index`: The name of the index in the DocumentStore from which to retrieve documents
+
+
+#### timing
+
+```python
+ | timing(fn)
+```
+
+Wrapper method used to time functions.
+
+
+#### eval
+
+```python
+ | eval(label_index: str = "label", doc_index: str = "eval_document", label_origin: str = "gold_label", top_k: int = 10, open_domain: bool = False, return_preds: bool = False) -> dict
+```
+
+Performs evaluation on the Retriever.
+The Retriever is evaluated on whether it finds the correct document for a given
+query string, and on the position of the correct document in the ranking.
+
+Returns a dict containing the following metrics:
+
+- "recall": Proportion of questions for which correct document is among retrieved documents
+- "mrr": Mean of reciprocal rank. Rewards retrievers that give relevant documents a higher rank.
+Only considers the highest ranked relevant document.
+- "map": Mean of average precision for each question. Rewards retrievers that give relevant
+documents a higher rank. Considers all retrieved relevant documents. If ``open_domain=True``,
+average precision is normalized by the number of retrieved relevant documents per query.
+If ``open_domain=False``, average precision is normalized by the number of all relevant documents
+per query.
+
+**Arguments**:
+
+- `label_index`: Index/Table in DocumentStore where labeled questions are stored
+- `doc_index`: Index/Table in DocumentStore where documents that are used for evaluation are stored
+- `top_k`: How many documents to return per query
+- `open_domain`: If ``True``, retrieval will be evaluated by checking if the answer string to a question is
+contained in the retrieved docs (common approach in open-domain QA).
+If ``False``, retrieval uses a stricter evaluation that checks if the retrieved document ids
+are within ids explicitly stated in the labels.
+- `return_preds`: Whether to add predictions in the returned dictionary. If True, the returned dictionary
+contains the keys "predictions" and "metrics".
+
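+As a usage sketch (`retriever` stands for a configured concrete subclass from
+the modules below; the metric keys match the list above, e.g. a correct document
+ranked third contributes a reciprocal rank of 1/3 to "mrr"):
+
+```python
+# `retriever` is assumed to be any concrete BaseRetriever subclass
+docs = retriever.retrieve(query="Who wrote Faust?", top_k=10)
+
+# evaluate against pre-populated label/eval indexes in the DocumentStore
+metrics = retriever.eval(label_index="label", doc_index="eval_document", top_k=10)
+print(metrics["recall"], metrics["mrr"], metrics["map"])
+```
+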
# Module sparse
@@ -408,74 +479,3 @@ Create embeddings for a list of passages. For this Retriever type: The same as c
Embeddings, one per input passage
-
-# Module base
-
-
-## BaseRetriever Objects
-
-```python
-class BaseRetriever(ABC)
-```
-
-
-#### retrieve
-
-```python
- | @abstractmethod
- | retrieve(query: str, filters: dict = None, top_k: int = 10, index: str = None) -> List[Document]
-```
-
-Scan through documents in DocumentStore and return a small number documents
-that are most relevant to the query.
-
-**Arguments**:
-
-- `query`: The query
-- `filters`: A dictionary where the keys specify a metadata field and the value is a list of accepted values for that field
-- `top_k`: How many documents to return per query.
-- `index`: The name of the index in the DocumentStore from which to retrieve documents
-
-
-#### timing
-
-```python
- | timing(fn)
-```
-
-Wrapper method used to time functions.
-
-
-#### eval
-
-```python
- | eval(label_index: str = "label", doc_index: str = "eval_document", label_origin: str = "gold_label", top_k: int = 10, open_domain: bool = False, return_preds: bool = False) -> dict
-```
-
-Performs evaluation on the Retriever.
-Retriever is evaluated based on whether it finds the correct document given the query string and at which
-position in the ranking of documents the correct document is.
-
-| Returns a dict containing the following metrics:
-
-- "recall": Proportion of questions for which correct document is among retrieved documents
-- "mrr": Mean of reciprocal rank. Rewards retrievers that give relevant documents a higher rank.
-Only considers the highest ranked relevant document.
-- "map": Mean of average precision for each question. Rewards retrievers that give relevant
-documents a higher rank. Considers all retrieved relevant documents. If ``open_domain=True``,
-average precision is normalized by the number of retrieved relevant documents per query.
-If ``open_domain=False``, average precision is normalized by the number of all relevant documents
-per query.
-
-**Arguments**:
-
-- `label_index`: Index/Table in DocumentStore where labeled questions are stored
-- `doc_index`: Index/Table in DocumentStore where documents that are used for evaluation are stored
-- `top_k`: How many documents to return per query
-- `open_domain`: If ``True``, retrieval will be evaluated by checking if the answer string to a question is
-contained in the retrieved docs (common approach in open-domain QA).
-If ``False``, retrieval uses a stricter evaluation that checks if the retrieved document ids
-are within ids explicitly stated in the labels.
-- `return_preds`: Whether to add predictions in the returned dictionary. If True, the returned dictionary
-contains the keys "predictions" and "metrics".
-