2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								{
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 "cells": [
							 
						 
					
						
							
								
									
										
										
										
											2019-11-25 16:14:37 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "markdown",
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "metadata": {},
							 
						 
					
						
							
								
									
										
										
										
											2019-11-25 16:14:37 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
									
										
										
										
											2020-09-18 12:57:32 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# Build Your First QA System\n",
							 
						 
					
						
							
								
									
										
										
										
											2019-11-25 16:14:37 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "<img style=\"float: right;\" src=\"https://upload.wikimedia.org/wikipedia/en/d/d8/Game_of_Thrones_title_card.jpg\">\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2021-01-13 10:33:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "[](https://colab.research.google.com/github/deepset-ai/haystack/blob/master/tutorials/Tutorial1_Basic_QA_Pipeline.ipynb)\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-09-18 12:57:32 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2019-11-25 16:14:37 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "Question Answering can be used in a variety of use cases. A very common one:  Using it to navigate through complex knowledge bases or long documents (\"search setting\").\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "A \"knowledge base\" could for example be your website, an internal wiki or a collection of financial reports. \n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "In this tutorial we will work on a slightly different domain: \"Game of Thrones\". \n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "Let's see how we can use a bunch of Wikipedia articles to answer a variety of questions about the \n",
							 
						 
					
						
							
								
									
										
										
										
											2021-09-21 14:42:18 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "marvellous seven kingdoms.\n"
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
									
										
										
										
											2019-11-25 16:14:37 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
									
										
										
										
											2020-12-22 07:58:12 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "markdown",
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "collapsed": false
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   },
							 
						 
					
						
							
								
									
										
										
										
											2020-12-22 07:58:12 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "### Prepare environment\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "#### Colab: Enable the GPU runtime\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "Make sure you enable the GPU runtime to experience decent speed in this tutorial.\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "**Runtime -> Change Runtime type -> Hardware accelerator -> GPU**\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2022-04-12 16:19:03 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "<img src=\"https://raw.githubusercontent.com/deepset-ai/haystack/master/docs/img/colab_gpu_runtime.jpg\">"
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
									
										
										
										
											2020-12-22 07:58:12 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "code",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "execution_count": null,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "collapsed": false,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pycharm": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								     "name": "#%%\n"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    }
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "outputs": [],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "# Make sure you have a GPU running\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "!nvidia-smi"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
									
										
										
										
											2020-12-22 07:58:12 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
									
										
										
										
											2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "code",
							 
						 
					
						
							
								
									
										
										
										
											2019-11-28 12:07:04 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "execution_count": null,
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "metadata": {},
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "outputs": [],
							 
						 
					
						
							
								
									
										
										
										
											2019-11-25 17:41:03 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
									
										
										
										
											2022-02-04 15:45:09 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# Install the latest release of Haystack in your own environment\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-08-19 14:52:50 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "#! pip install farm-haystack\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-09-21 10:26:12 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# Install the latest master of Haystack\n",
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "!pip install --upgrade pip\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "!pip install git+https://github.com/deepset-ai/haystack.git#egg=farm-haystack[colab]"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
									
										
										
										
											2019-11-25 17:41:03 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "code",
							 
						 
					
						
							
								
									
										
										
										
											2021-09-21 14:42:18 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "execution_count": null,
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "metadata": {},
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "outputs": [],
							 
						 
					
						
							
								
									
										
										
										
											2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
									
										
										
										
											2022-03-29 13:53:35 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "from haystack.utils import clean_wiki_text, convert_files_to_docs, fetch_archive_from_http, print_answers\n",
							 
						 
					
						
							
								
									
										
										
										
											2021-10-25 15:50:23 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "from haystack.nodes import FARMReader, TransformersReader"
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
									
										
										
										
											2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
									
										
										
										
											2019-11-25 16:01:32 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "markdown",
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "metadata": {},
							 
						 
					
						
							
								
									
										
										
										
											2019-11-25 16:01:32 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "## Document Store\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-09-18 12:57:32 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "Haystack finds answers to queries within the documents stored in a `DocumentStore`. The current implementations of `DocumentStore` include `ElasticsearchDocumentStore`, `FAISSDocumentStore`,  `SQLDocumentStore`, and `InMemoryDocumentStore`.\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "**Here:** We recommended Elasticsearch as it comes preloaded with features like [full-text queries](https://www.elastic.co/guide/en/elasticsearch/reference/current/full-text-queries.html), [BM25 retrieval](https://www.elastic.co/elasticon/conf/2016/sf/improved-text-scoring-with-bm25), and [vector storage for text embeddings](https://www.elastic.co/guide/en/elasticsearch/reference/7.6/dense-vector.html).\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-30 19:00:41 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "**Alternatives:** If you are unable to setup an Elasticsearch instance, then follow the [Tutorial 3](https://github.com/deepset-ai/haystack/blob/master/tutorials/Tutorial3_Basic_QA_Pipeline_without_Elasticsearch.ipynb) for using SQL/InMemory document stores.\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-30 19:00:41 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "**Hint**: This tutorial creates a new document store instance with Wikipedia articles on Game of Thrones. However, you can configure Haystack to work with your existing document stores.\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "### Start an Elasticsearch server\n",
							 
						 
					
						
							
								
									
										
										
										
											2021-09-21 14:42:18 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "You can start Elasticsearch on your local machine instance using Docker. If Docker is not readily available in your environment (e.g. in Colab notebooks), then you can manually download and execute Elasticsearch from source."
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
									
										
										
										
											2019-11-25 16:01:32 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
									
										
										
										
											2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "code",
							 
						 
					
						
							
								
									
										
										
										
											2021-09-21 14:42:18 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "execution_count": null,
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "metadata": {},
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "outputs": [],
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
									
										
										
										
											2021-06-11 11:09:15 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# Recommended: Start Elasticsearch using Docker via the Haystack utility function\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "from haystack.utils import launch_es\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "launch_es()"
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "code",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "execution_count": null,
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "metadata": {},
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "outputs": [],
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
									
										
										
										
											2020-04-30 19:00:41 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# In Colab / No Docker environments: Start Elasticsearch from source\n",
							 
						 
					
						
							
								
									
										
										
										
											2021-02-09 14:56:54 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.9.2-linux-x86_64.tar.gz -q\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "! tar -xzf elasticsearch-7.9.2-linux-x86_64.tar.gz\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "! chown -R daemon:daemon elasticsearch-7.9.2\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-30 19:00:41 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "import os\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "from subprocess import Popen, PIPE, STDOUT\n",
							 
						 
					
						
							
								
									
										
										
										
											2022-02-04 15:45:09 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "es_server = Popen(\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "    [\"elasticsearch-7.9.2/bin/elasticsearch\"], stdout=PIPE, stderr=STDOUT, preexec_fn=lambda: os.setuid(1)  # as daemon\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    ")\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-30 19:00:41 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# wait until ES has started\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "! sleep 30"
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "code",
							 
						 
					
						
							
								
									
										
										
										
											2021-09-21 14:42:18 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "execution_count": null,
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pycharm": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								     "name": "#%%\n"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "outputs": [],
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "# Connect to Elasticsearch\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2021-10-25 15:50:23 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "from haystack.document_stores import ElasticsearchDocumentStore\n",
							 
						 
					
						
							
								
									
										
										
										
											2022-02-04 15:45:09 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "document_store = ElasticsearchDocumentStore(host=\"localhost\", username=\"\", password=\"\", index=\"document\")"
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
									
										
										
										
											2021-10-25 15:50:23 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "markdown",
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pycharm": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								     "name": "#%% md\n"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   },
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
									
										
										
										
											2020-09-16 18:33:23 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "## Preprocessing of documents\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-09-16 18:33:23 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "Haystack provides a customizable pipeline for:\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    " - converting files into texts\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    " - cleaning texts\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    " - splitting texts\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    " - writing them to a Document Store\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-09-18 12:57:32 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "In this tutorial, we download Wikipedia articles about Game of Thrones, apply a basic cleaning function, and index them in Elasticsearch."
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "code",
							 
						 
					
						
							
								
									
										
										
										
											2021-09-21 14:42:18 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "execution_count": null,
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pycharm": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								     "name": "#%%\n"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "outputs": [],
							 
						 
					
						
							
								
									
										
										
										
											2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
									
										
										
										
											2020-09-18 12:57:32 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# Let's first fetch some documents that we want to query\n",
							 
						 
					
						
							
								
									
										
										
										
											2019-11-25 16:01:32 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# Here: 517 Wikipedia articles for Game of Thrones\n",
							 
						 
					
						
							
								
									
										
										
										
											2022-03-21 11:58:51 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "doc_dir = \"data/tutorial1\"\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "s3_url = \"https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/wiki_gameofthrones_txt1.zip\"\n",
							 
						 
					
						
							
								
									
										
										
										
											2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								    "fetch_archive_from_http(url=s3_url, output_dir=doc_dir)\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-06-08 11:07:19 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# Convert files to dicts\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-01-23 15:18:41 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# You can optionally supply a cleaning function that is applied to each doc (e.g. to remove footers)\n",
							 
						 
					
						
							
								
									
										
										
										
											2019-11-25 16:01:32 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# It must take a str as input, and return a str.\n",
							 
						 
					
						
							
								
									
										
										
										
											2022-03-29 13:53:35 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "docs = convert_files_to_docs(dir_path=doc_dir, clean_func=clean_wiki_text, split_paragraphs=True)\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-06-08 11:07:19 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-07-07 14:59:01 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# We now have a list of dictionaries that we can write to our document store.\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "# If your texts come from a different source (e.g. a DB), you can of course skip convert_files_to_dicts() and create the dictionaries yourself.\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-09-18 12:57:32 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# The default format here is:\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "# {\n",
							 
						 
					
						
							
								
									
										
										
										
											2022-03-23 14:55:01 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "#    'content': \"<DOCUMENT_TEXT_HERE>\",\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-09-18 12:57:32 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "#    'meta': {'name': \"<DOCUMENT_NAME_HERE>\", ...}\n",
							 
						 
					
						
							
								
									
										
										
										
											2022-02-04 15:45:09 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# }\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-07-07 14:59:01 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# (Optionally: you can also add more key-value-pairs here, that will be indexed as fields in Elasticsearch and\n",
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 11:50:59 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# can be accessed later for filtering or shown in the responses of the Pipeline)\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-07-07 14:59:01 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "# Let's have a look at the first 3 entries:\n",
							 
						 
					
						
							
								
									
										
										
										
											2022-03-29 13:53:35 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "print(docs[:3])\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-07-07 14:59:01 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-06-08 11:07:19 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# Now, let's write the dicts containing documents to our DB.\n",
							 
						 
					
						
							
								
									
										
										
										
											2022-03-29 13:53:35 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "document_store.write_documents(docs)"
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
									
										
										
										
											2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
									
										
										
										
											2019-11-25 16:01:32 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "markdown",
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "metadata": {},
							 
						 
					
						
							
								
									
										
										
										
											2019-11-25 16:01:32 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 11:50:59 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "## Initalize Retriever, Reader,  & Pipeline\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "### Retriever\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "Retrievers help narrowing down the scope for the Reader to smaller units of text where a given question could be answered.\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "They use some simple but fast algorithm.\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-30 19:00:41 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "**Here:** We use Elasticsearch's default BM25 algorithm\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-30 19:00:41 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "**Alternatives:**\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-30 19:00:41 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2022-04-26 16:09:39 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "- Customize the `BM25Retriever`with custom queries (e.g. boosting) and filters\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-07-07 14:59:01 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "- Use `TfidfRetriever` in combination with a SQL or InMemory Document store for simple prototyping and debugging\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "- Use `EmbeddingRetriever` to find candidate documents based on the similarity of embeddings (e.g. created via Sentence-BERT)\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-07-07 14:59:01 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "- Use `DensePassageRetriever` to use different embedding models for passage and query (see Tutorial 6)"
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "code",
							 
						 
					
						
							
								
									
										
										
										
											2021-09-21 14:42:18 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "execution_count": null,
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "metadata": {},
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "outputs": [],
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
									
										
										
										
											2022-04-26 16:09:39 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "from haystack.nodes import BM25Retriever\n",
							 
						 
					
						
							
								
									
										
										
										
											2022-02-04 15:45:09 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2022-04-26 16:09:39 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "retriever = BM25Retriever(document_store=document_store)"
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
									
										
										
										
											2019-11-25 16:01:32 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
									
										
										
										
											2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "code",
							 
						 
					
						
							
								
									
										
										
										
											2021-09-21 14:42:18 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "execution_count": null,
							 
						 
					
						
							
								
									
										
										
										
											2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pycharm": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								     "is_executing": false,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								     "name": "#%%\n"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    }
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "outputs": [],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "# Alternative: An in-memory TfidfRetriever based on Pandas dataframes for building quick-prototypes with SQLite document store.\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "# from haystack.nodes import TfidfRetriever\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "# retriever = TfidfRetriever(document_store=document_store)"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "markdown",
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "metadata": {},
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "### Reader\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "A Reader scans the texts returned by retrievers in detail and extracts the k best answers. They are based\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "on powerful, but slower deep learning models.\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "Haystack currently supports Readers based on the frameworks FARM and Transformers.\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "With both you can either load a local model or one from Hugging Face's model hub (https://huggingface.co/models).\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-30 19:00:41 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "**Here:** a medium sized RoBERTa QA model using a Reader based on FARM (https://huggingface.co/deepset/roberta-base-squad2)\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-30 19:00:41 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "**Alternatives (Reader):** TransformersReader (leveraging the `pipeline` of the Transformers package)\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-30 19:00:41 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "**Alternatives (Models):** e.g. \"distilbert-base-uncased-distilled-squad\" (fast) or \"deepset/bert-large-uncased-whole-word-masking-squad2\" (good accuracy)\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-30 19:00:41 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "**Hint:** You can adjust the model to return \"no answer possible\" with the no_ans_boost. Higher values mean the model prefers \"no answer possible\"\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "#### FARMReader"
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
									
										
										
										
											2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "code",
							 
						 
					
						
							
								
									
										
										
										
											2021-09-21 14:42:18 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "execution_count": null,
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pycharm": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								     "is_executing": false
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "outputs": [],
							 
						 
					
						
							
								
									
										
										
										
											2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# Load a  local model or any of the QA models on\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "# Hugging Face's model hub (https://huggingface.co/models)\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-01-13 18:56:22 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-12-22 07:58:12 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "reader = FARMReader(model_name_or_path=\"deepset/roberta-base-squad2\", use_gpu=True)"
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "markdown",
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "metadata": {},
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "#### TransformersReader"
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "code",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "execution_count": null,
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "metadata": {},
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "outputs": [],
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "# Alternative:\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-10-21 17:15:35 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# reader = TransformersReader(model_name_or_path=\"distilbert-base-uncased-distilled-squad\", tokenizer=\"distilbert-base-uncased\", use_gpu=-1)"
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
									
										
										
										
											2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "markdown",
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "metadata": {},
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
									
										
										
										
											2021-02-09 14:56:54 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "### Pipeline\n",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2021-02-09 14:56:54 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "With a Haystack `Pipeline` you can stick together your building blocks to a search pipeline.\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "Under the hood, `Pipelines` are Directed Acyclic Graphs (DAGs) that you can easily customize for your own use cases.\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "To speed things up, Haystack also comes with a few predefined Pipelines. One of them is the `ExtractiveQAPipeline` that combines a retriever and a reader to answer our questions.\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "You can learn more about `Pipelines` in the [docs](https://haystack.deepset.ai/docs/latest/pipelinesmd)."
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
									
										
										
										
											2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "code",
							 
						 
					
						
							
								
									
										
										
										
											2021-09-21 14:42:18 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "execution_count": null,
							 
						 
					
						
							
								
									
										
										
										
											2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pycharm": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								     "is_executing": false
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    }
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "outputs": [],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "from haystack.pipelines import ExtractiveQAPipeline\n",
							 
						 
					
						
							
								
									
										
										
										
											2022-02-04 15:45:09 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "pipe = ExtractiveQAPipeline(reader, retriever)"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
									
										
										
										
											2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
									
										
										
										
											2019-11-25 16:01:32 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "markdown",
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "metadata": {},
							 
						 
					
						
							
								
									
										
										
										
											2019-11-25 16:01:32 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
									
										
										
										
											2020-03-17 19:58:53 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "## Voilà! Ask a question!"
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
									
										
										
										
											2019-11-25 16:01:32 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
									
										
										
										
											2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "code",
							 
						 
					
						
							
								
									
										
										
										
											2021-09-21 14:42:18 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "execution_count": null,
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pycharm": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								     "is_executing": false
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "outputs": [],
							 
						 
					
						
							
								
									
										
										
										
											2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
									
										
										
										
											2019-11-25 16:14:37 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# You can configure how many candidates the reader and retriever shall return\n",
							 
						 
					
						
							
								
									
										
										
										
											2022-02-04 15:45:09 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# The higher top_k_retriever, the better (but also the slower) your answers.\n",
							 
						 
					
						
							
								
									
										
										
										
											2021-09-22 16:35:20 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "prediction = pipe.run(\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "    query=\"Who is the father of Arya Stark?\", params={\"Retriever\": {\"top_k\": 10}, \"Reader\": {\"top_k\": 5}}\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    ")"
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
									
										
										
										
											2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "code",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "execution_count": null,
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "metadata": {},
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "outputs": [],
							 
						 
					
						
							
								
									
										
										
										
											2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
									
										
										
										
											2021-09-22 16:35:20 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# prediction = pipe.run(query=\"Who created the Dothraki vocabulary?\", params={\"Reader\": {\"top_k\": 5}})\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "# prediction = pipe.run(query=\"Who is the sister of Sansa?\", params={\"Reader\": {\"top_k\": 5}})"
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
									
										
										
										
											2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "code",
							 
						 
					
						
							
								
									
										
										
										
											2021-09-21 14:42:18 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "execution_count": null,
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "metadata": {},
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "outputs": [],
							 
						 
					
						
							
								
									
										
										
										
											2021-10-25 15:50:23 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
									
										
										
										
											2021-11-09 15:09:26 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# Now you can either print the object directly...\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "from pprint import pprint\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pprint(prediction)\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2022-02-04 15:45:09 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# Sample output:\n",
							 
						 
					
						
							
								
									
										
										
										
											2021-11-09 15:09:26 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# {\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "#     'answers': [ <Answer: answer='Eddard', type='extractive', score=0.9919578731060028, offsets_in_document=[{'start': 608, 'end': 615}], offsets_in_context=[{'start': 72, 'end': 79}], document_id='cc75f739897ecbf8c14657b13dda890e', meta={'name': '454_Music_of_Game_of_Thrones.txt'}}, context='...' >,\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "#                  <Answer: answer='Ned', type='extractive', score=0.9767240881919861, offsets_in_document=[{'start': 3687, 'end': 3801}], offsets_in_context=[{'start': 18, 'end': 132}], document_id='9acf17ec9083c4022f69eb4a37187080', meta={'name': '454_Music_of_Game_of_Thrones.txt'}}, context='...' >,\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "#                  ...\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "#                ]\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "#     'documents': [ <Document: content_type='text', score=0.8034909798951382, meta={'name': '332_Sansa_Stark.txt'}, embedding=None, id=d1f36ec7170e4c46cde65787fe125dfe', content='\\n===\\'\\'A Game of Thrones\\'\\'===\\nSansa Stark begins the novel by being betrothed to Crown ...'>,\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "#                    <Document: content_type='text', score=0.8002150354529785, meta={'name': '191_Gendry.txt'}, embedding=None, id='dd4e070a22896afa81748d6510006d2', 'content='\\n===Season 2===\\nGendry travels North with Yoren and other Night's Watch recruits, including Arya ...'>,\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "#                    ...\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "#                  ],\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "#     'no_ans_gap':  11.688868522644043,\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "#     'node_id': 'Reader',\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "#     'params': {'Reader': {'top_k': 5}, 'Retriever': {'top_k': 5}},\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "#     'query': 'Who is the father of Arya Stark?',\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "#     'root_node': 'Query'\n",
							 
						 
					
						
							
								
									
										
										
										
											2022-02-04 15:45:09 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# }"
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
									
										
										
										
											2021-11-09 15:09:26 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "code",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "execution_count": null,
							 
						 
					
						
							
								
									
										
										
										
											2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pycharm": {
							 
						 
					
						
							
								
									
										
										
										
											2019-11-25 16:01:32 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								     "is_executing": false,
							 
						 
					
						
							
								
									
										
										
										
											2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								     "name": "#%%\n"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    }
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "outputs": [],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "# ...or use a util to simplify the output\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "# Change `minimum` to `medium` or `all` to raise the level of detail\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "print_answers(prediction, details=\"minimum\")"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
									
										
										
										
											2021-06-11 11:09:15 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "markdown",
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "collapsed": false,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pycharm": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								     "name": "#%% md\n"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   },
							 
						 
					
						
							
								
									
										
										
										
											2021-06-11 11:09:15 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "## About us\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "This [Haystack](https://github.com/deepset-ai/haystack/) notebook was made with love by [deepset](https://deepset.ai/) in Berlin, Germany\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "We bring NLP to the industry via open source!  \n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "Our focus: Industry specific language models & large scale QA systems.  \n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "  \n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "Some of our other work: \n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "- [German BERT](https://deepset.ai/german-bert)\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "- [GermanQuAD and GermanDPR](https://deepset.ai/germanquad)\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "- [FARM](https://github.com/deepset-ai/FARM)\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "Get in touch:\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "[Twitter](https://twitter.com/deepset_ai) | [LinkedIn](https://www.linkedin.com/company/deepset-ai/) | [Slack](https://haystack.deepset.ai/community/join) | [GitHub Discussions](https://github.com/deepset-ai/haystack/discussions) | [Website](https://deepset.ai)\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2021-10-21 11:42:32 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "By the way: [we're hiring!](https://www.deepset.ai/jobs)\n"
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
									
										
										
										
											2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								  }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 ],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  "kernelspec": {
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "display_name": "Python 3",
							 
						 
					
						
							
								
									
										
										
										
											2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								   "language": "python",
							 
						 
					
						
							
								
									
										
										
										
											2020-04-29 14:01:05 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "name": "python3"
							 
						 
					
						
							
								
									
										
										
										
											2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  "language_info": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "codemirror_mode": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "name": "ipython",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "version": 3
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "file_extension": ".py",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "mimetype": "text/x-python",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "name": "python",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "nbconvert_exporter": "python",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "pygments_lexer": "ipython3",
							 
						 
					
						
							
								
									
										
										
										
											2022-01-26 18:12:55 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   "version": "3.9.7"
							 
						 
					
						
							
								
									
										
										
										
											2019-11-14 11:42:51 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								  }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 "nbformat": 4,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 "nbformat_minor": 2
							 
						 
					
						
							
								
									
										
										
										
											2022-03-23 14:55:01 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								}