2021-07-26 17:20:43 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								{
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 "cells": [
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "markdown",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "collapsed": true,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pycharm": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								     "name": "#%% md\n"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "# Question Generation\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "[](https://colab.research.google.com/github/deepset-ai/haystack/blob/master/tutorials/Tutorial13_Question_generation.ipynb)\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "This is a bare bones tutorial showing what is possible with the QuestionGenerator Nodes and Pipelines which automatically\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "generate questions which the question generation model thinks can be answered by a given document."
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
									
										
										
										
											2021-10-20 09:55:56 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "markdown",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "id": "yaaKv3_ZN-gb",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pycharm": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								     "name": "#%% md\n"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "### Prepare environment\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "#### Colab: Enable the GPU runtime\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "Make sure you enable the GPU runtime to experience decent speed in this tutorial.  \n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "**Runtime -> Change Runtime type -> Hardware accelerator -> GPU**\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "<img src=\"https://raw.githubusercontent.com/deepset-ai/haystack/master/docs/_src/img/colab_gpu_runtime.jpg\">"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   ]
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
									
										
										
										
											2021-07-26 17:20:43 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "code",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "execution_count": null,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "outputs": [],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
									
										
										
										
											2021-08-31 15:22:51 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# Install needed libraries\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "!pip install grpcio-tools==1.34.1\n",
							 
						 
					
						
							
								
									
										
										
										
											2021-09-23 14:36:20 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "!pip install git+https://github.com/deepset-ai/haystack.git\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "# If you run this notebook on Google Colab, you might need to\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "# restart the runtime after installing haystack."
							 
						 
					
						
							
								
									
										
										
										
											2021-08-31 15:22:51 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								   ],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "collapsed": false,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pycharm": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								     "name": "#%%\n"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "code",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "execution_count": null,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "outputs": [],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "# Imports needed to run this notebook\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2021-07-26 17:20:43 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								    "from haystack.question_generator import QuestionGenerator\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "from haystack.utils import launch_es\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "from haystack.document_store import ElasticsearchDocumentStore\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "from haystack.retriever import ElasticsearchRetriever\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "from pprint import pprint\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "from haystack.reader import FARMReader\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "from tqdm import tqdm\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "from haystack.pipeline import QuestionGenerationPipeline, RetrieverQuestionGenerationPipeline, QuestionAnswerGenerationPipeline"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   ],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "collapsed": false,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pycharm": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								     "name": "#%%\n"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
									
										
										
										
											2021-08-31 15:22:51 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "markdown",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "Let's start an Elasticsearch instance with one of the options below:"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   ],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "collapsed": false,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pycharm": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								     "name": "#%% md\n"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
									
										
										
										
											2021-07-26 17:20:43 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "code",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "execution_count": null,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "outputs": [],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
									
										
										
										
											2021-08-31 15:22:51 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "# Option 1: Start Elasticsearch service via Docker\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "launch_es()"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   ],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "collapsed": false,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pycharm": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								     "name": "#%%\n"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "code",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "execution_count": null,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "outputs": [],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "# Option 2: In Colab / No Docker environments: Start Elasticsearch from source\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.9.2-linux-x86_64.tar.gz -q\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "! tar -xzf elasticsearch-7.9.2-linux-x86_64.tar.gz\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "! chown -R daemon:daemon elasticsearch-7.9.2\n",
							 
						 
					
						
							
								
									
										
										
										
											2021-07-26 17:20:43 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2021-08-31 15:22:51 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "import os\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "from subprocess import Popen, PIPE, STDOUT\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "es_server = Popen(['elasticsearch-7.9.2/bin/elasticsearch'],\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "                   stdout=PIPE, stderr=STDOUT,\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "                   preexec_fn=lambda: os.setuid(1)  # as daemon\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "                  )\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "# wait until ES has started\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "! sleep 30"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   ],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "collapsed": false,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pycharm": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								     "name": "#%% \n"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "markdown",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "Let's initialize some core components"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   ],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "collapsed": false,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pycharm": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								     "name": "#%% md\n"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "code",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "execution_count": null,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "outputs": [],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
									
										
										
										
											2021-07-26 17:20:43 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								    "text1 = \"Python is an interpreted, high-level, general-purpose programming language. Created by Guido van Rossum and first released in 1991, Python's design philosophy emphasizes code readability with its notable use of significant whitespace.\"\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "text2 = \"Princess Arya Stark is the third child and second daughter of Lord Eddard Stark and his wife, Lady Catelyn Stark. She is the sister of the incumbent Westerosi monarchs, Sansa, Queen in the North, and Brandon, King of the Andals and the First Men. After narrowly escaping the persecution of House Stark by House Lannister, Arya is trained as a Faceless Man at the House of Black and White in Braavos, using her abilities to avenge her family. Upon her return to Westeros, she exacts retribution for the Red Wedding by exterminating the Frey male line.\"\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "text3 = \"Dry Cleaning are an English post-punk band who formed in South London in 2018.[3] The band is composed of vocalist Florence Shaw, guitarist Tom Dowse, bassist Lewis Maynard and drummer Nick Buxton. They are noted for their use of spoken word primarily in lieu of sung vocals, as well as their unconventional lyrics. Their musical stylings have been compared to Wire, Magazine and Joy Division.[4] The band released their debut single, 'Magic of Meghan' in 2019. Shaw wrote the song after going through a break-up and moving out of her former partner's apartment the same day that Meghan Markle and Prince Harry announced they were engaged.[5] This was followed by the release of two EPs that year: Sweet Princess in August and Boundary Road Snacks and Drinks in October. The band were included as part of the NME 100 of 2020,[6] as well as DIY magazine's Class of 2020.[7] The band signed to 4AD in late 2020 and shared a new single, 'Scratchcard Lanyard'.[8] In February 2021, the band shared details of their debut studio album, New Long Leg. They also shared the single 'Strong Feelings'.[9] The album, which was produced by John Parish, was released on 2 April 2021.[10]\"\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2021-10-13 14:23:23 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "docs = [{\"content\": text1},\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "        {\"content\": text2},\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "        {\"content\": text3}]\n",
							 
						 
					
						
							
								
									
										
										
										
											2021-07-26 17:20:43 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "# Initialize document store and write in the documents\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "document_store = ElasticsearchDocumentStore()\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "document_store.write_documents(docs)\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "# Initialize Question Generator\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "question_generator = QuestionGenerator()"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   ],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "collapsed": false,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pycharm": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								     "name": "#%%\n"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "markdown",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "## Question Generation Pipeline\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "The most basic version of a question generator pipeline takes a document as input and outputs generated questions\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "which the the document can answer."
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   ],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "collapsed": false,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pycharm": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								     "name": "#%% md\n"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "code",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "execution_count": null,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "outputs": [],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "question_generation_pipeline = QuestionGenerationPipeline(question_generator)\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "for document in document_store:\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "        result = question_generation_pipeline.run(documents=[document])\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "        pprint(result)"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   ],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "collapsed": false,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pycharm": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								     "name": "#%%\n"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "markdown",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "## Retriever Question Generation Pipeline\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "This pipeline takes a query as input. It retrieves relevant documents and then generates questions based on these."
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   ],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "collapsed": false,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pycharm": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								     "name": "#%% md\n"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "code",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "execution_count": null,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "outputs": [],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "retriever = ElasticsearchRetriever(document_store=document_store)\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "rqg_pipeline = RetrieverQuestionGenerationPipeline(retriever, question_generator)\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "result = rqg_pipeline.run(query=\"Arya Stark\")\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pprint(result)"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   ],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "collapsed": false,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pycharm": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								     "name": "#%%\n"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "markdown",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "## Question Answer Generation Pipeline\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "This pipeline takes a document as input, generates questions on it, and attempts to answer these questions using\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "a Reader model"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   ],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "collapsed": false,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pycharm": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								     "name": "#%% md\n"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "code",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "execution_count": null,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "outputs": [],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "reader = FARMReader(\"deepset/roberta-base-squad2\")\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "qag_pipeline = QuestionAnswerGenerationPipeline(question_generator, reader)\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "for document in tqdm(document_store):\n",
							 
						 
					
						
							
								
									
										
										
										
											2021-10-14 11:49:35 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "    result = qag_pipeline.run(documents=[document])\n",
							 
						 
					
						
							
								
									
										
										
										
											2021-07-26 17:20:43 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								    "    pprint(result)"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   ],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "collapsed": false,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "pycharm": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								     "name": "#%%\n"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "cell_type": "markdown",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "source": [
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "## About us\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "This [Haystack](https://github.com/deepset-ai/haystack/) notebook was made with love by [deepset](https://deepset.ai/) in Berlin, Germany\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "We bring NLP to the industry via open source!\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "Our focus: Industry specific language models & large scale QA systems.\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "Some of our other work:\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "- [German BERT](https://deepset.ai/german-bert)\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "- [GermanQuAD and GermanDPR](https://deepset.ai/germanquad)\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "- [FARM](https://github.com/deepset-ai/FARM)\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "Get in touch:\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "[Twitter](https://twitter.com/deepset_ai) | [LinkedIn](https://www.linkedin.com/company/deepset-ai/) | [Slack](https://haystack.deepset.ai/community/join) | [GitHub Discussions](https://github.com/deepset-ai/haystack/discussions) | [Website](https://deepset.ai)\n",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "\n",
							 
						 
					
						
							
								
									
										
										
										
											2021-10-21 11:42:32 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    "By the way: [we're hiring!](https://www.deepset.ai/jobs)"
							 
						 
					
						
							
								
									
										
										
										
											2021-07-26 17:20:43 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								   ],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "collapsed": false
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 ],
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 "metadata": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  "kernelspec": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "display_name": "Python 3",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "language": "python",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "name": "python3"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  "language_info": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "codemirror_mode": {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "name": "ipython",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    "version": 2
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "file_extension": ".py",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "mimetype": "text/x-python",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "name": "python",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "nbconvert_exporter": "python",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "pygments_lexer": "ipython2",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   "version": "2.7.6"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 "nbformat": 4,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 "nbformat_minor": 0
							 
						 
					
						
							
								
									
										
										
										
											2021-10-20 09:55:56 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								}