mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-08-29 10:56:40 +00:00
Update Tutorial 6
This commit is contained in:
parent
8a9f97fad3
commit
c36f8c991e
@ -65,37 +65,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fri Jul 3 09:43:18 2020 \r\n",
|
||||
"+-----------------------------------------------------------------------------+\r\n",
|
||||
"| NVIDIA-SMI 435.21 Driver Version: 435.21 CUDA Version: 10.1 |\r\n",
|
||||
"|-------------------------------+----------------------+----------------------+\r\n",
|
||||
"| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\r\n",
|
||||
"| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\r\n",
|
||||
"|===============================+======================+======================|\r\n",
|
||||
"| 0 GeForce 940MX Off | 00000000:02:00.0 Off | N/A |\r\n",
|
||||
"| N/A 41C P0 N/A / N/A | 567MiB / 2004MiB | 4% Default |\r\n",
|
||||
"+-------------------------------+----------------------+----------------------+\r\n",
|
||||
" \r\n",
|
||||
"+-----------------------------------------------------------------------------+\r\n",
|
||||
"| Processes: GPU Memory |\r\n",
|
||||
"| GPU PID Type Process name Usage |\r\n",
|
||||
"|=============================================================================|\r\n",
|
||||
"| 0 1507 G /usr/lib/xorg/Xorg 212MiB |\r\n",
|
||||
"| 0 1735 G /usr/bin/gnome-shell 85MiB |\r\n",
|
||||
"| 0 3310 G ...uest-channel-token=10103706267471532991 48MiB |\r\n",
|
||||
"| 0 3507 G ...AAAAAAAAAAAACAAAAAAAAAA= --shared-files 195MiB |\r\n",
|
||||
"| 0 22962 G ...p/pycharm-professional/201/jbr/bin/java 22MiB |\r\n",
|
||||
"+-----------------------------------------------------------------------------+\r\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Make sure you have a GPU running\n",
|
||||
"!nvidia-smi"
|
||||
@ -107,7 +79,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! pip install git+git://github.com/deepset-ai/haystack.git@07ecfb60b944d9682f6d50317a15ffe5501ae456"
|
||||
"! pip install git+git://github.com/deepset-ai/haystack.git@8a9f97fad37241b0101c4561d10a49f2fbc6ee52"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -153,9 +125,9 @@
|
||||
],
|
||||
"source": [
|
||||
"# Recommended: Start Elasticsearch using Docker\n",
|
||||
"! docker run -d -p 9200:9200 -e \"discovery.type=single-node\" elasticsearch:7.6.2\n",
|
||||
"#! docker run -d -p 9200:9200 -e \"discovery.type=single-node\" elasticsearch:7.6.2\n",
|
||||
"# wait until ES has started\n",
|
||||
"! sleep 30"
|
||||
"#! sleep 30"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -165,18 +137,18 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# In Colab / No Docker environments: Start Elasticsearch from source\n",
|
||||
"#! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.6.2-linux-x86_64.tar.gz -q\n",
|
||||
"#! tar -xzf elasticsearch-7.6.2-linux-x86_64.tar.gz\n",
|
||||
"#! chown -R daemon:daemon elasticsearch-7.6.2\n",
|
||||
"! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.6.2-linux-x86_64.tar.gz -q\n",
|
||||
"! tar -xzf elasticsearch-7.6.2-linux-x86_64.tar.gz\n",
|
||||
"! chown -R daemon:daemon elasticsearch-7.6.2\n",
|
||||
"\n",
|
||||
"#import os\n",
|
||||
"#from subprocess import Popen, PIPE, STDOUT\n",
|
||||
"#es_server = Popen(['elasticsearch-7.6.2/bin/elasticsearch'],\n",
|
||||
"# stdout=PIPE, stderr=STDOUT,\n",
|
||||
"# preexec_fn=lambda: os.setuid(1) # as daemon\n",
|
||||
"# )\n",
|
||||
"import os\n",
|
||||
"from subprocess import Popen, PIPE, STDOUT\n",
|
||||
"es_server = Popen(['elasticsearch-7.6.2/bin/elasticsearch'],\n",
|
||||
" stdout=PIPE, stderr=STDOUT,\n",
|
||||
" preexec_fn=lambda: os.setuid(1) # as daemon\n",
|
||||
" )\n",
|
||||
"# wait until ES has started\n",
|
||||
"#! sleep 30"
|
||||
"! sleep 30"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -340,7 +312,7 @@
|
||||
" do_lower_case=True, gpu=True)\n",
|
||||
"\n",
|
||||
"# Important: \n",
|
||||
"# Now that we have the DPR initialized, we need to call update_embeddings() to iterate over all \n",
|
||||
"# Now that after we have the DPR initialized, we need to call update_embeddings() to iterate over all\n",
|
||||
"# previously indexed documents and update their embedding representation. \n",
|
||||
"# While this can be a time consuming operation (depending on corpus size), it only needs to be done once. \n",
|
||||
"# At query time, we only need to embed the query and compare it the existing doc embeddings which is very fast.\n",
|
||||
@ -396,7 +368,7 @@
|
||||
"# Load a local model or any of the QA models on\n",
|
||||
"# Hugging Face's model hub (https://huggingface.co/models)\n",
|
||||
"\n",
|
||||
"reader = FARMReader(model_name_or_path=\"deepset/roberta-base-squad2\", use_gpu=False)"
|
||||
"reader = FARMReader(model_name_or_path=\"deepset/roberta-base-squad2\", use_gpu=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -459,15 +431,8 @@
|
||||
"source": [
|
||||
"# You can configure how many candidates the reader and retriever shall return\n",
|
||||
"# The higher top_k_retriever, the better (but also the slower) your answers. \n",
|
||||
"prediction = finder.get_answers(question=\"Who created the Dothraki vocabulary?\", top_k_retriever=10, top_k_reader=5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prediction = finder.get_answers(question=\"Who created the Dothraki vocabulary?\", top_k_retriever=10, top_k_reader=5)\n",
|
||||
"\n",
|
||||
"#prediction = finder.get_answers(question=\"Who is the father of Arya Stark?\", top_k_retriever=10, top_k_reader=5)\n",
|
||||
"#prediction = finder.get_answers(question=\"Who is the sister of Sansa?\", top_k_retriever=10, top_k_reader=5)"
|
||||
]
|
||||
@ -516,13 +481,6 @@
|
||||
"source": [
|
||||
"print_answers(prediction, details=\"minimal\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@ -546,4 +504,4 @@
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
}
|
@ -48,6 +48,11 @@ document_store.write_documents(dicts[:16])
|
||||
### Retriever
|
||||
retriever = DensePassageRetriever(document_store=document_store, embedding_model="dpr-bert-base-nq",
|
||||
do_lower_case=True, gpu=True)
|
||||
# Important:
|
||||
# Now that we have the DPR initialized, we need to call update_embeddings() to iterate over all
|
||||
# previously indexed documents and update their embedding representation.
|
||||
# While this can be a time-consuming operation (depending on corpus size), it only needs to be done once.
|
||||
# At query time, we only need to embed the query and compare it to the existing doc embeddings, which is very fast.
|
||||
document_store.update_embeddings(retriever)
|
||||
|
||||
### Reader
|
||||
|
Loading…
x
Reference in New Issue
Block a user