mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-08-29 19:06:21 +00:00
Update Tutorial 6
This commit is contained in:
parent
8a9f97fad3
commit
c36f8c991e
@ -65,37 +65,9 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Fri Jul 3 09:43:18 2020 \r\n",
|
|
||||||
"+-----------------------------------------------------------------------------+\r\n",
|
|
||||||
"| NVIDIA-SMI 435.21 Driver Version: 435.21 CUDA Version: 10.1 |\r\n",
|
|
||||||
"|-------------------------------+----------------------+----------------------+\r\n",
|
|
||||||
"| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\r\n",
|
|
||||||
"| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\r\n",
|
|
||||||
"|===============================+======================+======================|\r\n",
|
|
||||||
"| 0 GeForce 940MX Off | 00000000:02:00.0 Off | N/A |\r\n",
|
|
||||||
"| N/A 41C P0 N/A / N/A | 567MiB / 2004MiB | 4% Default |\r\n",
|
|
||||||
"+-------------------------------+----------------------+----------------------+\r\n",
|
|
||||||
" \r\n",
|
|
||||||
"+-----------------------------------------------------------------------------+\r\n",
|
|
||||||
"| Processes: GPU Memory |\r\n",
|
|
||||||
"| GPU PID Type Process name Usage |\r\n",
|
|
||||||
"|=============================================================================|\r\n",
|
|
||||||
"| 0 1507 G /usr/lib/xorg/Xorg 212MiB |\r\n",
|
|
||||||
"| 0 1735 G /usr/bin/gnome-shell 85MiB |\r\n",
|
|
||||||
"| 0 3310 G ...uest-channel-token=10103706267471532991 48MiB |\r\n",
|
|
||||||
"| 0 3507 G ...AAAAAAAAAAAACAAAAAAAAAA= --shared-files 195MiB |\r\n",
|
|
||||||
"| 0 22962 G ...p/pycharm-professional/201/jbr/bin/java 22MiB |\r\n",
|
|
||||||
"+-----------------------------------------------------------------------------+\r\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"# Make sure you have a GPU running\n",
|
"# Make sure you have a GPU running\n",
|
||||||
"!nvidia-smi"
|
"!nvidia-smi"
|
||||||
@ -107,7 +79,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"! pip install git+git://github.com/deepset-ai/haystack.git@07ecfb60b944d9682f6d50317a15ffe5501ae456"
|
"! pip install git+git://github.com/deepset-ai/haystack.git@8a9f97fad37241b0101c4561d10a49f2fbc6ee52"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -153,9 +125,9 @@
|
|||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# Recommended: Start Elasticsearch using Docker\n",
|
"# Recommended: Start Elasticsearch using Docker\n",
|
||||||
"! docker run -d -p 9200:9200 -e \"discovery.type=single-node\" elasticsearch:7.6.2\n",
|
"#! docker run -d -p 9200:9200 -e \"discovery.type=single-node\" elasticsearch:7.6.2\n",
|
||||||
"# wait until ES has started\n",
|
"# wait until ES has started\n",
|
||||||
"! sleep 30"
|
"#! sleep 30"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -165,18 +137,18 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# In Colab / No Docker environments: Start Elasticsearch from source\n",
|
"# In Colab / No Docker environments: Start Elasticsearch from source\n",
|
||||||
"#! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.6.2-linux-x86_64.tar.gz -q\n",
|
"! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.6.2-linux-x86_64.tar.gz -q\n",
|
||||||
"#! tar -xzf elasticsearch-7.6.2-linux-x86_64.tar.gz\n",
|
"! tar -xzf elasticsearch-7.6.2-linux-x86_64.tar.gz\n",
|
||||||
"#! chown -R daemon:daemon elasticsearch-7.6.2\n",
|
"! chown -R daemon:daemon elasticsearch-7.6.2\n",
|
||||||
"\n",
|
"\n",
|
||||||
"#import os\n",
|
"import os\n",
|
||||||
"#from subprocess import Popen, PIPE, STDOUT\n",
|
"from subprocess import Popen, PIPE, STDOUT\n",
|
||||||
"#es_server = Popen(['elasticsearch-7.6.2/bin/elasticsearch'],\n",
|
"es_server = Popen(['elasticsearch-7.6.2/bin/elasticsearch'],\n",
|
||||||
"# stdout=PIPE, stderr=STDOUT,\n",
|
" stdout=PIPE, stderr=STDOUT,\n",
|
||||||
"# preexec_fn=lambda: os.setuid(1) # as daemon\n",
|
" preexec_fn=lambda: os.setuid(1) # as daemon\n",
|
||||||
"# )\n",
|
" )\n",
|
||||||
"# wait until ES has started\n",
|
"# wait until ES has started\n",
|
||||||
"#! sleep 30"
|
"! sleep 30"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -340,7 +312,7 @@
|
|||||||
" do_lower_case=True, gpu=True)\n",
|
" do_lower_case=True, gpu=True)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Important: \n",
|
"# Important: \n",
|
||||||
"# Now that we have the DPR initialized, we need to call update_embeddings() to iterate over all \n",
|
"# Now that after we have the DPR initialized, we need to call update_embeddings() to iterate over all\n",
|
||||||
"# previously indexed documents and update their embedding representation. \n",
|
"# previously indexed documents and update their embedding representation. \n",
|
||||||
"# While this can be a time consuming operation (depending on corpus size), it only needs to be done once. \n",
|
"# While this can be a time consuming operation (depending on corpus size), it only needs to be done once. \n",
|
||||||
"# At query time, we only need to embed the query and compare it the existing doc embeddings which is very fast.\n",
|
"# At query time, we only need to embed the query and compare it the existing doc embeddings which is very fast.\n",
|
||||||
@ -396,7 +368,7 @@
|
|||||||
"# Load a local model or any of the QA models on\n",
|
"# Load a local model or any of the QA models on\n",
|
||||||
"# Hugging Face's model hub (https://huggingface.co/models)\n",
|
"# Hugging Face's model hub (https://huggingface.co/models)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"reader = FARMReader(model_name_or_path=\"deepset/roberta-base-squad2\", use_gpu=False)"
|
"reader = FARMReader(model_name_or_path=\"deepset/roberta-base-squad2\", use_gpu=True)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -459,15 +431,8 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# You can configure how many candidates the reader and retriever shall return\n",
|
"# You can configure how many candidates the reader and retriever shall return\n",
|
||||||
"# The higher top_k_retriever, the better (but also the slower) your answers. \n",
|
"# The higher top_k_retriever, the better (but also the slower) your answers. \n",
|
||||||
"prediction = finder.get_answers(question=\"Who created the Dothraki vocabulary?\", top_k_retriever=10, top_k_reader=5)"
|
"prediction = finder.get_answers(question=\"Who created the Dothraki vocabulary?\", top_k_retriever=10, top_k_reader=5)\n",
|
||||||
]
|
"\n",
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 26,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"#prediction = finder.get_answers(question=\"Who is the father of Arya Stark?\", top_k_retriever=10, top_k_reader=5)\n",
|
"#prediction = finder.get_answers(question=\"Who is the father of Arya Stark?\", top_k_retriever=10, top_k_reader=5)\n",
|
||||||
"#prediction = finder.get_answers(question=\"Who is the sister of Sansa?\", top_k_retriever=10, top_k_reader=5)"
|
"#prediction = finder.get_answers(question=\"Who is the sister of Sansa?\", top_k_retriever=10, top_k_reader=5)"
|
||||||
]
|
]
|
||||||
@ -516,13 +481,6 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"print_answers(prediction, details=\"minimal\")"
|
"print_answers(prediction, details=\"minimal\")"
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
@ -48,6 +48,11 @@ document_store.write_documents(dicts[:16])
|
|||||||
### Retriever
|
### Retriever
|
||||||
retriever = DensePassageRetriever(document_store=document_store, embedding_model="dpr-bert-base-nq",
|
retriever = DensePassageRetriever(document_store=document_store, embedding_model="dpr-bert-base-nq",
|
||||||
do_lower_case=True, gpu=True)
|
do_lower_case=True, gpu=True)
|
||||||
|
# Important:
|
||||||
|
# Now that we have the DPR initialized, we need to call update_embeddings() to iterate over all
|
||||||
|
# previously indexed documents and update their embedding representation.
|
||||||
|
# While this can be a time consuming operation (depending on corpus size), it only needs to be done once.
|
||||||
|
# At query time, we only need to embed the query and compare it to the existing doc embeddings, which is very fast.
|
||||||
document_store.update_embeddings(retriever)
|
document_store.update_embeddings(retriever)
|
||||||
|
|
||||||
### Reader
|
### Reader
|
||||||
|
Loading…
x
Reference in New Issue
Block a user