mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-12-28 15:38:36 +00:00
Fix tutorials 4, 7 and 8 (#2526)
* Fix tutorials 4, 7 and 8
* Update Documentation & Code Style

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
parent
4581b91e83
commit
5378a9ab48
@@ -64,7 +64,7 @@ You can start Elasticsearch on your local machine instance using Docker. If Dock
|
||||
|
||||
```python
|
||||
# Recommended: Start Elasticsearch using Docker via the Haystack utility function
|
||||
from haystack.utils import launch_es, fetch_archive_from_http
|
||||
from haystack.utils import launch_es
|
||||
|
||||
launch_es()
|
||||
```
|
||||
@@ -125,6 +125,8 @@ Here: We download some question-answer pairs related to COVID-19
|
||||
|
||||
|
||||
```python
|
||||
from haystack.utils import fetch_archive_from_http
|
||||
|
||||
# Download
|
||||
doc_dir = "data/tutorial4"
|
||||
s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/small_faq_covid.csv.zip"
|
||||
|
||||
@@ -65,7 +65,7 @@ s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/smal
|
||||
fetch_archive_from_http(url=s3_url, output_dir=doc_dir)
|
||||
|
||||
# Create dataframe with columns "title" and "text"
|
||||
df = pd.read_csv("small_generator_dataset.csv", sep=",")
|
||||
df = pd.read_csv(f"{doc_dir}/small_generator_dataset.csv", sep=",")
|
||||
# Minimal cleaning
|
||||
df.fillna(value="", inplace=True)
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ This tutorial will show you all the tools that Haystack provides to help you cas
|
||||
|
||||
# For Colab/linux based machines
|
||||
!wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz
|
||||
!tar -xvf xpdf-tools-linux-4.03.tar.gz && sudo cp xpdf-tools-linux-4.03/bin64/pdftotext /usr/local/bin
|
||||
!tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin
|
||||
|
||||
# For Macos machines
|
||||
# !wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-mac-4.03.tar.gz
|
||||
|
||||
@@ -99,7 +99,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Recommended: Start Elasticsearch using Docker via the Haystack utility function\n",
|
||||
"from haystack.utils import launch_es, fetch_archive_from_http\n",
|
||||
"from haystack.utils import launch_es\n",
|
||||
"\n",
|
||||
"launch_es()"
|
||||
]
|
||||
@@ -219,6 +219,8 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from haystack.utils import fetch_archive_from_http\n",
|
||||
"\n",
|
||||
"# Download\n",
|
||||
"doc_dir = \"data/tutorial4\"\n",
|
||||
"s3_url = \"https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/small_faq_covid.csv.zip\"\n",
|
||||
|
||||
@@ -121,7 +121,7 @@
|
||||
"fetch_archive_from_http(url=s3_url, output_dir=doc_dir)\n",
|
||||
"\n",
|
||||
"# Create dataframe with columns \"title\" and \"text\"\n",
|
||||
"df = pd.read_csv(\"small_generator_dataset.csv\", sep=\",\")\n",
|
||||
"df = pd.read_csv(f\"{doc_dir}/small_generator_dataset.csv\", sep=\",\")\n",
|
||||
"# Minimal cleaning\n",
|
||||
"df.fillna(value=\"\", inplace=True)\n",
|
||||
"\n",
|
||||
|
||||
@@ -16,7 +16,7 @@ def tutorial7_rag_generator():
|
||||
fetch_archive_from_http(url=s3_url, output_dir=doc_dir)
|
||||
|
||||
# Get dataframe with columns "title", and "text"
|
||||
df = pd.read_csv("small_generator_dataset.csv", sep=",")
|
||||
df = pd.read_csv(f"{doc_dir}/small_generator_dataset.csv", sep=",")
|
||||
# Minimal cleaning
|
||||
df.fillna(value="", inplace=True)
|
||||
|
||||
|
||||
@@ -67,7 +67,7 @@
|
||||
"\n",
|
||||
"# For Colab/linux based machines\n",
|
||||
"!wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz\n",
|
||||
"!tar -xvf xpdf-tools-linux-4.03.tar.gz && sudo cp xpdf-tools-linux-4.03/bin64/pdftotext /usr/local/bin\n",
|
||||
"!tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin\n",
|
||||
"\n",
|
||||
"# For Macos machines\n",
|
||||
"# !wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-mac-4.03.tar.gz\n",
|
||||
@@ -513,4 +513,4 @@
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user