Fix tutorials 4, 7 and 8 (#2526)

* Fix tutorials 4, 7 and 8

* Update Documentation & Code Style

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
bogdankostic 2022-05-11 09:17:05 +02:00 committed by GitHub
parent 4581b91e83
commit 5378a9ab48
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 12 additions and 8 deletions

View File

@ -64,7 +64,7 @@ You can start Elasticsearch on your local machine instance using Docker. If Dock
```python
# Recommended: Start Elasticsearch using Docker via the Haystack utility function
from haystack.utils import launch_es, fetch_archive_from_http
from haystack.utils import launch_es
launch_es()
```
@ -125,6 +125,8 @@ Here: We download some question-answer pairs related to COVID-19
```python
from haystack.utils import fetch_archive_from_http
# Download
doc_dir = "data/tutorial4"
s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/small_faq_covid.csv.zip"

View File

@ -65,7 +65,7 @@ s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/smal
fetch_archive_from_http(url=s3_url, output_dir=doc_dir)
# Create dataframe with columns "title" and "text"
df = pd.read_csv("small_generator_dataset.csv", sep=",")
df = pd.read_csv(f"{doc_dir}/small_generator_dataset.csv", sep=",")
# Minimal cleaning
df.fillna(value="", inplace=True)

View File

@ -38,7 +38,7 @@ This tutorial will show you all the tools that Haystack provides to help you cas
# For Colab/linux based machines
!wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz
!tar -xvf xpdf-tools-linux-4.03.tar.gz && sudo cp xpdf-tools-linux-4.03/bin64/pdftotext /usr/local/bin
!tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin
# For macOS machines
# !wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-mac-4.03.tar.gz

View File

@ -99,7 +99,7 @@
"outputs": [],
"source": [
"# Recommended: Start Elasticsearch using Docker via the Haystack utility function\n",
"from haystack.utils import launch_es, fetch_archive_from_http\n",
"from haystack.utils import launch_es\n",
"\n",
"launch_es()"
]
@ -219,6 +219,8 @@
},
"outputs": [],
"source": [
"from haystack.utils import fetch_archive_from_http\n",
"\n",
"# Download\n",
"doc_dir = \"data/tutorial4\"\n",
"s3_url = \"https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/small_faq_covid.csv.zip\"\n",

View File

@ -121,7 +121,7 @@
"fetch_archive_from_http(url=s3_url, output_dir=doc_dir)\n",
"\n",
"# Create dataframe with columns \"title\" and \"text\"\n",
"df = pd.read_csv(\"small_generator_dataset.csv\", sep=\",\")\n",
"df = pd.read_csv(f\"{doc_dir}/small_generator_dataset.csv\", sep=\",\")\n",
"# Minimal cleaning\n",
"df.fillna(value=\"\", inplace=True)\n",
"\n",

View File

@ -16,7 +16,7 @@ def tutorial7_rag_generator():
fetch_archive_from_http(url=s3_url, output_dir=doc_dir)
# Get dataframe with columns "title" and "text"
df = pd.read_csv("small_generator_dataset.csv", sep=",")
df = pd.read_csv(f"{doc_dir}/small_generator_dataset.csv", sep=",")
# Minimal cleaning
df.fillna(value="", inplace=True)

View File

@ -67,7 +67,7 @@
"\n",
"# For Colab/linux based machines\n",
"!wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz\n",
"!tar -xvf xpdf-tools-linux-4.03.tar.gz && sudo cp xpdf-tools-linux-4.03/bin64/pdftotext /usr/local/bin\n",
"!tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin\n",
"\n",
"# For macOS machines\n",
"# !wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-mac-4.03.tar.gz\n",
@ -513,4 +513,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}