Fix timeout issue related to spam data backup url (#544)

* Add backup url for Spam Dataset

* import urllib

* fix url

* fix timeout issue
This commit is contained in:
Sebastian Raschka 2025-02-20 09:26:23 -06:00 committed by GitHub
parent c39aa32ef5
commit d1e99f6092
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 10 additions and 9 deletions

View File

@@ -207,9 +207,8 @@
"\n",
"try:\n",
" download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path)\n",
-"except urllib.error.HTTPError:\n",
-" print(\"UCI Machine Learning Repository (https://archive.ics.uci.edu)\"\n",
-" \" temporary unavailable. Using backup URL.\")\n",
+"except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:\n",
+" print(f\"Primary URL failed: {e}. Trying backup URL...\")\n",
" url = \"https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip\"\n",
" download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path)\n",
"\n",

View File

@@ -217,9 +217,8 @@
"\n",
"try:\n",
" download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path)\n",
-"except urllib.error.HTTPError:\n",
-" print(\"UCI Machine Learning Repository (https://archive.ics.uci.edu)\"\n",
-" \" temporary unavailable. Using backup URL.\")\n",
+"except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:\n",
+" print(f\"Primary URL failed: {e}. Trying backup URL...\")\n",
" url = \"https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip\"\n",
" download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path) "
]

View File

@@ -280,7 +280,8 @@ if __name__ == "__main__":
download_and_unzip_spam_data(
url, zip_path, extracted_path, data_file_path, test_mode=args.test_mode
)
-except urllib.error.HTTPError:
+except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:
+print(f"Primary URL failed: {e}. Trying backup URL...")
backup_url = "https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip"
download_and_unzip_spam_data(
backup_url, zip_path, extracted_path, data_file_path, test_mode=args.test_mode

View File

@@ -605,7 +605,8 @@ if __name__ == "__main__":
if not all_exist:
try:
download_and_unzip(url, zip_path, extract_to, new_file_path)
-except urllib.error.HTTPError:
+except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:
+print(f"Primary URL failed: {e}. Trying backup URL...")
backup_url = "https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip"
download_and_unzip(backup_url, zip_path, extract_to, new_file_path)
create_dataset_csvs(new_file_path)

View File

@@ -412,7 +412,8 @@ if __name__ == "__main__":
if not all_exist:
try:
download_and_unzip(url, zip_path, extract_to, new_file_path)
-except urllib.error.HTTPError:
+except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:
+print(f"Primary URL failed: {e}. Trying backup URL...")
backup_url = "https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip"
download_and_unzip(backup_url, zip_path, extract_to, new_file_path)
create_dataset_csvs(new_file_path)