diff --git a/appendix-E/01_main-chapter-code/gpt_download.py b/appendix-E/01_main-chapter-code/gpt_download.py index 0d695d2..aa0ea1e 100644 --- a/appendix-E/01_main-chapter-code/gpt_download.py +++ b/appendix-E/01_main-chapter-code/gpt_download.py @@ -5,7 +5,9 @@ import os -import requests +import urllib.request + +# import requests import json import numpy as np import tensorflow as tf @@ -42,6 +44,46 @@ def download_and_load_gpt2(model_size, models_dir): return settings, params +def download_file(url, destination): + # Send a GET request to download the file + + try: + with urllib.request.urlopen(url) as response: + # Get the total file size from headers, defaulting to 0 if not present + file_size = int(response.headers.get("Content-Length", 0)) + + # Check if file exists and has the same size + if os.path.exists(destination): + file_size_local = os.path.getsize(destination) + if file_size == file_size_local: + print(f"File already exists and is up-to-date: {destination}") + return + + # Define the block size for reading the file + block_size = 1024 # 1 Kilobyte + + # Initialize the progress bar with total file size + progress_bar_description = os.path.basename(url) # Extract filename from URL + with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar: + # Open the destination file in binary write mode + with open(destination, "wb") as file: + # Read the file in chunks and write to destination + while True: + chunk = response.read(block_size) + if not chunk: + break + file.write(chunk) + progress_bar.update(len(chunk)) # Update progress bar + except urllib.error.HTTPError: + s = ( + f"The specified URL ({url}) is incorrect, the internet connection cannot be established," + "\nor the requested file is temporarily unavailable.\nPlease visit the following website" + " for help: https://github.com/rasbt/LLMs-from-scratch/discussions/273") + print(s) + + +# Alternative way using `requests` +""" def download_file(url, destination): # Send a GET request to download the file in streaming mode response = requests.get(url, stream=True) @@ -68,6 +110,7 @@ def download_file(url, destination): for chunk in response.iter_content(block_size): progress_bar.update(len(chunk)) # Update progress bar file.write(chunk) # Write the chunk to the file +""" def load_gpt2_params_from_tf_ckpt(ckpt_path, settings): diff --git a/ch05/01_main-chapter-code/gpt_download.py b/ch05/01_main-chapter-code/gpt_download.py index 3ad6778..aa0ea1e 100644 --- a/ch05/01_main-chapter-code/gpt_download.py +++ b/ch05/01_main-chapter-code/gpt_download.py @@ -44,6 +44,45 @@ def download_and_load_gpt2(model_size, models_dir): return settings, params +def download_file(url, destination): + # Send a GET request to download the file + + try: + with urllib.request.urlopen(url) as response: + # Get the total file size from headers, defaulting to 0 if not present + file_size = int(response.headers.get("Content-Length", 0)) + + # Check if file exists and has the same size + if os.path.exists(destination): + file_size_local = os.path.getsize(destination) + if file_size == file_size_local: + print(f"File already exists and is up-to-date: {destination}") + return + + # Define the block size for reading the file + block_size = 1024 # 1 Kilobyte + + # Initialize the progress bar with total file size + progress_bar_description = os.path.basename(url) # Extract filename from URL + with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar: + # Open the destination file in binary write mode + with open(destination, "wb") as file: + # Read the file in chunks and write to destination + while True: + chunk = response.read(block_size) + if not chunk: + break + file.write(chunk) + progress_bar.update(len(chunk)) # Update progress bar + except urllib.error.HTTPError: + s = ( + f"The specified URL ({url}) is incorrect, the internet connection cannot be established," + "\nor the requested file is temporarily unavailable.\nPlease visit the following website" + " for help: https://github.com/rasbt/LLMs-from-scratch/discussions/273") + print(s) + + +# Alternative way using `requests` """ def download_file(url, destination): # Send a GET request to download the file in streaming mode @@ -74,36 +113,6 @@ def download_file(url, destination): """ -def download_file(url, destination): - # Send a GET request to download the file - with urllib.request.urlopen(url) as response: - # Get the total file size from headers, defaulting to 0 if not present - file_size = int(response.headers.get("Content-Length", 0)) - - # Check if file exists and has the same size - if os.path.exists(destination): - file_size_local = os.path.getsize(destination) - if file_size == file_size_local: - print(f"File already exists and is up-to-date: {destination}") - return - - # Define the block size for reading the file - block_size = 1024 # 1 Kilobyte - - # Initialize the progress bar with total file size - progress_bar_description = os.path.basename(url) # Extract filename from URL - with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar: - # Open the destination file in binary write mode - with open(destination, "wb") as file: - # Read the file in chunks and write to destination - while True: - chunk = response.read(block_size) - if not chunk: - break - file.write(chunk) - progress_bar.update(len(chunk)) # Update progress bar - - def load_gpt2_params_from_tf_ckpt(ckpt_path, settings): # Initialize parameters dictionary with empty blocks for each layer params = {"blocks": [{} for _ in range(settings["n_layer"])]} diff --git a/ch06/01_main-chapter-code/gpt_download.py b/ch06/01_main-chapter-code/gpt_download.py index 0d695d2..aa0ea1e 100644 --- a/ch06/01_main-chapter-code/gpt_download.py +++ b/ch06/01_main-chapter-code/gpt_download.py @@ -5,7 +5,9 @@ import os -import requests +import urllib.request + +# import requests import json import numpy as np import tensorflow as tf @@ -42,6 +44,46 @@ def download_and_load_gpt2(model_size, models_dir): return settings, params +def download_file(url, destination): + # Send a GET request to download the file + + try: + with urllib.request.urlopen(url) as response: + # Get the total file size from headers, defaulting to 0 if not present + file_size = int(response.headers.get("Content-Length", 0)) + + # Check if file exists and has the same size + if os.path.exists(destination): + file_size_local = os.path.getsize(destination) + if file_size == file_size_local: + print(f"File already exists and is up-to-date: {destination}") + return + + # Define the block size for reading the file + block_size = 1024 # 1 Kilobyte + + # Initialize the progress bar with total file size + progress_bar_description = os.path.basename(url) # Extract filename from URL + with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar: + # Open the destination file in binary write mode + with open(destination, "wb") as file: + # Read the file in chunks and write to destination + while True: + chunk = response.read(block_size) + if not chunk: + break + file.write(chunk) + progress_bar.update(len(chunk)) # Update progress bar + except urllib.error.HTTPError: + s = ( + f"The specified URL ({url}) is incorrect, the internet connection cannot be established," + "\nor the requested file is temporarily unavailable.\nPlease visit the following website" + " for help: https://github.com/rasbt/LLMs-from-scratch/discussions/273") + print(s) + + +# Alternative way using `requests` +""" def download_file(url, destination): # Send a GET request to download the file in streaming mode response = requests.get(url, stream=True) @@ -68,6 +110,7 @@ def download_file(url, destination): for chunk in response.iter_content(block_size): progress_bar.update(len(chunk)) # Update progress bar file.write(chunk) # Write the chunk to the file +""" def load_gpt2_params_from_tf_ckpt(ckpt_path, settings): diff --git a/ch06/02_bonus_additional-experiments/gpt_download.py b/ch06/02_bonus_additional-experiments/gpt_download.py index 0d695d2..aa0ea1e 100644 --- a/ch06/02_bonus_additional-experiments/gpt_download.py +++ b/ch06/02_bonus_additional-experiments/gpt_download.py @@ -5,7 +5,9 @@ import os -import requests +import urllib.request + +# import requests import json import numpy as np import tensorflow as tf @@ -42,6 +44,46 @@ def download_and_load_gpt2(model_size, models_dir): return settings, params +def download_file(url, destination): + # Send a GET request to download the file + + try: + with urllib.request.urlopen(url) as response: + # Get the total file size from headers, defaulting to 0 if not present + file_size = int(response.headers.get("Content-Length", 0)) + + # Check if file exists and has the same size + if os.path.exists(destination): + file_size_local = os.path.getsize(destination) + if file_size == file_size_local: + print(f"File already exists and is up-to-date: {destination}") + return + + # Define the block size for reading the file + block_size = 1024 # 1 Kilobyte + + # Initialize the progress bar with total file size + progress_bar_description = os.path.basename(url) # Extract filename from URL + with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar: + # Open the destination file in binary write mode + with open(destination, "wb") as file: + # Read the file in chunks and write to destination + while True: + chunk = response.read(block_size) + if not chunk: + break + file.write(chunk) + progress_bar.update(len(chunk)) # Update progress bar + except urllib.error.HTTPError: + s = ( + f"The specified URL ({url}) is incorrect, the internet connection cannot be established," + "\nor the requested file is temporarily unavailable.\nPlease visit the following website" + " for help: https://github.com/rasbt/LLMs-from-scratch/discussions/273") + print(s) + + +# Alternative way using `requests` +""" def download_file(url, destination): # Send a GET request to download the file in streaming mode response = requests.get(url, stream=True) @@ -68,6 +110,7 @@ def download_file(url, destination): for chunk in response.iter_content(block_size): progress_bar.update(len(chunk)) # Update progress bar file.write(chunk) # Write the chunk to the file +""" def load_gpt2_params_from_tf_ckpt(ckpt_path, settings): diff --git a/ch06/03_bonus_imdb-classification/gpt_download.py b/ch06/03_bonus_imdb-classification/gpt_download.py index 0d695d2..aa0ea1e 100644 --- a/ch06/03_bonus_imdb-classification/gpt_download.py +++ b/ch06/03_bonus_imdb-classification/gpt_download.py @@ -5,7 +5,9 @@ import os -import requests +import urllib.request + +# import requests import json import numpy as np import tensorflow as tf @@ -42,6 +44,46 @@ def download_and_load_gpt2(model_size, models_dir): return settings, params +def download_file(url, destination): + # Send a GET request to download the file + + try: + with urllib.request.urlopen(url) as response: + # Get the total file size from headers, defaulting to 0 if not present + file_size = int(response.headers.get("Content-Length", 0)) + + # Check if file exists and has the same size + if os.path.exists(destination): + file_size_local = os.path.getsize(destination) + if file_size == file_size_local: + print(f"File already exists and is up-to-date: {destination}") + return + + # Define the block size for reading the file + block_size = 1024 # 1 Kilobyte + + # Initialize the progress bar with total file size + progress_bar_description = os.path.basename(url) # Extract filename from URL + with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar: + # Open the destination file in binary write mode + with open(destination, "wb") as file: + # Read the file in chunks and write to destination + while True: + chunk = response.read(block_size) + if not chunk: + break + file.write(chunk) + progress_bar.update(len(chunk)) # Update progress bar + except urllib.error.HTTPError: + s = ( + f"The specified URL ({url}) is incorrect, the internet connection cannot be established," + "\nor the requested file is temporarily unavailable.\nPlease visit the following website" + " for help: https://github.com/rasbt/LLMs-from-scratch/discussions/273") + print(s) + + +# Alternative way using `requests` +""" def download_file(url, destination): # Send a GET request to download the file in streaming mode response = requests.get(url, stream=True) @@ -68,6 +110,7 @@ def download_file(url, destination): for chunk in response.iter_content(block_size): progress_bar.update(len(chunk)) # Update progress bar file.write(chunk) # Write the chunk to the file +""" def load_gpt2_params_from_tf_ckpt(ckpt_path, settings): diff --git a/ch07/01_main-chapter-code/gpt_download.py b/ch07/01_main-chapter-code/gpt_download.py index 0d695d2..aa0ea1e 100644 --- a/ch07/01_main-chapter-code/gpt_download.py +++ b/ch07/01_main-chapter-code/gpt_download.py @@ -5,7 +5,9 @@ import os -import requests +import urllib.request + +# import requests import json import numpy as np import tensorflow as tf @@ -42,6 +44,46 @@ def download_and_load_gpt2(model_size, models_dir): return settings, params +def download_file(url, destination): + # Send a GET request to download the file + + try: + with urllib.request.urlopen(url) as response: + # Get the total file size from headers, defaulting to 0 if not present + file_size = int(response.headers.get("Content-Length", 0)) + + # Check if file exists and has the same size + if os.path.exists(destination): + file_size_local = os.path.getsize(destination) + if file_size == file_size_local: + print(f"File already exists and is up-to-date: {destination}") + return + + # Define the block size for reading the file + block_size = 1024 # 1 Kilobyte + + # Initialize the progress bar with total file size + progress_bar_description = os.path.basename(url) # Extract filename from URL + with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar: + # Open the destination file in binary write mode + with open(destination, "wb") as file: + # Read the file in chunks and write to destination + while True: + chunk = response.read(block_size) + if not chunk: + break + file.write(chunk) + progress_bar.update(len(chunk)) # Update progress bar + except urllib.error.HTTPError: + s = ( + f"The specified URL ({url}) is incorrect, the internet connection cannot be established," + "\nor the requested file is temporarily unavailable.\nPlease visit the following website" + " for help: https://github.com/rasbt/LLMs-from-scratch/discussions/273") + print(s) + + +# Alternative way using `requests` +""" def download_file(url, destination): # Send a GET request to download the file in streaming mode response = requests.get(url, stream=True) @@ -68,6 +110,7 @@ def download_file(url, destination): for chunk in response.iter_content(block_size): progress_bar.update(len(chunk)) # Update progress bar file.write(chunk) # Write the chunk to the file +""" def load_gpt2_params_from_tf_ckpt(ckpt_path, settings):