fix: add retry for fetch_url (#8958)

Hyejin Yoon 2023-10-06 14:36:32 +09:00 committed by GitHub
parent 26bc039b96
commit ea87febd2b

@@ -1,6 +1,7 @@
 import json
 import os
 import tarfile
+import time
 import urllib.request
 
 repo_url = "https://api.github.com/repos/datahub-project/static-assets"
@@ -16,17 +17,30 @@ def download_file(url, destination):
             f.write(chunk)
 
 
-def fetch_urls(repo_url: str, folder_path: str, file_format: str):
+def fetch_urls(
+    repo_url: str, folder_path: str, file_format: str, max_retries=3, retry_delay=5
+):
     api_url = f"{repo_url}/contents/{folder_path}"
-    response = urllib.request.urlopen(api_url)
-    data = response.read().decode("utf-8")
-    urls = [
-        file["download_url"]
-        for file in json.loads(data)
-        if file["name"].endswith(file_format)
-    ]
-    print(urls)
-    return urls
+    for attempt in range(max_retries + 1):
+        try:
+            response = urllib.request.urlopen(api_url)
+            if response.status == 403 or (500 <= response.status < 600):
+                raise Exception(f"HTTP Error {response.status}: {response.reason}")
+            data = response.read().decode("utf-8")
+            urls = [
+                file["download_url"]
+                for file in json.loads(data)
+                if file["name"].endswith(file_format)
+            ]
+            print(urls)
+            return urls
+        except Exception as e:
+            if attempt < max_retries:
+                print(f"Attempt {attempt + 1}/{max_retries}: {e}")
+                time.sleep(retry_delay)
+            else:
+                print(f"Max retries reached. Unable to fetch data.")
+                raise
 
 
 def extract_tar_file(destination_path):
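
A minimal usage sketch of the patched fetch_urls (not part of this commit; the folder_path and file_format values below are assumptions for illustration). On a transient failure the function prints the error, sleeps retry_delay seconds, and retries; once retries are exhausted the last error is re-raised to the caller.

# Hypothetical call site, not from this commit; argument values are assumed.
urls = fetch_urls(
    repo_url="https://api.github.com/repos/datahub-project/static-assets",
    folder_path="versioned_docs",  # assumed folder name
    file_format=".tar.gz",         # assumed file extension
    max_retries=3,                 # total attempts = max_retries + 1
    retry_delay=5,                 # seconds slept between attempts
)

Note that urllib.request.urlopen raises urllib.error.HTTPError for 403 and 5xx responses before the explicit status check ever runs, so that check acts as a redundant safety net; both failure paths land in the except Exception handler and go through the same retry logic.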