mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-26 09:26:22 +00:00
fix: add retry for fetch_url (#8958)
This commit is contained in:
parent
26bc039b96
commit
ea87febd2b
@ -1,6 +1,7 @@
|
||||
import json
|
||||
import os
|
||||
import tarfile
|
||||
import time
|
||||
import urllib.request
|
||||
|
||||
repo_url = "https://api.github.com/repos/datahub-project/static-assets"
|
||||
@ -16,17 +17,30 @@ def download_file(url, destination):
|
||||
f.write(chunk)
|
||||
|
||||
|
||||
def fetch_urls(
    repo_url: str,
    folder_path: str,
    file_format: str,
    max_retries: int = 3,
    retry_delay: float = 5,
) -> list:
    """Return the download URLs of files in a repository folder.

    Requests ``{repo_url}/contents/{folder_path}``, parses the response body
    as a JSON list of entries, and collects the ``download_url`` of every
    entry whose ``name`` ends with ``file_format``.

    Transient failures are retried: HTTP 403 and 5xx responses, plus
    connection-level errors. Other HTTP errors (for example 404) and
    malformed payloads are raised immediately, because repeating the same
    request cannot fix them.

    Args:
        repo_url: Base API URL of the repository.
        folder_path: Folder inside the repository to list.
        file_format: File-name suffix to keep (e.g. ``".tar.gz"``).
        max_retries: Number of retries after the first attempt; negative
            values are treated as 0, so at least one attempt is always made.
        retry_delay: Seconds to sleep between attempts.

    Returns:
        The list of matching ``download_url`` values, in listing order.

    Raises:
        urllib.error.URLError: (including ``HTTPError``) when the request
            still fails after the last attempt, or fails non-retryably.
        Exception: whatever decoding/parsing the payload raises.
    """
    # Function-scope imports keep this block self-contained; the module only
    # imports ``urllib.request`` at file level.
    import http.client
    import urllib.error

    api_url = f"{repo_url}/contents/{folder_path}"
    total_attempts = max(max_retries, 0) + 1

    for attempt in range(1, total_attempts + 1):
        try:
            # The context manager closes the connection even if read() fails.
            with urllib.request.urlopen(api_url) as response:
                data = response.read().decode("utf-8")
        except (OSError, http.client.HTTPException) as e:
            # urlopen() raises HTTPError for error statuses, so the status
            # has to be inspected on the exception rather than the response.
            if isinstance(e, urllib.error.HTTPError):
                retryable = e.code == 403 or 500 <= e.code < 600
            else:
                retryable = True  # connection-level failure
            if not retryable:
                raise
            if attempt == total_attempts:
                print("Max retries reached. Unable to fetch data.")
                raise
            print(f"Attempt {attempt}/{total_attempts}: {e}")
            time.sleep(retry_delay)
            continue

        urls = [
            file["download_url"]
            for file in json.loads(data)
            if file["name"].endswith(file_format)
        ]
        print(urls)
        return urls
|
||||
|
||||
|
||||
def extract_tar_file(destination_path):
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user