mirror of
https://github.com/datahub-project/datahub.git
synced 2025-07-03 07:09:21 +00:00
61 lines
1.8 KiB
Python
61 lines
1.8 KiB
Python
![]() |
import os
|
||
|
import tarfile
|
||
|
import urllib.request
|
||
|
import json
|
||
|
|
||
|
# GitHub REST API base URL of the repository holding the archives;
# fetch_tar_urls() appends "/contents/<folder>" to it to list a folder.
repo_url = "https://api.github.com/repos/datahub-project/static-assets"
|
||
|
|
||
|
|
||
|
def download_file(url, destination):
    """Stream the resource at ``url`` into the local file ``destination``.

    The payload is copied in 8192-byte reads, so it is never held in memory
    as a whole. ``destination`` is opened in binary write mode, which means an
    existing file at that path is overwritten.
    """
    with urllib.request.urlopen(url) as remote, open(destination, "wb") as sink:
        # iter() with a sentinel keeps calling read() until it yields b"" (EOF).
        for block in iter(lambda: remote.read(8192), b""):
            sink.write(block)
|
||
|
|
||
|
|
||
|
def fetch_tar_urls(repo_url, folder_path):
    """Return the download URLs of every ``.tar.gz`` file listed in a folder.

    Args:
        repo_url: Base API URL of the repository; ``/contents/<folder_path>``
            is appended to it to build the listing request.
        folder_path: Folder inside the repository whose entries are listed.

    Returns:
        The ``download_url`` value of each listed entry whose ``name`` ends
        with ``.tar.gz``, in the order the listing returned them.

    Raises:
        urllib.error.URLError: If the listing request fails.
    """
    api_url = f"{repo_url}/contents/{folder_path}"
    # The context manager closes the response even when reading or parsing
    # fails, instead of leaving the connection to the garbage collector.
    with urllib.request.urlopen(api_url) as response:
        entries = json.loads(response.read().decode("utf-8"))
    tar_urls = [
        entry["download_url"]
        for entry in entries
        if entry["name"].endswith(".tar.gz")
    ]
    print(tar_urls)
    return tar_urls
|
||
|
|
||
|
|
||
|
def main():
    """Download and unpack each versioned-docs archive that is not present yet.

    Lists the ``.tar.gz`` files under ``versioned_docs`` in the repository at
    ``repo_url``. For every archive whose expected directory
    ``versioned_docs/<version>`` does not exist, the archive is downloaded
    into ``versioned_docs/``, extracted, and then deleted. A download error
    is reported on stdout and that archive is skipped; any other error
    propagates after the temporary archive has been removed.
    """
    folder_path = "versioned_docs"
    destination_dir = "versioned_docs"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(destination_dir, exist_ok=True)

    # The archives come from the network, so use the "data" extraction filter
    # (rejects members escaping the target directory) when this Python has it.
    extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}

    tar_urls = fetch_tar_urls(repo_url, folder_path)

    for url in tar_urls:
        filename = os.path.basename(url)
        destination_path = os.path.join(destination_dir, filename)

        # Keep the first three dot-separated parts of the file name,
        # e.g. "a.b.c.tar.gz" -> "a.b.c".
        version = '.'.join(filename.split('.')[:3])
        extracted_path = os.path.join(destination_dir, version)
        print("extracted_path", extracted_path)
        if os.path.exists(extracted_path):
            print(f"{extracted_path} already exists, skipping downloads")
            continue

        try:
            download_file(url, destination_path)
            print(f"Downloaded {filename} to {destination_dir}")
            # NOTE(review): extraction is relative to the current working
            # directory, not destination_dir; the skip check above presumably
            # relies on the archive containing "versioned_docs/<version>/"
            # paths -- confirm against how the archives are built.
            with tarfile.open(destination_path, "r:gz") as tar:
                tar.extractall(**extract_kwargs)
        except urllib.error.URLError as e:
            print(f"Error while downloading {filename}: {e}")
        finally:
            # Remove the archive on success and on failure alike, so a
            # partial download or a corrupt tarball is never left behind.
            if os.path.exists(destination_path):
                os.remove(destination_path)
|
||
|
|
||
|
|
||
|
# Run the download only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|