Issue 4886: Add support for Azure Blob (#8334)

* Issue 4686: Add support for Azure Blob

* ISSUE-4868: make changes as suggested in PR review

* ISSUE-4868: run py_format

* ISSUE-4868: Make changes to enum and formatting

* ISSUE-4868: fix linting issues and update setup.py
amymareemc 2022-10-27 01:14:51 +11:00 committed by GitHub
parent 552ccd5340
commit 12bc9df0b2
3 changed files with 91 additions and 7 deletions


@@ -68,7 +68,7 @@ plugins: Dict[str, Set[str]] = {
    },
    "bigquery-usage": {"google-cloud-logging", "cachetools"},
    "docker": {"python_on_whales==0.34.0"},
    "backup": {"boto3~=1.19.12"},
    "backup": {"boto3~=1.19.12", "azure-identity", "azure-storage-blob"},
    "dagster": {"pymysql>=1.0.2", "psycopg2-binary", "GeoAlchemy2", "dagster_graphql"},
    "datalake": {
        "google-cloud-storage==1.43.0",


@@ -14,6 +14,7 @@ Backup utility for the metadata CLI
"""
import traceback
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import List, Optional, Tuple
@@ -21,8 +22,15 @@ import click
from metadata.cli.db_dump import dump
from metadata.cli.utils import get_engine
from metadata.utils.constants import UTF_8
from metadata.utils.logger import cli_logger
class UploadDestinationType(Enum):
    AWS = "AWS"
    AZURE = "Azure"


logger = cli_logger()
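For reference, a short sketch of how the new enum behaves. It matters because click passes the selected choice through as a plain string, so run_backup below compares against the enum's .value rather than the member itself (the class is repeated here only to keep the snippet self-contained):

from enum import Enum


class UploadDestinationType(Enum):
    AWS = "AWS"
    AZURE = "Azure"


# click hands the CLI layer the raw string, e.g. "Azure"
assert UploadDestinationType.AZURE.value == "Azure"
assert "Azure" == UploadDestinationType.AZURE.value
assert "Azure" != UploadDestinationType.AZURE  # the member itself never equals the string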
@@ -49,7 +57,7 @@ def get_output(output: Optional[str] = None) -> Path:
    return Path(name)
def upload_backup(endpoint: str, bucket: str, key: str, file: Path) -> None:
def upload_backup_aws(endpoint: str, bucket: str, key: str, file: Path) -> None:
"""
Upload the mysqldump backup file.
We will use boto3 to upload the file to the endpoint
@@ -98,13 +106,65 @@ def upload_backup(endpoint: str, bucket: str, key: str, file: Path) -> None:
        raise err
def run_backup( # pylint: disable=too-many-arguments
def upload_backup_azure(account_url: str, container: str, file: Path) -> None:
    """
    Upload the mysqldump backup file.
    :param account_url: Azure account URL
    :param container: Azure container to upload file to
    :param file: file to upload
    """
    try:
        # pylint: disable=import-outside-toplevel
        from azure.identity import DefaultAzureCredential
        from azure.storage.blob import BlobServiceClient

        default_credential = DefaultAzureCredential()

        # Create the BlobServiceClient object
        blob_service_client = BlobServiceClient(
            account_url, credential=default_credential
        )
    except ModuleNotFoundError as err:
        logger.debug(traceback.format_exc())
        logger.error(
            "Trying to import DefaultAzureCredential to run the backup upload."
        )
        raise err

    click.secho(
        f"Uploading {file} to {account_url}/{container}...",
        fg="bright_green",
    )

    try:
        # Create a blob client using the local file name as the name for the blob
        blob_client = blob_service_client.get_blob_client(
            container=container, blob=file.name
        )

        # Upload the created file (binary mode, so no encoding argument)
        with open(file=file.absolute(), mode="rb") as data:
            blob_client.upload_blob(data)

    except ValueError as err:
        logger.debug(traceback.format_exc())
        logger.error("Revisit the values of --upload")
        raise err

    except Exception as err:
        logger.debug(traceback.format_exc())
        logger.error(err)
        raise err

def run_backup( # pylint: disable=too-many-arguments, too-many-locals
    host: str,
    user: str,
    password: str,
    database: str,
    port: str,
    output: Optional[str],
    upload_destination_type: Optional[UploadDestinationType],
    upload: Optional[Tuple[str, str, str]],
    options: List[str],
    arguments: List[str],
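For context, a minimal standalone sketch of the Azure upload flow used by upload_backup_azure above. The account URL, container name, and file name are placeholders; DefaultAzureCredential picks up credentials from the environment (environment variables, managed identity, or a logged-in Azure CLI):

from pathlib import Path

from azure.identity import DefaultAzureCredential
from azure.storage.blob import BlobServiceClient

# Placeholder values; in the CLI these come from the --upload arguments.
account_url = "https://myaccount.blob.core.windows.net"
container = "openmetadata-backups"
backup_file = Path("openmetadata_202210270114_backup.sql")

credential = DefaultAzureCredential()
blob_service_client = BlobServiceClient(account_url, credential=credential)

# The blob is named after the local file, mirroring upload_backup_azure.
blob_client = blob_service_client.get_blob_client(
    container=container, blob=backup_file.name
)
with open(backup_file.absolute(), mode="rb") as data:
    blob_client.upload_blob(data)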
@@ -143,5 +203,10 @@ def run_backup( # pylint: disable=too-many-arguments
    )
    if upload:
        endpoint, bucket, key = upload
        upload_backup(endpoint, bucket, key, out)
        if upload_destination_type == UploadDestinationType.AWS.value:
            endpoint, bucket, key = upload
            upload_backup_aws(endpoint, bucket, key, out)
        elif upload_destination_type == UploadDestinationType.AZURE.value:
            # only the first two values of --upload are needed for Azure; key is unused
            account_url, container, key = upload
            upload_backup_azure(account_url, container, out)
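Since --upload still takes three values, the same tuple is read differently per destination; a small sketch of the mapping with placeholder values (the third slot is simply ignored on the Azure path):

# AWS expects:   --upload <endpoint> <bucket> <key>
aws_upload = ("https://s3.us-east-1.amazonaws.com", "om-backups", "backups/om_backup.sql")

# Azure expects: --upload <account_url> <container> <anything>
azure_upload = ("https://myaccount.blob.core.windows.net", "om-backups", "")

account_url, container, _ = azure_upload  # mirrors the elif branch above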


@@ -21,9 +21,10 @@ from http.server import BaseHTTPRequestHandler, HTTPServer
from typing import List, Optional, Tuple
import click
import const
from metadata.__version__ import get_metadata_version
from metadata.cli.backup import run_backup
from metadata.cli.backup import UploadDestinationType, run_backup
from metadata.cli.docker import BACKEND_DATABASES, run_docker
from metadata.cli.ingest import run_ingest
from metadata.cli.openmetadata_imports_migration import (
@@ -282,6 +283,13 @@ def docker(
    default=None,
    required=False,
)
@click.option(
    "--upload_destination_type",
    help="AWS or AZURE",
    type=click.Choice(const.Upload_Destination_Type),
    default=None,
    required=False,
)
@click.option(
    "--upload",
    help="S3 endpoint, bucket & key to upload the backup file",
@@ -314,6 +322,7 @@ def backup(
    database: str,
    port: str,
    output: Optional[str],
    upload_destination_type: Optional[UploadDestinationType],
    upload: Optional[Tuple[str, str, str]],
    options: List[str],
    arguments: List[str],
@@ -334,7 +343,17 @@ def backup(
    tables.
    """
    run_backup(
        host, user, password, database, port, output, upload, options, arguments, schema
        host,
        user,
        password,
        database,
        port,
        output,
        upload_destination_type,
        upload,
        options,
        arguments,
        schema,
    )
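Putting it together, a hedged sketch of how the backup command would end up calling run_backup for an Azure destination; every value below is a placeholder, and the third --upload slot is left empty because the Azure branch ignores it:

from metadata.cli.backup import UploadDestinationType, run_backup

run_backup(
    host="localhost",
    user="openmetadata_user",
    password="openmetadata_password",
    database="openmetadata_db",
    port="3306",
    output=None,
    upload_destination_type=UploadDestinationType.AZURE.value,  # click passes the string
    upload=("https://myaccount.blob.core.windows.net", "om-backups", ""),
    options=[],
    arguments=[],
    schema=None,
)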