diff --git a/ingestion/setup.py b/ingestion/setup.py
index 619691be8fb..c3ec1fd8f0f 100644
--- a/ingestion/setup.py
+++ b/ingestion/setup.py
@@ -68,7 +68,7 @@ plugins: Dict[str, Set[str]] = {
     },
     "bigquery-usage": {"google-cloud-logging", "cachetools"},
     "docker": {"python_on_whales==0.34.0"},
-    "backup": {"boto3~=1.19.12"},
+    "backup": {"boto3~=1.19.12", "azure-identity", "azure-storage-blob"},
     "dagster": {"pymysql>=1.0.2", "psycopg2-binary", "GeoAlchemy2", "dagster_graphql"},
     "datalake": {
         "google-cloud-storage==1.43.0",
diff --git a/ingestion/src/metadata/cli/backup.py b/ingestion/src/metadata/cli/backup.py
index f2b40914542..8be51cca028 100644
--- a/ingestion/src/metadata/cli/backup.py
+++ b/ingestion/src/metadata/cli/backup.py
@@ -14,6 +14,7 @@ Backup utility for the metadata CLI
 """
 import traceback
 from datetime import datetime
+from enum import Enum
 from pathlib import Path
 from typing import List, Optional, Tuple
 
@@ -21,8 +22,14 @@ import click
 
 from metadata.cli.db_dump import dump
 from metadata.cli.utils import get_engine
 from metadata.utils.logger import cli_logger
 
+
+class UploadDestinationType(Enum):
+    AWS = "AWS"
+    AZURE = "Azure"
+
+
 logger = cli_logger()
@@ -49,7 +56,7 @@ def get_output(output: Optional[str] = None) -> Path:
     return Path(name)
 
 
-def upload_backup(endpoint: str, bucket: str, key: str, file: Path) -> None:
+def upload_backup_aws(endpoint: str, bucket: str, key: str, file: Path) -> None:
     """
     Upload the mysqldump backup file.
     We will use boto3 to upload the file to the endpoint
@@ -98,13 +105,65 @@ def upload_backup(endpoint: str, bucket: str, key: str, file: Path) -> None:
     raise err
 
 
-def run_backup(  # pylint: disable=too-many-arguments
+def upload_backup_azure(account_url: str, container: str, file: Path) -> None:
+    """
+    Upload the mysqldump backup file to Azure Blob Storage.
+
+    :param account_url: Azure Blob Storage account URL
+    :param container: Azure container to upload the file to
+    :param file: file to upload
+    """
+
+    try:
+        # pylint: disable=import-outside-toplevel
+        from azure.identity import DefaultAzureCredential
+        from azure.storage.blob import BlobServiceClient
+
+        default_credential = DefaultAzureCredential()
+        # Create the BlobServiceClient object
+        blob_service_client = BlobServiceClient(
+            account_url, credential=default_credential
+        )
+    except ModuleNotFoundError as err:
+        logger.debug(traceback.format_exc())
+        logger.error(
+            "Failed to import the Azure SDK modules required for the backup upload. Install the 'backup' plugin dependencies."
+        )
+        raise err
+
+    click.secho(
+        f"Uploading {file} to {account_url}/{container}...",
+        fg="bright_green",
+    )
+
+    try:
+        # Create a blob client using the local file name as the name for the blob
+        blob_client = blob_service_client.get_blob_client(
+            container=container, blob=file.name
+        )
+
+        # Upload the dump; it is opened in binary mode, which takes no encoding
+        with file.open(mode="rb") as data:
+            blob_client.upload_blob(data)
+
+    except ValueError as err:
+        logger.debug(traceback.format_exc())
+        logger.error("Revisit the values of --upload")
+        raise err
+    except Exception as err:
+        logger.debug(traceback.format_exc())
+        logger.error(err)
+        raise err
+
+
+def run_backup(  # pylint: disable=too-many-arguments, too-many-locals
     host: str,
     user: str,
     password: str,
     database: str,
     port: str,
     output: Optional[str],
+    upload_destination_type: Optional[str],
     upload: Optional[Tuple[str, str, str]],
     options: List[str],
     arguments: List[str],
@@ -143,5 +202,10 @@
     )
 
     if upload:
-        endpoint, bucket, key = upload
-        upload_backup(endpoint, bucket, key, out)
+        if upload_destination_type == UploadDestinationType.AWS.value:
+            endpoint, bucket, key = upload
+            upload_backup_aws(endpoint, bucket, key, out)
+        elif upload_destination_type == UploadDestinationType.AZURE.value:
+            # Azure needs only the account URL and container; the third --upload value is unused
+            account_url, container, _ = upload
+            upload_backup_azure(account_url, container, out)
diff --git a/ingestion/src/metadata/cmd.py b/ingestion/src/metadata/cmd.py
index e40f4de2ae4..96b2511b963 100644
--- a/ingestion/src/metadata/cmd.py
+++ b/ingestion/src/metadata/cmd.py
@@ -21,9 +21,9 @@ from http.server import BaseHTTPRequestHandler, HTTPServer
 from typing import List, Optional, Tuple
 
 import click
 
 from metadata.__version__ import get_metadata_version
-from metadata.cli.backup import run_backup
+from metadata.cli.backup import UploadDestinationType, run_backup
 from metadata.cli.docker import BACKEND_DATABASES, run_docker
 from metadata.cli.ingest import run_ingest
 from metadata.cli.openmetadata_imports_migration import (
@@ -282,6 +282,13 @@ def docker(
     default=None,
     required=False,
 )
+@click.option(
+    "--upload_destination_type",
+    help="AWS or Azure",
+    type=click.Choice([destination.value for destination in UploadDestinationType]),
+    default=None,
+    required=False,
+)
 @click.option(
     "--upload",
     help="S3 endpoint, bucket & key to upload the backup file",
@@ -314,6 +321,7 @@ def backup(
     database: str,
     port: str,
     output: Optional[str],
+    upload_destination_type: Optional[str],
     upload: Optional[Tuple[str, str, str]],
     options: List[str],
     arguments: List[str],
@@ -334,7 +342,17 @@ def backup(
     tables.
     """
     run_backup(
-        host, user, password, database, port, output, upload, options, arguments, schema
+        host,
+        user,
+        password,
+        database,
+        port,
+        output,
+        upload_destination_type,
+        upload,
+        options,
+        arguments,
+        schema,
     )
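A usage sketch for the new flag (illustrative, not part of the diff): the `metadata` console entrypoint and the existing `metadata backup` connection flags are assumed unchanged here, and `<account>` and `<container>` are placeholders. DefaultAzureCredential resolves credentials from the environment (environment variables, managed identity, or an Azure CLI login), so no storage secret is passed on the command line. Since --upload is consumed as a three-value tuple, an Azure invocation still supplies a third value, which the Azure branch ignores:

    metadata backup -H mysql -u openmetadata_user -p openmetadata_password \
        -d openmetadata_db --port 3306 \
        --upload_destination_type Azure \
        --upload https://<account>.blob.core.windows.net <container> unused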