From a15600e4e64349508a0b21077b66e554cecb45ec Mon Sep 17 00:00:00 2001
From: Milan Bariya <52292922+MilanBariya@users.noreply.github.com>
Date: Fri, 16 Dec 2022 18:59:00 +0530
Subject: [PATCH] Fix: Metadata backup Azure blob issue (#9328)

* Fix: Metadata backup Azure blob issue

* Fix: Metadata backup Azure blob issue

* Fix: Metadata backup Azure blob issue

* Doc update for Backup

* Doc update for Backup
---
 ingestion/src/metadata/cli/backup.py          |  7 +-
 .../deployment/backup-restore-metadata.md     | 76 ++++++++++++-------
 2 files changed, 51 insertions(+), 32 deletions(-)

diff --git a/ingestion/src/metadata/cli/backup.py b/ingestion/src/metadata/cli/backup.py
index 35c0ef58017..f22d5206ab8 100644
--- a/ingestion/src/metadata/cli/backup.py
+++ b/ingestion/src/metadata/cli/backup.py
@@ -21,7 +21,6 @@
 from typing import Optional, Tuple
 from metadata.cli.db_dump import dump
 from metadata.cli.utils import get_engine
 from metadata.utils.ansi import ANSI, print_ansi_encoded_string
-from metadata.utils.constants import UTF_8
 from metadata.utils.helpers import BackupRestoreArgs
 from metadata.utils.logger import cli_logger
@@ -145,7 +144,7 @@ def upload_backup_azure(account_url: str, container: str, file: Path) -> None:
         )
 
         # Upload the created file
-        with open(file=file.absolute, mode="rb", encoding=UTF_8) as data:
+        with open(file=file, mode="rb") as data:
             blob_client.upload_blob(data)
 
     except ValueError as err:
@@ -191,10 +190,10 @@ def run_backup(
     )
 
     if upload:
-        if upload_destination_type == UploadDestinationType.AWS.value:
+        if upload_destination_type.title() == UploadDestinationType.AWS.value:
             endpoint, bucket, key = upload
             upload_backup_aws(endpoint, bucket, key, out)
-        elif upload_destination_type == UploadDestinationType.AZURE.value:
+        elif upload_destination_type.title() == UploadDestinationType.AZURE.value:
             # only need two parameters from upload, key would be null
             account_url, container, key = upload
             upload_backup_azure(account_url, container, out)
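Before the documentation diff below, a short illustration of the code fix above. The original call `open(file=file.absolute, mode="rb", encoding=UTF_8)` failed twice over: `file.absolute` is a method reference rather than a path, and binary mode rejects an `encoding` argument. A minimal sketch of the corrected upload flow, assuming the `azure-storage-blob` and `azure-identity` packages; the `DefaultAzureCredential` wiring is an assumption for the sketch, not something this diff shows:

```python
from pathlib import Path

from azure.identity import DefaultAzureCredential  # assumption: how auth is wired
from azure.storage.blob import BlobServiceClient


def upload_backup_azure_sketch(account_url: str, container: str, file: Path) -> None:
    """Simplified stand-in for the patched upload_backup_azure."""
    service = BlobServiceClient(account_url=account_url, credential=DefaultAzureCredential())
    blob_client = service.get_blob_client(container=container, blob=file.name)
    # The fix: open the Path itself in binary mode, with no text encoding,
    # which is exactly the file-like payload that upload_blob expects.
    with open(file=file, mode="rb") as data:
        blob_client.upload_blob(data)
```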
diff --git a/openmetadata-docs/content/deployment/backup-restore-metadata.md b/openmetadata-docs/content/deployment/backup-restore-metadata.md
index a62240f3b58..cb14e3df319 100644
--- a/openmetadata-docs/content/deployment/backup-restore-metadata.md
+++ b/openmetadata-docs/content/deployment/backup-restore-metadata.md
@@ -26,7 +26,7 @@ The CLI comes bundled in the base `openmetadata-ingestion` Python package. You c
 pip install openmetadata-ingestion
 ```
 
-One of the `backup` features is to upload the generated backup to cloud storage (currently supporting S3). To use this,
+One of the `backup` features is to upload the generated backup to cloud storage (currently supporting S3 and Azure Blob). To use this,
 you can instead install the package with the backup plugin:
 
 ```commandline
@@ -64,30 +64,26 @@ After the installation, we can take a look at the different options to run the C
 ```commandline
 > metadata backup --help
-Usage: metadata backup [OPTIONS]
+usage: metadata backup [-h] -H HOST -u USER -p PASSWORD -d DATABASE [--port PORT] [--output OUTPUT] [--upload-destination-type {AWS,AZURE}]
+                       [--upload UPLOAD UPLOAD UPLOAD] [-o OPTIONS] [-a ARGUMENTS] [-s SCHEMA]
 
-  Run a backup for the metadata DB. Uses a custom dump strategy for
-  OpenMetadata tables.
+Run a backup for the metadata DB. Uses a custom dump strategy for OpenMetadata tables.
 
-  We can pass as many connection options as required with `-o <opt1>, -o
-  [...]` Same with connection arguments `-a <arg1>, -a [...]`
-
-  To run the upload, provide the information as `--upload endpoint bucket key`
-  and properly configure the environment variables AWS_ACCESS_KEY_ID &
-  AWS_SECRET_ACCESS_KEY
-
-Options:
-  -h, --host TEXT       Host that runs the database [required]
-  -u, --user TEXT       User to run the backup [required]
-  -p, --password TEXT   Credentials for the user [required]
-  -d, --database TEXT   Database to backup [required]
-  --port TEXT           Database service port
-  --output PATH         Local path to store the backup
-  --upload ...          S3 endpoint, bucket & key to upload the backup
-                        file
-  -o, --options TEXT
-  -a, --arguments TEXT
-  --help                Show this message and exit.
+options:
+  -h, --help            show this help message and exit
+  -H HOST, --host HOST  Host that runs the database
+  -u USER, --user USER  User to run the backup
+  -p PASSWORD, --password PASSWORD
+                        Credentials for the user
+  -d DATABASE, --database DATABASE
+                        Database to backup
+  --port PORT           Database service port
+  --output OUTPUT       Local path to store the backup
+  --upload-destination-type {AWS,AZURE}
+                        AWS or AZURE
+  --upload UPLOAD UPLOAD UPLOAD
+                        S3 endpoint, bucket & key to upload the backup file
+  -o OPTIONS, --options OPTIONS
+  -a ARGUMENTS, --arguments ARGUMENTS
+  -s SCHEMA, --schema SCHEMA
 ```
 
 ### Database Connection
@@ -104,11 +100,18 @@ date each backup was generated. We can also specify an output path, which we'll
 ### Uploading to S3
 
-We currently support uploading the backup files to S3. To run this, make sure to have `AWS_ACCESS_KEY_ID` and
+To run this, make sure to have `AWS_ACCESS_KEY_ID` and
 `AWS_SECRET_ACCESS_KEY` as environment variables with permissions to the bucket that you'd like to point to. Afterwards,
 we can just use `--upload <endpoint> <bucket> <key>` to have the CLI upload the file. In this case, you'll get both the
 local dump file and the one in the cloud.
 
+### Uploading to Azure Blob
+
+To run this, make sure to have the Azure CLI configured with permissions on the container that you'd like to point to.
+Afterwards, we can just use `--upload <account_url> <container> <folder>` to have the CLI upload the file. In this case,
+you'll get both the local dump file and the one in the cloud.
+
 ### Connection Options and Arguments
 
 You can pass any required connection options or arguments to the MySQL connection via `-o <opt1>, -o [...]`
@@ -134,11 +137,12 @@ We can do a test locally preparing some containers:
    export AWS_SECRET_ACCESS_KEY=minioadmin
 ```
 
-An example CLI call will look as:
+An example S3 CLI call will look like:
 
 ```commandline
 metadata backup -u openmetadata_user -p openmetadata_password \
-    -h localhost -d openmetadata_db --output=dir1/dir2 \
+    -H localhost -d openmetadata_db --output=dir1/dir2 \
+    --upload-destination-type AWS \
     --upload http://localhost:9000 my-bucket backup/
 ```
 
 And we'll get the following output:
 
 ```commandline
 Creating OpenMetadata backup for localhost:3306/openmetadata_db...
 Backup stored locally under dir1/dir2/openmetadata_202201250823_backup.sql
@@ -153,6 +157,22 @@ Uploading dir1/dir2/openmetadata_202201250823_backup.sql to http://localhost:900
 ```
 
 If we now head to the minio console and check the `my-backup` bucket, we'll see our SQL dump in there.
 
 minio
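A note before the Azure example that follows: the new `--upload-destination-type` flag accepts the upper-case choices `AWS` and `AZURE`, while `run_backup` (patched above) title-cases the string before comparing it to the `UploadDestinationType` values. A minimal sketch of that dispatch; the title-cased enum values here are an assumption inferred from the `.title()` calls in the diff, not copied from the repository:

```python
from enum import Enum


class UploadDestinationType(Enum):
    AWS = "Aws"      # assumption: title-cased values, which is what
    AZURE = "Azure"  # makes the .title() normalization necessary


def resolve_destination(upload_destination_type: str) -> UploadDestinationType:
    """Map the CLI's upper-case choice onto the enum, case-insensitively."""
    normalized = upload_destination_type.title()  # "AWS" -> "Aws", "AZURE" -> "Azure"
    for destination in UploadDestinationType:
        if normalized == destination.value:
            return destination
    raise ValueError(f"Unsupported upload destination: {upload_destination_type}")
```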
+
+An example Azure Blob CLI call will look like:
+
+```commandline
+metadata backup -u openmetadata_user -p openmetadata_password \
+    -H localhost -d openmetadata_db --output=dir1/dir2 \
+    --upload-destination-type AZURE \
+    --upload https://container.blob.core.windows.net/ container-name Folder-name/
+```
+And we'll get the following output:
+
+```commandline
+Creating OpenMetadata backup for localhost:3306/openmetadata_db...
+Backup stored locally under openmetadata_202212161559_backup.sql
+Uploading openmetadata_202212161559_backup.sql to https://container.blob.core.windows.net//container-name...
+```
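The walkthrough stops at the upload message, and unlike the S3 case there is no console screenshot to check against. One way to confirm the dump actually landed is to list the container's blobs. A minimal sketch, again assuming the `azure-storage-blob` and `azure-identity` packages; the account URL and container name are the hypothetical values from the example above:

```python
from azure.identity import DefaultAzureCredential
from azure.storage.blob import ContainerClient

# Hypothetical values matching the example call above.
container = ContainerClient(
    account_url="https://container.blob.core.windows.net",
    container_name="container-name",
    credential=DefaultAzureCredential(),
)

# List blobs with the backup file prefix to confirm the dump arrived.
for blob in container.list_blobs(name_starts_with="openmetadata_"):
    print(blob.name)
```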
@@ -206,7 +226,7 @@ The CLI will give messages like this `Backup restored from openmetadata_20220930
 An example CLI call will look as:
 
 ```commandline
-metadata restore -u openmetadata_user -p openmetadata_password -h localhost -d openmetadata_db --input openmetadata_202209301715_backup.sql
+metadata restore -u openmetadata_user -p openmetadata_password -H localhost -d openmetadata_db --input openmetadata_202209301715_backup.sql
 ```
 
 And we'll get the following output: