diff --git a/ingestion/src/metadata/cli/db_dump.py b/ingestion/src/metadata/cli/db_dump.py index effd9aaded0..57512a0ebbf 100644 --- a/ingestion/src/metadata/cli/db_dump.py +++ b/ingestion/src/metadata/cli/db_dump.py @@ -13,8 +13,10 @@ Database Dumping utility for the metadata CLI """ +import json +from functools import singledispatch from pathlib import Path -from typing import List +from typing import List, Optional, Union from sqlalchemy import inspect, text from sqlalchemy.engine import Engine @@ -40,12 +42,36 @@ STATEMENT_TRUNCATE = "TRUNCATE TABLE {table};\n" STATEMENT_ALL_NEW = "SELECT {cols} FROM {table}" -def clean_col(column_raw: str) -> str: +def single_quote_wrap(raw: str) -> str: """ - Prepare the column to be inserted to MySQL + Add single quote wrap to string. From `str` to `'str'` + """ + return f"'{raw}'" + + +@singledispatch +def clean_col(column_raw: Optional[Union[dict, str]]) -> str: + return single_quote_wrap(str(column_raw)) if column_raw is not None else "null" + + +@clean_col.register(dict) +@clean_col.register(list) +def _(column_raw: Optional[Union[dict, list]]) -> str: + """ + Prepare the JSON column to be inserted to MySQL + + Handle: + - quotes + - True/False values """ return ( - repr(str(column_raw)).replace('"', '\\"') if column_raw is not None else "null" + single_quote_wrap( + json.dumps( + column_raw, default=str + ) # If we don't know how to serialize, convert to str + ) + if column_raw is not None + else "null" ) diff --git a/ingestion/src/metadata/cli/restore.py b/ingestion/src/metadata/cli/restore.py index 10c0da86cb3..e90abee7675 100644 --- a/ingestion/src/metadata/cli/restore.py +++ b/ingestion/src/metadata/cli/restore.py @@ -12,6 +12,8 @@ """ Restore utility for the metadata CLI """ +import traceback + from sqlalchemy.engine import Engine from metadata.cli.utils import get_engine @@ -28,12 +30,35 @@ def execute_sql_file(engine: Engine, sql_file: str) -> None: """ with open(sql_file, encoding="utf-8") as file: - for query in file.readlines(): + failed_queries = 0 + all_queries = file.readlines() + print_ansi_encoded_string( + color=ANSI.GREEN, + bold=False, + message=f"Queries to process for restore: {len(all_queries)}", + ) + + for query in all_queries: # `%` is a reserved syntax in SQLAlchemy to bind parameters. Escaping it with `%%` clean_query = query.replace("%", "%%") - with engine.connect() as conn: - conn.execute(clean_query) + try: + with engine.connect() as conn: + conn.execute(clean_query) + + except Exception as err: + failed_queries += 1 + logger.debug(traceback.format_exc()) + logger.warning( + f"Error processing the following query while restoring - {err}" + ) + logger.warning(clean_query) + + print_ansi_encoded_string( + color=ANSI.GREEN, + bold=False, + message=f"Restore finished. {failed_queries} queries failed.", + ) def run_restore( diff --git a/openmetadata-docs/content/deployment/backup-restore-metadata.md b/openmetadata-docs/content/deployment/backup-restore-metadata.md index cb14e3df319..eaef916332c 100644 --- a/openmetadata-docs/content/deployment/backup-restore-metadata.md +++ b/openmetadata-docs/content/deployment/backup-restore-metadata.md @@ -58,16 +58,26 @@ replacing it with whatever comes from the SQL script. + + + +Running the backup or restore commands with version 0.12.3 or lower? The host parameter is `-h`. + +For 0.13 or higher, `-h` is the flag used for the help command. Pass the host via `-H`. + + + ## Backup CLI After the installation, we can take a look at the different options to run the CLI: ```commandline -> metadata backup --help -usage: metadata backup [-h] -H HOST -u USER -p PASSWORD -d DATABASE [--port PORT] [--output OUTPUT] [--upload-destination-type {AWS,AZURE}] - [--upload UPLOAD UPLOAD UPLOAD] [-o OPTIONS] [-a ARGUMENTS] [-s SCHEMA] +> metadata backup -h +usage: metadata backup [-h] -H HOST -u USER -p PASSWORD -d DATABASE [--port PORT] [--output OUTPUT] + [--upload-destination-type {AWS,AZURE}] [--upload UPLOAD UPLOAD UPLOAD] [-o OPTIONS] [-a ARGUMENTS] + [-s SCHEMA] -options: +optional arguments: -h, --help show this help message and exit -H HOST, --host HOST Host that runs the database -u USER, --user USER User to run the backup @@ -142,7 +152,6 @@ An example of S3 CLI call will look as: ```commandline metadata backup -u openmetadata_user -p openmetadata_password \ -H localhost -d openmetadata_db --output=dir1/dir2 \ - --upload-destination-type AWS \ --upload http://localhost:9000 my-bucket backup/ ``` @@ -192,29 +201,23 @@ The restore CLI needs to be used with `openmetadata-ingestion` version 0.12.1 or After the installation, we can take a look at the different options to run the CLI: ```commandline -> metadata restore --help -Usage: metadata restore [OPTIONS] +> metadata restore -h +usage: metadata restore [-h] -H HOST -u USER -p PASSWORD -d DATABASE [--port PORT] --input INPUT [-o OPTIONS] + [-a ARGUMENTS] [-s SCHEMA] - Run a restore for the metadata DB. - - We can pass as many connection options as required with `-o , -o - [...]` Same with connection arguments `-a , -a [...]` - - If `-s` or `--schema` is provided, we will trigger a Postgres Restore - instead of a MySQL restore. This is the value of the schema containing the - OpenMetadata tables. - -Options: - -h, --host TEXT Host that runs the database [required] - -u, --user TEXT User to run the restore backup [required] - -p, --password TEXT Credentials for the user [required] - -d, --database TEXT Database to restore [required] - --port TEXT Database service port - --input PATH Local backup file path for restore [required] - -o, --options TEXT - -a, --arguments TEXT - -s, --schema TEXT - --help Show this message and exit. +optional arguments: + -h, --help show this help message and exit + -H HOST, --host HOST Host that runs the database + -u USER, --user USER User to run the restore backup + -p PASSWORD, --password PASSWORD + Credentials for the user + -d DATABASE, --database DATABASE + Database to restore + --port PORT Database service port + --input INPUT Local backup file path for restore + -o OPTIONS, --options OPTIONS + -a ARGUMENTS, --arguments ARGUMENTS + -s SCHEMA, --schema SCHEMA ``` ### Output