#  Copyright 2021 Collate
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#  http://www.apache.org/licenses/LICENSE-2.0
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
"""
Database Dumping utility for the metadata CLI
"""
from pathlib import Path
from typing import List, Optional

from sqlalchemy import inspect, text
from sqlalchemy.engine import Engine

# Tables whose rows are dumped with every column (SELECT *).
TABLES_DUMP_ALL = {
    "task_sequence",
    "entity_usage",
    "entity_relationship",
    "entity_extension",
    "field_relationship",
    "tag_usage",
    "openmetadata_settings",
}

# Tables dumped with every column except the ones listed in "exclude_columns".
CUSTOM_TABLES = {"entity_extension_time_series": {"exclude_columns": ["timestamp"]}}

# Tables that are never written to the dump.
NOT_MIGRATE = {"DATABASE_CHANGE_LOG"}

STATEMENT_JSON = "SELECT json FROM {table}"
STATEMENT_ALL = "SELECT * FROM {table}"
STATEMENT_TRUNCATE = "TRUNCATE TABLE {table};\n"


def clean_col(column_raw: Optional[object]) -> str:
    """
    Prepare the column to be inserted to MySQL.

    The value is stringified and rendered as a single-quoted literal.
    Backslashes and non-printable characters are escaped by ``repr``;
    double quotes are escaped as ``\\"``.

    Args:
        column_raw: raw value read from a result row, possibly ``None``.

    Returns:
        The literal ``null`` (unquoted) for ``None``, otherwise the quoted
        and escaped text.
    """
    if column_raw is None:
        return "null"

    literal = repr(str(column_raw))
    if literal.startswith('"'):
        # repr() switches to double-quote delimiters when the text holds a
        # single quote but no double quote. Escaping those delimiters below
        # would yield \"...\" which is not a valid string literal, so rebuild
        # the literal with single-quote delimiters and escape the embedded
        # single quotes instead.
        literal = "'" + literal[1:-1].replace("'", "\\'") + "'"

    return literal.replace('"', '\\"')


def dump_json(tables: List[str], engine: Engine, output: Path) -> None:
    """
    Dumps JSON data.

    For each table, appends a TRUNCATE statement to ``output`` followed by
    one INSERT statement per row, carrying only the ``json`` column.

    NOTE(review): ``Engine.execute`` is a legacy API in SQLAlchemy 1.4 and
    is not available in 2.0 - confirm the pinned SQLAlchemy version before
    upgrading.

    Args:
        tables: names of the tables to dump.
        engine: engine used to run the SELECT statements.
        output: file the statements are appended to.
    """
    # Explicit encoding: the JSON payload may hold non-ASCII text and the
    # platform default encoding is not guaranteed to be able to store it.
    with open(output, "a", encoding="utf-8") as file:
        for table in tables:
            file.write(STATEMENT_TRUNCATE.format(table=table))

            res = engine.execute(text(STATEMENT_JSON.format(table=table))).all()
            for row in res:
                insert = "INSERT INTO {table} (json) VALUES ({data});\n".format(
                    table=table, data=clean_col(row.json)
                )
                file.write(insert)


def dump_all(tables: List[str], engine: Engine, output: Path) -> None:
    """
    Dump tables that need to store all data.

    For each table, appends a TRUNCATE statement to ``output`` followed by
    one positional INSERT statement per row with every selected column.

    Args:
        tables: names of the tables to dump.
        engine: engine used to run the SELECT statements.
        output: file the statements are appended to.
    """
    with open(output, "a", encoding="utf-8") as file:
        for table in tables:
            file.write(STATEMENT_TRUNCATE.format(table=table))

            res = engine.execute(text(STATEMENT_ALL.format(table=table))).all()
            for row in res:
                insert = "INSERT INTO {table} VALUES ({data});\n".format(
                    table=table, data=",".join(clean_col(col) for col in row)
                )
                file.write(insert)


def dump_entity_custom(engine: Engine, output: Path, inspector) -> None:
    """
    This function is used to dump entities with custom handling.

    Every table in ``CUSTOM_TABLES`` is dumped with all of its columns
    except the ones listed under ``exclude_columns``.

    Args:
        engine: engine used to run the SELECT statements.
        output: file the statements are appended to.
        inspector: object exposing ``get_columns(table_name=...)``; ``dump``
            passes the result of ``sqlalchemy.inspect(engine)``.
    """
    with open(output, "a", encoding="utf-8") as file:
        for table, data in CUSTOM_TABLES.items():
            file.write(STATEMENT_TRUNCATE.format(table=table))

            # The column list is identical for the SELECT and for every
            # INSERT of this table, so build it once.
            excluded = data["exclude_columns"]
            cols = ",".join(
                col["name"]
                for col in inspector.get_columns(table_name=table)
                if col["name"] not in excluded
            )

            statement = "SELECT {cols} FROM {table}".format(cols=cols, table=table)
            res = engine.execute(text(statement)).all()
            for row in res:
                insert = "INSERT INTO {table} ({cols}) VALUES ({data});\n".format(
                    table=table,
                    data=",".join(clean_col(col) for col in row),
                    cols=cols,
                )
                file.write(insert)


def dump(engine: Engine, output: Path, schema: Optional[str] = None) -> None:
    """
    Get all tables from the database and dump
    only the JSON column for the required tables.

    Tables in ``TABLES_DUMP_ALL`` are dumped with all their columns, tables
    in ``CUSTOM_TABLES`` with their custom column selection, and tables in
    ``NOT_MIGRATE`` are skipped.

    Args:
        engine: engine connected to the database to dump.
        output: file the statements are appended to.
        schema: optional schema passed to the inspector when listing tables.
    """
    inspector = inspect(engine)
    tables = (
        inspector.get_table_names(schema) if schema else inspector.get_table_names()
    )

    dump_json_tables = [
        table
        for table in tables
        if table not in TABLES_DUMP_ALL
        and table not in NOT_MIGRATE
        and table not in CUSTOM_TABLES
    ]

    # Sorted so that the table order in the dump file is the same on every
    # run; iterating the set directly depends on string hashing.
    dump_all(tables=sorted(TABLES_DUMP_ALL), engine=engine, output=output)
    dump_json(tables=dump_json_tables, engine=engine, output=output)
    dump_entity_custom(engine=engine, output=output, inspector=inspector)