mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-10-15 18:58:32 +00:00
198 lines
6.1 KiB
Python
198 lines
6.1 KiB
Python
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Database Dumping utility for the metadata CLI
"""
|
|
|
|
import json
|
|
from functools import singledispatch
|
|
from pathlib import Path
|
|
from typing import List, Optional, Union
|
|
|
|
from sqlalchemy import inspect, text
|
|
from sqlalchemy.engine import Engine
|
|
|
|
from metadata.utils.constants import UTF_8
|
|
|
|
# Tables dumped with all of their columns (see `dump_all`)
TABLES_DUMP_ALL = {
    "task_sequence",
    "entity_usage",
    "entity_relationship",
    "entity_extension",
    "field_relationship",
    "tag_usage",
    "openmetadata_settings",
}

# Tables dumped by `dump_entity_custom`, skipping the listed columns
CUSTOM_TABLES = {
    "entity_extension_time_series": {
        "exclude_columns": ["timestamp"],
    },
}

# Tables left out of the JSON dump in `dump`
NOT_MIGRATE = {
    "DATABASE_CHANGE_LOG",
    "SERVER_MIGRATION_SQL_LOGS",
    "SERVER_CHANGE_LOG",
}

# SQL templates, filled in with `str.format`
STATEMENT_JSON = "SELECT json FROM {table}"
STATEMENT_ALL = "SELECT * FROM {table}"
STATEMENT_TRUNCATE = "TRUNCATE TABLE {table};\n"
STATEMENT_ALL_NEW = "SELECT {cols} FROM {table}"

# Value of `engine.name` that enables the MySQL-specific escaping in `clean_str`
MYSQL_ENGINE_NAME = "mysql"
|
|
|
|
|
|
def single_quote_wrap(raw: str) -> str:
    """
    Surround the given value with single quotes, e.g., `str` becomes `'str'`.

    The content itself is not escaped here.
    """
    quote = "'"
    return f"{quote}{raw}{quote}"
|
|
|
|
|
|
def clean_str(raw: str, engine: Engine) -> str:
    r"""
    Escape a raw string so it can be placed inside a single-quoted SQL literal.

    The escaping differs between MySQL and Postgres:

    - Single quotes are always doubled, e.g., `Mysql's data`
      becomes `Mysql''s data`.
    - In MySQL a literal backslash has to be escaped with another one, so every
      backslash is doubled. This covers sequences such as `\n` and `\"` inside
      the inner JSON of a field. Postgres does not need it.
    """
    escaped = raw.replace("'", "''")

    if engine.name != MYSQL_ENGINE_NAME:
        return escaped

    return escaped.replace("\\", "\\\\")
|
|
|
|
|
|
@singledispatch
def clean_col(column_raw: Optional[Union[dict, str]], engine: Engine) -> str:
    """
    Render a column value as a SQL literal.

    This is the generic implementation of the single-dispatch function:
    `None` becomes `null`, any other value is converted with `str`, escaped
    with `clean_str` and wrapped in single quotes.
    """
    if column_raw is None:
        return "null"

    escaped = clean_str(str(column_raw), engine)
    return single_quote_wrap(escaped)
|
|
|
|
|
|
@clean_col.register(dict)
@clean_col.register(list)
def _(column_raw: Optional[Union[dict, list]], engine: Engine) -> str:
    """
    Prepare a JSON column (dict or list) to be inserted as a SQL literal.

    The value is serialized with `json.dumps`, which takes care of:
    - quotes
    - True/False values

    The serialized text is then escaped with `clean_str` and single-quoted.
    """
    if column_raw is None:
        return "null"

    # If we don't know how to serialize a nested value, convert it to str
    serialized = json.dumps(column_raw, default=str)
    return single_quote_wrap(clean_str(serialized, engine))
|
|
|
|
|
|
def dump_json(tables: List[str], engine: Engine, output: Path) -> None:
    """
    Dumps JSON data.

    For every table in `tables`, append to `output` a TRUNCATE statement
    followed by one INSERT per row carrying only the `json` column.

    Postgres: engine.name == "postgresql"
    MySQL: engine.name == "mysql"
    """
    # Use an explicit connection: the connectionless `Engine.execute`
    # is a legacy API that is removed in SQLAlchemy 2.0.
    with open(output, "a", encoding=UTF_8) as file, engine.connect() as conn:
        for table in tables:
            file.write(STATEMENT_TRUNCATE.format(table=table))

            # Iterate the result directly instead of materializing a list with `.all()`
            for row in conn.execute(text(STATEMENT_JSON.format(table=table))):
                insert = f"INSERT INTO {table} (json) VALUES ({clean_col(row.json, engine)});\n"
                file.write(insert)
|
|
|
|
|
|
def dump_all(tables: List[str], engine: Engine, output: Path) -> None:
    """
    Dump tables that need to store all data

    For every table in `tables`, append to `output` a TRUNCATE statement
    followed by one positional INSERT per row with every column value, in the
    order returned by `SELECT *`.
    """
    # Use an explicit connection: the connectionless `Engine.execute`
    # is a legacy API that is removed in SQLAlchemy 2.0.
    with open(output, "a", encoding=UTF_8) as file, engine.connect() as conn:
        for table in tables:
            file.write(STATEMENT_TRUNCATE.format(table=table))

            # Iterate the result directly instead of materializing a list with `.all()`
            for row in conn.execute(text(STATEMENT_ALL.format(table=table))):
                data = ",".join(clean_col(col, engine) for col in row)
                file.write(f"INSERT INTO {table} VALUES ({data});\n")
|
|
|
|
|
|
def dump_entity_custom(engine: Engine, output: Path, inspector) -> None:
    """
    This function is used to dump entities with custom handling

    For every table in `CUSTOM_TABLES`, append to `output` a TRUNCATE statement
    followed by one INSERT per row. Columns listed under `exclude_columns`
    are neither selected nor inserted. The column names are read from
    `inspector.get_columns`.
    """
    # Use an explicit connection: the connectionless `Engine.execute`
    # is a legacy API that is removed in SQLAlchemy 2.0.
    with open(output, "a", encoding=UTF_8) as file, engine.connect() as conn:
        for table, data in CUSTOM_TABLES.items():
            file.write(STATEMENT_TRUNCATE.format(table=table))

            # Build the column list once per table. The same list drives both the
            # SELECT and the INSERT, so values and column names stay aligned.
            cols = ",".join(
                col["name"]
                for col in inspector.get_columns(table_name=table)
                if col["name"] not in data["exclude_columns"]
            )
            statement = STATEMENT_ALL_NEW.format(cols=cols, table=table)

            # Iterate the result directly instead of materializing a list with `.all()`
            for row in conn.execute(text(statement)):
                values = ",".join(clean_col(col, engine) for col in row)
                file.write(f"INSERT INTO {table} ({cols}) VALUES ({values});\n")
|
|
|
|
|
|
def dump(engine: Engine, output: Path, schema: Optional[str] = None) -> None:
    """
    Get all tables from the database and dump
    only the JSON column for the required tables

    Tables in `TABLES_DUMP_ALL` are dumped with all their columns, tables in
    `CUSTOM_TABLES` get custom handling and tables in `NOT_MIGRATE` are skipped.
    If `schema` is given, the table names are listed from that schema.
    """
    inspector = inspect(engine)
    tables = (
        inspector.get_table_names(schema) if schema else inspector.get_table_names()
    )

    # Everything handled elsewhere (or not migrated at all) is left out of the JSON dump
    skip_json = {*TABLES_DUMP_ALL, *NOT_MIGRATE, *CUSTOM_TABLES}
    dump_json_tables = [table for table in tables if table not in skip_json]

    # A set of strings has no stable iteration order between runs,
    # so sort it to keep the dump file reproducible.
    dump_all(tables=sorted(TABLES_DUMP_ALL), engine=engine, output=output)
    dump_json(tables=dump_json_tables, engine=engine, output=output)
    dump_entity_custom(engine=engine, output=output, inspector=inspector)
|