# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
|
|
|
|
Test ometa to orm converter
|
|
|
|
"""
|
|
|
|
|
|
|
|
from unittest.mock import patch
from uuid import UUID

from pytest import mark
from sqlalchemy import Column as SQAColumn
from sqlalchemy.sql.sqltypes import INTEGER, String

from metadata.generated.schema.entity.data.table import Column, DataType, Table
from metadata.generated.schema.entity.services.databaseService import (
    DatabaseServiceType,
)
from metadata.profiler.orm.converter import get_columns, ometa_to_sqa_orm


@patch("metadata.profiler.orm.converter.get_orm_schema", return_value="schema")
@patch("metadata.profiler.orm.converter.get_orm_database", return_value="database")
@mark.parametrize(
    "column_definition, table_name",
    [
        (
            [
                (
                    "CaseSensitive",
                    DataType.STRING,
                ),
                ("UPPER_CASE", DataType.INT),
            ],
            "table_1",
        ),
        (
            [
                (
                    "all_lower_case",
                    DataType.STRING,
                ),
                ("lower_case", DataType.INT),
            ],
            "table_2",
        ),
    ],
)
def test_snowflake_case_sensitive_orm(
    mock_schema, mock_database, column_definition, table_name
):
"""Test that snowflake case sensitive orm table
|
|
|
|
are enforced correctly
|
|
|
|
"""
    columns = [
        Column(
            name=name,
            dataType=type,
        )
        for name, type in column_definition
    ]

    table = Table(
        id=UUID("1f8c1222-09a0-11ed-871b-ca4e864bb16a"),
        name=table_name,
        columns=columns,
        serviceType=DatabaseServiceType.Snowflake,
    )

    orm_table = ometa_to_sqa_orm(table, None)

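    # Snowflake treats quoted identifiers as case sensitive, so the generated
    # ORM table should be marked for quoting and keep the original column
    # names reachable as attributes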
    assert orm_table.__table_args__.get("quote")
    assert [
        name.lower() for name, _ in column_definition
    ] == orm_table.__table__.columns.keys()
    assert orm_table.__tablename__ == table_name
    assert orm_table.__table_args__["schema"] == "schema"
    for name, _ in column_definition:
        assert hasattr(orm_table, name)


@patch("metadata.profiler.orm.converter.get_orm_schema", return_value="schema")
|
|
|
|
@patch("metadata.profiler.orm.converter.get_orm_database", return_value="database")
|
2022-09-12 18:05:36 +02:00
|
|
|
def test_metadata_column(mock_schema, mock_database):
|
2022-07-22 19:05:32 +02:00
|
|
|
"""Test that snowflake case sensitive orm table
|
|
|
|
are enforced correctly
|
|
|
|
"""
    table_name = "foo"
    column_definition = [
        (
            "foo",
            DataType.STRING,
        ),
        ("metadata", DataType.INT),
    ]

    columns = [
        Column(
            name=name,
            dataType=type,
        )
        for name, type in column_definition
    ]

    table = Table(
        id=UUID("1f8c1222-09a0-11ed-871b-ca4e864bb16a"),
        name=table_name,
        columns=columns,
        serviceType=DatabaseServiceType.BigQuery,
    )

    orm_table = ometa_to_sqa_orm(table, None)

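    # BigQuery identifiers are not quoted here, and a column named `metadata`
    # (a name the SQLAlchemy declarative base also uses) should still be
    # exposed on the generated ORM class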
    assert not orm_table.__table_args__.get("quote")
    assert [
        name.lower() for name, _ in column_definition
    ] == orm_table.__table__.columns.keys()
    assert orm_table.__tablename__ == table_name
    assert orm_table.__table_args__["schema"] == "schema"
    for name, _ in column_definition:
        assert hasattr(orm_table, name)


def test_get_columns_regular():
    """Test that the get_columns function reads regular columns correctly"""
    regular_columns = [
        Column(
            name="col1",
            dataType=DataType.STRING,
        ),
        Column(
            name="col2",
            dataType=DataType.INT,
        ),
    ]

    cols = get_columns(regular_columns, DatabaseServiceType.BigQuery)
    col1 = cols["col1"]
    col2 = cols["col2"]
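    # Each OpenMetadata data type should map to its SQLAlchemy counterpart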
    assert len(cols) == 2
    assert col1.name == "col1"
    assert isinstance(col1.type, String)
    assert col2.name == "col2"
    assert isinstance(col2.type, INTEGER)


def test_get_columns_struct():
    """Test that the get_columns function reads struct columns correctly"""
    struct_columns = [
        Column(
            name="col1",
            dataType=DataType.STRING,
        ),
        Column(
            name="col2",
            dataType=DataType.STRUCT,
            children=[
                Column(
                    name="structCol1",
                    dataType=DataType.STRING,
                ),
                Column(
                    name="structCol2",
                    dataType=DataType.STRUCT,
                    children=[
                        Column(
                            name="nestedStructCol1",
                            dataType=DataType.STRING,
                        ),
                    ],
                ),
            ],
        ),
    ]

    cols = get_columns(struct_columns, DatabaseServiceType.BigQuery)
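    # Struct children are flattened into dot-separated column names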
    assert len(cols) == 5
    assert "col2.structCol1" in cols
    assert "col2.structCol2" in cols
    assert "col2.structCol2.nestedStructCol1" in cols


def test_get_columns():
    """Test that get_columns handles struct and array-of-struct columns"""
    struct_columns = [
        Column(
            name="col1",
            dataType=DataType.STRING,
        ),
        Column(
            name="col2",
            dataType=DataType.STRUCT,
            children=[
                Column(
                    name="structCol1",
                    dataType=DataType.STRING,
                ),
                Column(
                    name="structCol2",
                    dataType=DataType.STRUCT,
                    children=[
                        Column(
                            name="nestedStructCol1",
                            dataType=DataType.STRING,
                        ),
                    ],
                ),
            ],
        ),
    ]

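    # Plain STRUCT columns: flattened as above, nothing is flagged as an array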
    cols = get_columns(struct_columns, DatabaseServiceType.BigQuery)
    assert not any([v._is_array for k, v in cols.items()])
    assert len(cols) == 5
    assert "col2.structCol1" in cols
    assert "col2.structCol2" in cols
    assert "col2.structCol2.nestedStructCol1" in cols

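    # Redefine col2.structCol1 as an ARRAY of STRUCT to exercise array handling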
    struct_columns = [
        Column(
            name="col1",
            dataType=DataType.STRING,
        ),
        Column(
            name="col2",
            dataType=DataType.STRUCT,
            children=[
                Column(
                    name="structCol1",
                    dataType=DataType.ARRAY,
                    arrayDataType=DataType.STRUCT,
                    children=[
                        Column(
                            name="arrayStructCol1",
                            dataType=DataType.STRING,
                        ),
                        Column(
                            name="arrayStructCol2",
                            dataType=DataType.INT,
                        ),
                    ],
                )
            ],
        ),
    ]

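    # For BigQuery, children of an ARRAY column carry the _is_array flag,
    # remember their parent via _array_col, and render with the parent path
    # backtick-quoted (explicit quoting disabled otherwise)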
    cols = get_columns(struct_columns, DatabaseServiceType.BigQuery)
    assert any([v._is_array for k, v in cols.items()])
    assert len(cols) == 5
    assert "col2.structCol1" in cols
    assert "col2.structCol1.arrayStructCol1" in cols
    assert "col2.structCol1.arrayStructCol2" in cols
    assert cols["col2.structCol1.arrayStructCol1"]._array_col == "col2.structCol1"
    assert (
        str(cols["col2.structCol1.arrayStructCol1"].name)
        == "`col2.structCol1`.arrayStructCol1"
    )
    assert not cols["col2.structCol1.arrayStructCol1"].name.quote
    assert cols["col2.structCol1.arrayStructCol2"]._array_col == "col2.structCol1"
    assert (
        str(cols["col2.structCol1.arrayStructCol2"].name)
        == "`col2.structCol1`.arrayStructCol2"
    )
    assert not cols["col2.structCol1.arrayStructCol2"].name.quote

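    # Snowflake gets no special array treatment: no _is_array flag or
    # _array_col, and the flattened names stay quoted as-is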
    cols = get_columns(struct_columns, DatabaseServiceType.Snowflake)
    assert not any([v._is_array for k, v in cols.items()])
    assert not cols["col2.structCol1.arrayStructCol1"]._array_col
    assert cols["col2.structCol1.arrayStructCol1"].name.quote
    assert (
        str(cols["col2.structCol1.arrayStructCol1"].name)
        == "col2.structCol1.arrayStructCol1"
    )
    assert not cols["col2.structCol1.arrayStructCol2"]._array_col
    assert cols["col2.structCol1.arrayStructCol2"].name.quote
    assert (
        str(cols["col2.structCol1.arrayStructCol2"].name)
        == "col2.structCol1.arrayStructCol2"
    )