Fixes #7280 - Implements support for BYTEA DType (#7749)

* Added support for postgres bytea data type

* Added BYTEA support to the profiler and converted bytes to hex when reading results
This commit is contained in:
Teddy 2022-09-28 13:28:35 +02:00 committed by GitHub
parent 859317494b
commit 962866a30e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 142 additions and 1 deletions

View File

@ -1,6 +1,7 @@
import re import re
from typing import Any, Dict, List, Optional, Tuple, Type, Union from typing import Any, Dict, List, Optional, Tuple, Type, Union
from sqlalchemy.dialects.postgresql import BYTEA
from sqlalchemy.sql import sqltypes as types from sqlalchemy.sql import sqltypes as types
from sqlalchemy.types import TypeEngine from sqlalchemy.types import TypeEngine
@ -59,6 +60,7 @@ class ColumnTypeParser:
# Custom wrapper types enriching SQA type system # Custom wrapper types enriching SQA type system
sqa_types.SQAMap: "MAP", sqa_types.SQAMap: "MAP",
sqa_types.SQAStruct: "STRUCT", sqa_types.SQAStruct: "STRUCT",
BYTEA: "BYTEA",
} }
_SOURCE_TYPE_TO_OM_TYPE = { _SOURCE_TYPE_TO_OM_TYPE = {
@ -172,6 +174,7 @@ class ColumnTypeParser:
"UUID": "UUID", "UUID": "UUID",
"POINT": "POINT", "POINT": "POINT",
"POLYGON": "POLYGON", "POLYGON": "POLYGON",
"BYTEA": "BYTEA",
} }
_COMPLEX_TYPE = re.compile("^(struct|map|array|uniontype)") _COMPLEX_TYPE = re.compile("^(struct|map|array|uniontype)")

View File

@ -66,6 +66,7 @@ _TYPE_MAP = {
DataType.ENUM: sqlalchemy.Enum, DataType.ENUM: sqlalchemy.Enum,
DataType.JSON: sqlalchemy.JSON, DataType.JSON: sqlalchemy.JSON,
DataType.UUID: CustomTypes.UUID.value, DataType.UUID: CustomTypes.UUID.value,
DataType.BYTEA: CustomTypes.BYTEA.value,
} }
SQA_RESERVED_ATTRIBUTES = ["metadata"] SQA_RESERVED_ATTRIBUTES = ["metadata"]

View File

@ -18,6 +18,7 @@ from sqlalchemy import Date, DateTime, Integer, Numeric, Time
from sqlalchemy.sql.sqltypes import Concatenable, Enum from sqlalchemy.sql.sqltypes import Concatenable, Enum
from metadata.ingestion.source import sqa_types from metadata.ingestion.source import sqa_types
from metadata.orm_profiler.orm.types.bytea_to_string import ByteaToHex
from metadata.orm_profiler.orm.types.hex_byte_string import HexByteString from metadata.orm_profiler.orm.types.hex_byte_string import HexByteString
from metadata.orm_profiler.orm.types.uuid import UUIDString from metadata.orm_profiler.orm.types.uuid import UUIDString
from metadata.orm_profiler.registry import TypeRegistry from metadata.orm_profiler.registry import TypeRegistry
@ -26,6 +27,7 @@ from metadata.orm_profiler.registry import TypeRegistry
class CustomTypes(TypeRegistry): class CustomTypes(TypeRegistry):
BYTES = HexByteString BYTES = HexByteString
UUID = UUIDString UUID = UUIDString
BYTEA = ByteaToHex
class Dialects(Enum): class Dialects(Enum):

View File

@ -0,0 +1,52 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Expand sqlalchemy types to map them to OpenMetadata DataType
"""
# pylint: disable=duplicate-code
from typing import Optional
from sqlalchemy.sql.sqltypes import String, TypeDecorator
class ByteaToHex(TypeDecorator):
    """Expose binary column values (e.g. PostgreSQL BYTEA) as hex strings.

    The column is handled through the ``String`` implementation type; binary
    values read from the database are converted to their hexadecimal text
    representation.
    """

    impl = String
    cache_ok = True

    @property
    def python_type(self):
        """Python type of the values produced when reading results."""
        return str

    @staticmethod
    def validate(value: bytes) -> None:
        """Make sure the data is of a supported binary type.

        Args:
            value: raw value read from the database

        Raises:
            TypeError: if ``value`` is neither ``bytes`` nor ``memoryview``
        """
        if not isinstance(value, (memoryview, bytes)):
            raise TypeError(
                "ByteaToHex columns support only bytes or memoryview values."
            )

    def process_result_value(
        self, value: Optional[bytes], dialect
    ) -> Optional[str]:
        """This is executed during result retrieval

        Args:
            value: database record (``bytes`` or ``memoryview``), or ``None``
                for SQL NULL
            dialect: database dialect

        Returns:
            hex string representation of the byte value, or ``None`` when the
            database value is NULL

        Raises:
            TypeError: if ``value`` is not ``None``, ``bytes`` or ``memoryview``
        """
        # Only NULL maps to None. A plain truthiness test would also swallow
        # an empty binary value, which must stay distinguishable from NULL
        # (its hex representation is the empty string).
        if value is None:
            return None

        self.validate(value)
        return value.hex()

View File

@ -0,0 +1,82 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test SQA custom types are correctly maped"""
from unittest import TestCase
from sqlalchemy import TEXT, Column, Integer, String, create_engine, func
from sqlalchemy.orm import Session, declarative_base
from metadata.orm_profiler.orm.types.bytea_to_string import ByteaToHex
# Declarative base class for the ORM model defined in this test module.
Base = declarative_base()


class User(Base):
    """Sample ``users`` table model used as a fixture by the tests below."""

    __tablename__ = "users"
    id = Column(Integer, primary_key=True)
    name = Column(String(256))
    fullname = Column(String(256))
    nickname = Column(String(256))
    comments = Column(TEXT)
    age = Column(Integer)
    # Column under test: declared with the custom ByteaToHex type.
    config = Column(ByteaToHex)
class TestCustomTypes(TestCase):
    """Tests for custom SQA types"""

    # In-memory SQLite database shared by every test in this class.
    engine = create_engine("sqlite+pysqlite:///:memory:", echo=False, future=True)
    session = Session(engine)

    @classmethod
    def setUpClass(cls) -> None:
        """Create the ``users`` table and load it with sample rows."""
        User.__table__.create(bind=cls.engine)

        # The loop index is not needed: every iteration inserts and commits
        # the same batch of three users.
        for _ in range(10):
            data = [
                User(
                    name="John",
                    fullname="John Doe",
                    nickname="johnny b goode",
                    comments="no comments",
                    age=30,
                    config=memoryview(b"foo"),
                ),
                User(
                    name="Jane",
                    fullname="Jone Doe",
                    nickname=None,
                    comments="maybe some comments",
                    age=31,
                    config=memoryview(b"bar"),
                ),
                User(
                    name="John",
                    fullname="John Doe",
                    nickname=None,
                    comments=None,
                    age=None,
                    config=memoryview(b"fooBar"),
                ),
            ]
            cls.session.add_all(data)
            cls.session.commit()

    def test_bytea_to_hex(self):
        """test ByteaToHex correctly returns a hex string from a memoryview value"""
        row = self.session.query(User.config).first()
        # unittest assertion: reports the offending value on failure and is
        # not stripped when Python runs with -O, unlike a bare ``assert``.
        self.assertIsInstance(row.config, str)

    @classmethod
    def tearDownClass(cls) -> None:
        """Release the session, drop the ``users`` table and dispose the engine."""
        # Close the session first so it no longer holds a connection or an
        # open transaction while the table is dropped.
        cls.session.close()
        User.__table__.drop(bind=cls.engine)
        cls.engine.dispose()
        return super().tearDownClass()

View File

@ -94,7 +94,8 @@
"VARIANT", "VARIANT",
"GEOMETRY", "GEOMETRY",
"POINT", "POINT",
"POLYGON" "POLYGON",
"BYTEA"
] ]
}, },
"constraint": { "constraint": {