Fixes #7280 - Implements support for BYTEA DType (#7749)

* Added support for postgres bytea data type

* Added BYTEA support to the profiler and convert bytes to hex when results are read
This commit is contained in:
Teddy 2022-09-28 13:28:35 +02:00 committed by GitHub
parent 859317494b
commit 962866a30e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 142 additions and 1 deletions

View File

@ -1,6 +1,7 @@
import re
from typing import Any, Dict, List, Optional, Tuple, Type, Union
from sqlalchemy.dialects.postgresql import BYTEA
from sqlalchemy.sql import sqltypes as types
from sqlalchemy.types import TypeEngine
@ -59,6 +60,7 @@ class ColumnTypeParser:
# Custom wrapper types enriching SQA type system
sqa_types.SQAMap: "MAP",
sqa_types.SQAStruct: "STRUCT",
BYTEA: "BYTEA",
}
_SOURCE_TYPE_TO_OM_TYPE = {
@ -172,6 +174,7 @@ class ColumnTypeParser:
"UUID": "UUID",
"POINT": "POINT",
"POLYGON": "POLYGON",
"BYTEA": "BYTEA",
}
_COMPLEX_TYPE = re.compile("^(struct|map|array|uniontype)")

View File

@ -66,6 +66,7 @@ _TYPE_MAP = {
DataType.ENUM: sqlalchemy.Enum,
DataType.JSON: sqlalchemy.JSON,
DataType.UUID: CustomTypes.UUID.value,
DataType.BYTEA: CustomTypes.BYTEA.value,
}
SQA_RESERVED_ATTRIBUTES = ["metadata"]

View File

@ -18,6 +18,7 @@ from sqlalchemy import Date, DateTime, Integer, Numeric, Time
from sqlalchemy.sql.sqltypes import Concatenable, Enum
from metadata.ingestion.source import sqa_types
from metadata.orm_profiler.orm.types.bytea_to_string import ByteaToHex
from metadata.orm_profiler.orm.types.hex_byte_string import HexByteString
from metadata.orm_profiler.orm.types.uuid import UUIDString
from metadata.orm_profiler.registry import TypeRegistry
@ -26,6 +27,7 @@ from metadata.orm_profiler.registry import TypeRegistry
class CustomTypes(TypeRegistry):
    """Named registry of the custom SQLAlchemy column types used by the profiler ORM.

    Each member binds a short name to one of the type classes imported from
    ``metadata.orm_profiler.orm.types``. Callers read the class through the
    member's ``value`` (for example ``CustomTypes.BYTEA.value``).
    NOTE(review): the exact lookup behaviour comes from ``TypeRegistry``,
    which is defined outside this file - confirm there.
    """

    BYTES = HexByteString
    UUID = UUIDString
    # bytea columns are exposed as hexadecimal strings by ByteaToHex
    BYTEA = ByteaToHex
class Dialects(Enum):

View File

@ -0,0 +1,52 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Expand sqlalchemy types to map them to OpenMetadata DataType
"""
# pylint: disable=duplicate-code
from typing import Optional
from sqlalchemy.sql.sqltypes import String, TypeDecorator
class ByteaToHex(TypeDecorator):
    """Expose binary (``bytea``) column values as hexadecimal strings.

    The decorator only customises the result side: raw binary values read
    from the database are converted with ``.hex()`` so downstream consumers
    receive plain text instead of ``bytes``/``memoryview`` objects.
    """

    impl = String
    # The type carries no per-instance state, so it is safe to take part in
    # SQLAlchemy's compiled-statement cache key.
    cache_ok = True

    @property
    def python_type(self):
        """Python type of the values handed back to callers (always ``str``)."""
        return str

    @staticmethod
    def validate(value: bytes):
        """Make sure the raw database value is binary data.

        Args:
            value: raw value returned by the driver; ``bytes`` and
                ``memoryview`` are both accepted.

        Raises:
            TypeError: if ``value`` is neither ``bytes`` nor ``memoryview``.
        """
        if not isinstance(value, (memoryview, bytes)):
            raise TypeError(
                "ByteaToHex columns support only bytes or memoryview values."
            )

    def process_result_value(self, value: Optional[bytes], dialect) -> Optional[str]:
        """Convert a fetched binary value to its hex representation.

        This is executed during result retrieval.

        Args:
            value: database record (``bytes`` or ``memoryview``), or ``None``
                for SQL NULL
            dialect: database dialect (unused)

        Returns:
            hex string representation of the byte value, or ``None`` when the
            stored value is NULL. An empty binary value yields ``""``.

        Raises:
            TypeError: if the driver returned a non-binary value.
        """
        # Compare against None explicitly: an empty bytes/memoryview is falsy
        # but is still a real (non-NULL) value and must map to "".
        if value is None:
            return None
        self.validate(value)
        return value.hex()

View File

@ -0,0 +1,82 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test SQA custom types are correctly mapped"""
from unittest import TestCase
from sqlalchemy import TEXT, Column, Integer, String, create_engine, func
from sqlalchemy.orm import Session, declarative_base
from metadata.orm_profiler.orm.types.bytea_to_string import ByteaToHex
Base = declarative_base()
class User(Base):
    """Declarative model for the ``users`` table used as a test fixture."""

    __tablename__ = "users"
    id = Column(Integer, primary_key=True)
    name = Column(String(256))
    fullname = Column(String(256))
    nickname = Column(String(256))
    comments = Column(TEXT)
    age = Column(Integer)
    # Column under test: declared with the ByteaToHex custom type
    config = Column(ByteaToHex)
class TestCustomTypes(TestCase):
    """Tests for custom SQA types."""

    # In-memory SQLite database and session shared by every test of the class.
    engine = create_engine("sqlite+pysqlite:///:memory:", echo=False, future=True)
    session = Session(engine)

    @classmethod
    def setUpClass(cls) -> None:
        """Create the ``users`` table and insert ten batches of three rows."""
        User.__table__.create(bind=cls.engine)

        for _ in range(10):
            data = [
                User(
                    name="John",
                    fullname="John Doe",
                    nickname="johnny b goode",
                    comments="no comments",
                    age=30,
                    config=memoryview(b"foo"),
                ),
                User(
                    name="Jane",
                    fullname="Jone Doe",
                    nickname=None,
                    comments="maybe some comments",
                    age=31,
                    config=memoryview(b"bar"),
                ),
                User(
                    name="John",
                    fullname="John Doe",
                    nickname=None,
                    comments=None,
                    age=None,
                    config=memoryview(b"fooBar"),
                ),
            ]
            cls.session.add_all(data)
            cls.session.commit()

    def test_bytea_to_hex(self):
        """ByteaToHex returns the hex string of the stored binary value."""
        # Order by primary key so the inspected row is deterministic: the
        # first inserted user carries b"foo".
        row = self.session.query(User.config).order_by(User.id).first()

        self.assertIsInstance(row.config, str)
        self.assertEqual(row.config, b"foo".hex())

    @classmethod
    def tearDownClass(cls) -> None:
        """Release the session, drop the table and dispose of the engine."""
        # Close the session first so its connection/transaction is released
        # before the table is dropped.
        cls.session.close()
        User.__table__.drop(bind=cls.engine)
        cls.engine.dispose()
        return super().tearDownClass()

View File

@ -94,7 +94,8 @@
"VARIANT",
"GEOMETRY",
"POINT",
"POLYGON"
"POLYGON",
"BYTEA"
]
},
"constraint": {