datahub/metadata-ingestion/tests/unit/test_schema_util.py

131 lines
2.8 KiB
Python
Raw Normal View History

import unittest
from datahub.ingestion.extractor.schema_util import avro_schema_to_mce_fields
# Avro record schema (JSON text) with a single field, "my.field", whose type
# is the union ["null", "string"] -- "null" is listed first in the union.
SCHEMA_WITH_OPTIONAL_FIELD_VIA_UNION_TYPE = """
{
"type": "record",
"name": "some.event.name",
"namespace": "some.event.namespace",
"fields": [
{
"name": "my.field",
"type": ["null", "string"],
"doc": "some.doc"
}
]
}
"""
# Avro record schema (JSON text) with a single field, "my.field", whose type
# is the union ["string", "null"] -- here "null" is the second union member.
SCHEMA_WITH_OPTIONAL_FIELD_VIA_UNION_TYPE_NULL_ISNT_FIRST_IN_UNION = """
{
"type": "record",
"name": "some.event.name",
"namespace": "some.event.namespace",
"fields": [
{
"name": "my.field",
"type": ["string", "null"],
"doc": "some.doc"
}
]
}
"""
# Avro record schema (JSON text) with a single field, "my.field", whose type
# is the bare primitive "null" (no union involved).
SCHEMA_WITH_OPTIONAL_FIELD_VIA_PRIMITIVE_TYPE = """
{
"type": "record",
"name": "some.event.name",
"namespace": "some.event.namespace",
"fields": [
{
"name": "my.field",
"type": "null",
"doc": "some.doc"
}
]
}
"""
# Avro record schema (JSON text) with a single field, "some.field.name",
# whose type is a map with "long" values. The field carries no "doc" entry.
SCHEMA_WITH_MAP_TYPE_FIELD = """
{
"type": "record",
"name": "some.event.name",
"namespace": "some.namespace",
"fields": [
{
"name": "some.field.name",
"type": {
"type": "map",
"values": "long"
}
}
]
}
"""
# Avro schema (JSON text) whose top-level type is the primitive "string"
# rather than a record; it declares no name and no fields.
SCHEMA_WITH_TOP_LEVEL_PRIMITIVE_FIELD = """
{
"type": "string"
}
"""
# Avro record schema (JSON text) with two "string" fields, "my.field.A" and
# "my.field.B", each with a "doc" entry.
SCHEMA_WITH_TWO_FIELD_RECORD = """
{
"type": "record",
"name": "some.event.name",
"namespace": "some.event.namespace",
"fields": [
{
"name": "my.field.A",
"type": "string",
"doc": "some.doc"
},
{
"name": "my.field.B",
"type": "string",
"doc": "some.doc"
}
]
}
"""
class SchemaUtilTest(unittest.TestCase):
    """Tests for ``avro_schema_to_mce_fields``.

    Every test passes one or more Avro schema JSON strings to the converter
    and checks how many fields come back (plus the ``nullable`` flag where
    that is the point of the test). Each example runs inside ``subTest`` so a
    failure names the offending schema and the remaining examples still run.
    """

    def test_avro_schema_to_mce_fields_events_with_nullable_fields(self) -> None:
        """Each nullable-field schema yields exactly one field flagged nullable."""
        examples = [
            SCHEMA_WITH_OPTIONAL_FIELD_VIA_UNION_TYPE,
            SCHEMA_WITH_OPTIONAL_FIELD_VIA_UNION_TYPE_NULL_ISNT_FIRST_IN_UNION,
            SCHEMA_WITH_OPTIONAL_FIELD_VIA_PRIMITIVE_TYPE,
        ]
        for schema in examples:
            with self.subTest(schema=schema):
                fields = avro_schema_to_mce_fields(schema)
                self.assertEqual(1, len(fields))
                self.assertTrue(fields[0].nullable)

    def test_avro_schema_to_mce_fields_sample_events_with_different_field_types(
        self,
    ) -> None:
        """A record with a single map-typed field yields exactly one field."""
        examples = [SCHEMA_WITH_MAP_TYPE_FIELD]
        for schema in examples:
            with self.subTest(schema=schema):
                fields = avro_schema_to_mce_fields(schema)
                self.assertEqual(1, len(fields))

    def test_avro_schema_to_mce_fields_record_with_two_fields(self) -> None:
        """A record declaring two fields yields exactly two fields."""
        examples = [SCHEMA_WITH_TWO_FIELD_RECORD]
        for schema in examples:
            with self.subTest(schema=schema):
                fields = avro_schema_to_mce_fields(schema)
                self.assertEqual(2, len(fields))

    def test_avro_schema_to_mce_fields_toplevel_isnt_a_record(self) -> None:
        """A schema whose top level is a primitive yields exactly one field."""
        examples = [SCHEMA_WITH_TOP_LEVEL_PRIMITIVE_FIELD]
        for schema in examples:
            with self.subTest(schema=schema):
                fields = avro_schema_to_mce_fields(schema)
                self.assertEqual(1, len(fields))