2021-04-08 23:00:01 +02:00
|
|
|
import unittest
|
|
|
|
|
|
|
|
from datahub.ingestion.extractor.schema_util import avro_schema_to_mce_fields
|
|
|
|
|
2021-04-10 02:36:01 +02:00
|
|
|
# Avro record schema (JSON text) with a single field, "my.field", whose type
# is the union ["null", "string"] -- i.e. "null" is listed first in the union.
SCHEMA_WITH_OPTIONAL_FIELD_VIA_UNION_TYPE = """
{
  "type": "record",
  "name": "some.event.name",
  "namespace": "some.event.namespace",
  "fields": [
    {
      "name": "my.field",
      "type": ["null", "string"],
      "doc": "some.doc"
    }
  ]
}
"""
|
|
|
|
|
2021-04-10 02:36:01 +02:00
|
|
|
# Avro record schema (JSON text) with a single field, "my.field", whose type
# is the union ["string", "null"] -- same members as the null-first variant,
# but with "null" listed last.
SCHEMA_WITH_OPTIONAL_FIELD_VIA_UNION_TYPE_NULL_ISNT_FIRST_IN_UNION = """
{
  "type": "record",
  "name": "some.event.name",
  "namespace": "some.event.namespace",
  "fields": [
    {
      "name": "my.field",
      "type": ["string", "null"],
      "doc": "some.doc"
    }
  ]
}
"""
|
|
|
|
|
2021-04-10 02:36:01 +02:00
|
|
|
# Avro record schema (JSON text) with a single field, "my.field", declared
# directly with the primitive type "null" (no union involved).
SCHEMA_WITH_OPTIONAL_FIELD_VIA_PRIMITIVE_TYPE = """
{
  "type": "record",
  "name": "some.event.name",
  "namespace": "some.event.namespace",
  "fields": [
    {
      "name": "my.field",
      "type": "null",
      "doc": "some.doc"
    }
  ]
}
"""
|
|
|
|
|
2021-04-08 23:23:12 +02:00
|
|
|
# Avro record schema (JSON text) with a single field, "some.field.name",
# whose type is a map with "long" values.
SCHEMA_WITH_MAP_TYPE_FIELD = """
{
  "type": "record",
  "name": "some.event.name",
  "namespace": "some.namespace",
  "fields": [
    {
      "name": "some.field.name",
      "type": {
        "type": "map",
        "values": "long"
      }
    }
  ]
}
"""
|
|
|
|
|
2021-04-10 02:36:01 +02:00
|
|
|
# Avro schema (JSON text) whose top level is the primitive type "string"
# rather than a record.
SCHEMA_WITH_TOP_LEVEL_PRIMITIVE_FIELD = """
{
  "type": "string"
}
"""
|
|
|
|
|
|
|
|
# Avro record schema (JSON text) with two string fields, "my.field.A" and
# "my.field.B".
SCHEMA_WITH_TWO_FIELD_RECORD = """
{
  "type": "record",
  "name": "some.event.name",
  "namespace": "some.event.namespace",
  "fields": [
    {
      "name": "my.field.A",
      "type": "string",
      "doc": "some.doc"
    },
    {
      "name": "my.field.B",
      "type": "string",
      "doc": "some.doc"
    }
  ]
}
"""
|
|
|
|
|
2021-04-08 23:00:01 +02:00
|
|
|
|
|
|
|
class SchemaUtilTest(unittest.TestCase):
    """Tests for ``avro_schema_to_mce_fields``.

    Every test feeds one or more Avro schema strings (the module-level
    ``SCHEMA_*`` constants) to ``avro_schema_to_mce_fields`` and checks the
    number of returned fields and, where relevant, their ``nullable`` flag.
    """

    def test_avro_schema_to_mce_fields_events_with_nullable_fields(self):
        """Each nullable-field schema yields exactly one field marked nullable."""
        examples = [
            SCHEMA_WITH_OPTIONAL_FIELD_VIA_UNION_TYPE,
            SCHEMA_WITH_OPTIONAL_FIELD_VIA_UNION_TYPE_NULL_ISNT_FIRST_IN_UNION,
            SCHEMA_WITH_OPTIONAL_FIELD_VIA_PRIMITIVE_TYPE,
        ]

        for schema in examples:
            # subTest labels a failure with the schema that triggered it.
            with self.subTest(schema=schema):
                fields = avro_schema_to_mce_fields(schema)
                self.assertEqual(1, len(fields))
                self.assertTrue(fields[0].nullable)

    def test_avro_schema_to_mce_fields_sample_events_with_different_field_types(self):
        """A record with a single map-typed field yields exactly one field."""
        examples = [SCHEMA_WITH_MAP_TYPE_FIELD]

        for schema in examples:
            with self.subTest(schema=schema):
                fields = avro_schema_to_mce_fields(schema)
                self.assertEqual(1, len(fields))

    def test_avro_schema_to_mce_fields_record_with_two_fields(self):
        """A record declaring two fields yields exactly two fields."""
        examples = [SCHEMA_WITH_TWO_FIELD_RECORD]

        for schema in examples:
            with self.subTest(schema=schema):
                fields = avro_schema_to_mce_fields(schema)
                self.assertEqual(2, len(fields))

    def test_avro_schema_to_mce_fields_toplevel_isnt_a_record(self):
        """A schema whose top level is a primitive type yields exactly one field."""
        examples = [SCHEMA_WITH_TOP_LEVEL_PRIMITIVE_FIELD]

        for schema in examples:
            with self.subTest(schema=schema):
                fields = avro_schema_to_mce_fields(schema)
                self.assertEqual(1, len(fields))
|