feat: Adding clarity around qualified unions and removing extra lines for structs (#3091)

Co-authored-by: Ravindra Lanka <rlanka@acryl.io>
This commit is contained in:
Gabe Lyons 2021-08-12 16:25:44 -07:00 committed by GitHub
parent 801e39bc40
commit b73d0ebb8b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 136 additions and 166 deletions

View File

@ -1,2 +1,4 @@
export const KEY_SCHEMA_PREFIX = '[key=True].';
export const VERSION_PREFIX = '[version=2.0].';
export const ARRAY_TOKEN = '[type=array]';
export const UNION_TOKEN = '[type=union]';

View File

@ -1,5 +1,4 @@
const ARRAY_TOKEN = '[type=array]';
const UNION_TOKEN = '[type=union]';
import { ARRAY_TOKEN, UNION_TOKEN } from './constants';
export default function translateFieldPathSegment(fieldPathSegment, i, fieldPathParts) {
// for each segment, convert its fieldPath representation into a human readable version

View File

@ -9,7 +9,7 @@ import {
} from '../../../../../../types.generated';
import { convertTagsForUpdate } from '../../../../../shared/tags/utils/convertTagsForUpdate';
import { SchemaDiffSummary } from '../components/SchemaVersionSummary';
import { KEY_SCHEMA_PREFIX, VERSION_PREFIX } from './constants';
import { KEY_SCHEMA_PREFIX, UNION_TOKEN, VERSION_PREFIX } from './constants';
import { ExtendedSchemaFields } from './types';
export function convertEditableSchemaMeta(
@ -85,9 +85,24 @@ export function groupByFieldPath(
const row = { children: undefined, ...rows[rowIndex] };
for (let j = rowIndex - 1; j >= 0; j--) {
if (row.fieldPath.indexOf(rows[j].fieldPath) >= 0) {
parentRow = outputRowByPath[rows[j].fieldPath];
break;
const rowTokens = row.fieldPath.split('.');
const isQualifyingUnionField = rowTokens[rowTokens.length - 3] === UNION_TOKEN;
if (isQualifyingUnionField) {
// in the case of unions, parent will not be a subset of the child
rowTokens.splice(rowTokens.length - 2, 1);
const parentPath = rowTokens.join('.');
console.log({ parentPath });
if (rows[j].fieldPath === parentPath) {
parentRow = outputRowByPath[rows[j].fieldPath];
break;
}
} else if (!isQualifyingUnionField) {
// in the case of structs, arrays, etc, parent will be a subset
if (row.fieldPath.indexOf(rows[j].fieldPath) >= 0) {
parentRow = outputRowByPath[rows[j].fieldPath];
break;
}
}
}

View File

@ -14,7 +14,6 @@ usage stats and data profiles. Therefore, it must satisfy the following requirem
* must be unique across all fields within a schema.
* make schema navigation in the UI more intuitive.
* allow for capturing and editing documentation of intermediate structs/records leading to a filed, and not just fields.
* allow for identifying the type of schema the field is part of, such as a `key-schema` or a `value-schema`.
* allow for future-evolution
@ -49,7 +48,8 @@ the record type `A` or `B`.
The syntax for V2 encoding of the `fieldPath` is captured in the following grammar. The `FieldPathSpec` is essentially
the type annotated path of the member, with each token along the path representing one level of nested member,
starting from the most-enclosing type, leading up to the member. In the case of `unions` that have `one-of` semantics,
the corresponding field will be emitted once for each `member` of the union as its `type`.
the corresponding field will be emitted once for each `member` of the union as its `type`, along with one path
corresponding to the `union` itself.
### Formal Spec:
@ -58,9 +58,8 @@ the corresponding field will be emitted once for each `member` of the union as i
| <VersionToken>.<FieldPathSpec> // when part of a value schema
<VersionToken> := [version=<VersionId>] // [version=2.0] for v2
<PartOfKeySchemaToken> := [key=True] // when part of a key schema
<FieldPathSpec> := <PathTokenPrefix>+ // this is the type prefixed path upto the intermediate type or a field.
<PathTokenPrefix> := <TypePrefixToken> | <FieldToken> // intermeidate type, or a field
<FieldToken> := <TypePrefixToken>.<name_of_the_field> // type of field + field name
<FieldPathSpec> := <FieldToken>+ // this is the type prefixed path field (nested if repeats).
<FieldToken> := <TypePrefixToken>.<name_of_the_field> // type prefixed path of a field.
<TypePrefixToken> := <NestedTypePrefixToken>.<SimpleTypeToken> | <SimpleTypeToken>
<NestedTypePrefixToken> := [type=<NestedType>]
<SimpleTypeToken> := [type=<SimpleType>]
@ -68,16 +67,12 @@ the corresponding field will be emitted once for each `member` of the union as i
<SimpleType> := int | float | double | string | fixed | enum
```
For the [example above](#example-ambiguous-field-path), this encoding would produce the following 5 unique field paths.
Notice that there are 3 unique paths corresponding to the `union`, record `A`, and record `B` intermediate types, and 2
unique paths corresponding to the `A.f` and `B.f` fields.
For the [example above](#example-ambiguous-field-path), this encoding would produce the following 2 unique paths
corresponding to the `A.f` and `B.f` fields.
```python
unique_v2_field_paths = [
"[version=2.0].[type=union]",
"[version=2.0].[type=union].[type=A]",
"[version=2.0].[type=union].[type=A].[type=string].f",
"[version=2.0].[type=union].[type=B]",
"[version=2.0].[type=union].[type=B].[type=string].f"
]
```
@ -86,9 +81,8 @@ NOTE:
- this encoding always ensures uniqueness within a schema since the full type annotation leading to a field is encoded
in the fieldPath itself.
- field paths that end with an intermediate type allow us to capture details at record/struct level as well, in addition
to the fields themselves.
- processing a fieldPath, such as from UI, gets simplified simply by walking each token along the path from left-to-right.
- processing a fieldPath, such as from UI, gets simplified simply by walking each token along the path from
left-to-right.
- adding PartOfKeySchemaToken allows for identifying if the field is part of key-schema.
- adding VersionToken allows for future evolvability.
- to represent `optional` fields, which sometimes are modeled as `unions` in formats like `Avro`, instead of treating it
@ -133,7 +127,6 @@ avro_schema = """
"""
unique_v2_field_paths = [
"[version=2.0].[type=E]", # this is the record E itself that we can maintain documentation for!
"[version=2.0].[type=E].[type=string].a",
"[version=2.0].[type=E].[type=string].b",
]
@ -160,8 +153,6 @@ avro_schema = """
"""
unique_v2_field_paths = [
"[version=2.0].[key=True].[type=SimpleNested]",
"[version=2.0].[key=True].[type=SimpleNested].[type=InnerRcd]",
"[version=2.0].[key=True].[type=SimpleNested].[type=InnerRcd].nestedRcd",
"[version=2.0].[key=True].[type=SimpleNested].[type=InnerRcd].nestedRcd.[type=string].aStringField",
]
@ -189,11 +180,8 @@ avro_schema = """
"""
unique_v2_field_paths = [
"[version=2.0].[type=Recursive]",
"[version=2.0].[type=Recursive].[type=R]",
"[version=2.0].[type=Recursive].[type=R].r",
"[version=2.0].[type=Recursive].[type=R].r.[type=int].anIntegerField",
"[version=2.0].[type=Recursive].[type=R].r.[type=R]",
"[version=2.0].[type=Recursive].[type=R].r.[type=R].aRecursiveField"
]
```
@ -216,10 +204,7 @@ avro_schema ="""
}
"""
unique_v2_field_paths = [
"[version=2.0].[type=TreeNode]",
"[version=2.0].[type=TreeNode].[type=long].value",
"[version=2.0].[type=TreeNode].[type=array]",
"[version=2.0].[type=TreeNode].[type=array].[type=TreeNode]",
"[version=2.0].[type=TreeNode].[type=array].[type=TreeNode].children",
]
```
@ -246,14 +231,11 @@ avro_schema = """
}
"""
unique_v2_field_paths: List[str] = [
"[version=2.0].[key=True].[type=ABFooUnion]",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union]",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=A]",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=A].a",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=A].a.[type=string].f",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=B]",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=B].a",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=B].a.[type=string].f",
"[version=2.0].[key=True].[type=ABUnion].[type=union].a",
"[version=2.0].[key=True].[type=ABUnion].[type=union].[type=A].a",
"[version=2.0].[key=True].[type=ABUnion].[type=union].[type=A].a.[type=string].f",
"[version=2.0].[key=True].[type=ABUnion].[type=union].[type=B].a",
"[version=2.0].[key=True].[type=ABUnion].[type=union].[type=B].a.[type=string].f",
]
```
### Arrays
@ -286,10 +268,6 @@ avro_schema = """
}
"""
unique_v2_field_paths: List[str] = [
"[version=2.0].[type=NestedArray]",
"[version=2.0].[type=NestedArray].[type=array]",
"[version=2.0].[type=NestedArray].[type=array].[type=array]",
"[version=2.0].[type=NestedArray].[type=array].[type=array].[type=Foo]",
"[version=2.0].[type=NestedArray].[type=array].[type=array].[type=Foo].ar",
"[version=2.0].[type=NestedArray].[type=array].[type=array].[type=Foo].ar.[type=long].a",
]
@ -313,8 +291,6 @@ avro_schema = """
}
"""
unique_v2_field_paths = [
"[version=2.0].[type=R]",
"[version=2.0].[type=R].[type=map]",
"[version=2.0].[type=R].[type=map].[type=long].a_map_of_longs_field",
]
@ -357,25 +333,18 @@ avro_schema = """
"""
unique_v2_field_paths: List[str] = [
"[version=2.0].[type=ABFooUnion]",
"[version=2.0].[type=ABFooUnion].[type=union]",
"[version=2.0].[type=ABFooUnion].[type=union].[type=A]",
"[version=2.0].[type=ABFooUnion].[type=union].a",
"[version=2.0].[type=ABFooUnion].[type=union].[type=A].a",
"[version=2.0].[type=ABFooUnion].[type=union].[type=A].a.[type=string].f",
"[version=2.0].[type=ABFooUnion].[type=union].[type=B]",
"[version=2.0].[type=ABFooUnion].[type=union].[type=B].a",
"[version=2.0].[type=ABFooUnion].[type=union].[type=B].a.[type=string].f",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array]",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array]",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo]",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a.[type=long].f",
]
```
For more examples, see
the [unit-tests for AvroToMceSchemaConverter](https://github.com/linkedin/datahub/blob/master/metadata-ingestion/tests/unit/test_schema_util.py)
.
the [unit-tests for AvroToMceSchemaConverter](https://github.com/linkedin/datahub/blob/master/metadata-ingestion/tests/unit/test_schema_util.py).
### Backward-compatibility

View File

@ -129,25 +129,42 @@ class AvroToMceSchemaConverter:
return ".".join(self._prefix_name_stack)
@staticmethod
def _get_type_annotation(schema: ExtendedAvroNestedSchemas) -> str:
if isinstance(schema, avro.schema.RecordSchema):
return f"[type={schema.name}]"
# For fields, just return the plain name.
if isinstance(schema, avro.schema.Field):
return f"{schema.name}"
def _get_simple_native_type(schema: ExtendedAvroNestedSchemas) -> str:
if isinstance(schema, (avro.schema.RecordSchema, avro.schema.Field)):
# For Records, fields, always return the name.
return schema.name
# For optional, use the underlying non-null type
if isinstance(schema, avro.schema.UnionSchema) and len(schema.schemas) == 2:
# Optional types as unions in AVRO. Return underlying non-null sub-type.
(first, second) = schema.schemas
if first.type == avro.schema.NULL:
return f"[type={second.type}]"
return second.type
elif second.type == avro.schema.NULL:
return f"[type={first.type}]"
return first.type
# For everything else, use the schema's type
return f"[type={schema.type}]"
return schema.type
@staticmethod
def _get_type_annotation(schema: ExtendedAvroNestedSchemas) -> str:
simple_native_type = AvroToMceSchemaConverter._get_simple_native_type(schema)
if isinstance(schema, avro.schema.Field):
return simple_native_type
else:
return f"[type={simple_native_type}]"
@staticmethod
def _get_underlying_type_if_option_as_union(
schema: AvroNestedSchemas, default: Optional[AvroNestedSchemas] = None
) -> AvroNestedSchemas:
if isinstance(schema, avro.schema.UnionSchema) and len(schema.schemas) == 2:
(first, second) = schema.schemas
if first.type == avro.schema.NULL:
return second
elif second.type == avro.schema.NULL:
return first
return default
class SchemaFieldEmissionContextManager:
"""Context Manager for MCE SchemaFiled emission
@ -178,32 +195,16 @@ class AvroToMceSchemaConverter:
avro.schema.ArraySchema,
avro.schema.Field,
avro.schema.MapSchema,
avro.schema.UnionSchema,
avro.schema.RecordSchema,
),
)
and self._converter._fields_stack
):
# We are in the context of a non-nested(simple) field.
last_field_schema = self._converter._fields_stack[-1]
description = (
last_field_schema.doc
if last_field_schema.doc
else "No description available."
)
if last_field_schema.has_default:
description = f"{description}\nField default value: {last_field_schema.default}"
with AvroToMceSchemaConverter.SchemaFieldEmissionContextManager(
last_field_schema,
last_field_schema,
self._converter,
description,
) as field_emitter:
yield from field_emitter.emit()
# We are in the context of a non-nested(simple) field or the special-cased union.
yield from self._converter._gen_from_last_field()
else:
# Just emit the SchemaField from schema provided in the Ctor.
schema = self._schema
actual_schema = self._actual_schema
if isinstance(schema, avro.schema.Field):
@ -214,15 +215,24 @@ class AvroToMceSchemaConverter:
schema, schema
)
)
# Emit the schema field provided in the Ctor.
description = self._description
if description is None:
description = schema.props.get("doc", None)
native_data_type = self._converter._prefix_name_stack[-1]
if isinstance(schema, (avro.schema.Field, avro.schema.UnionSchema)):
native_data_type = self._converter._prefix_name_stack[-2]
type_prefix = "[type="
if native_data_type.startswith(type_prefix):
native_data_type = native_data_type[
slice(len(type_prefix), len(native_data_type) - 1)
]
field = SchemaField(
fieldPath=self._converter._get_cur_field_path(),
# Not populating this field for Avro, since this is blowing up the KAFKA message size.
nativeDataType="",
# Populate it with the simple native type for now.
nativeDataType=native_data_type,
type=self._converter._get_column_type(actual_schema.type),
description=description,
recursive=False,
@ -234,18 +244,6 @@ class AvroToMceSchemaConverter:
def __exit__(self, exc_type, exc_val, exc_tb):
self._converter._prefix_name_stack.pop()
@staticmethod
def _get_underlying_type_if_option_as_union(
schema: AvroNestedSchemas, default: Optional[AvroNestedSchemas] = None
) -> AvroNestedSchemas:
if isinstance(schema, avro.schema.UnionSchema) and len(schema.schemas) == 2:
(first, second) = schema.schemas
if first.type == avro.schema.NULL:
return second
elif second.type == avro.schema.NULL:
return first
return default
def _get_sub_schemas(
self, schema: ExtendedAvroNestedSchemas
) -> Generator[avro.schema.Schema, None, None]:
@ -290,13 +288,41 @@ class AvroToMceSchemaConverter:
) -> Generator[SchemaField, None, None]:
"""Handles generation of MCE SchemaFields for an AVRO Field type."""
# NOTE: Here we only manage the field stack and trigger MCE Field generation from this field's type.
# The actual emitting of a field happens
# (a) when another nested record is encountered or (b) a non-nested type has been reached.
# The actual emitting of a field happens when
# (a) another nested record is encountered or
# (b) a non-nested type has been reached or
# (c) during the special-casing for unions.
self._fields_stack.append(field)
for sub_schema in self._get_sub_schemas(field):
yield from self._to_mce_fields(sub_schema)
self._fields_stack.pop()
def _gen_from_last_field(
self, schema_to_recurse: Optional[AvroNestedSchemas] = None
) -> Generator[SchemaField, None, None]:
"""Emits the field most-recent field, optionally triggering sub-schema generation under the field."""
last_field_schema = self._fields_stack[-1]
# Generate the custom-description for the field.
description = (
last_field_schema.doc
if last_field_schema.doc
else "No description available."
)
if last_field_schema.has_default:
description = (
f"{description}\nField default value: {last_field_schema.default}"
)
with AvroToMceSchemaConverter.SchemaFieldEmissionContextManager(
last_field_schema, last_field_schema, self, description
) as f_emit:
yield from f_emit.emit()
if schema_to_recurse is not None:
# Generate the nested sub-schemas under the most-recent field.
for sub_schema in self._get_sub_schemas(schema_to_recurse):
yield from self._to_mce_fields(sub_schema)
def _gen_from_non_field_nested_schemas(
self, schema: AvroNestedSchemas
) -> Generator[SchemaField, None, None]:
@ -315,30 +341,24 @@ class AvroToMceSchemaConverter:
with AvroToMceSchemaConverter.SchemaFieldEmissionContextManager(
schema, actual_schema, self
) as fe_schema:
# Emit non-AVRO field complex schemas(even optional unions that become primitives).
yield from fe_schema.emit()
if isinstance(
actual_schema,
(
avro.schema.UnionSchema,
avro.schema.PrimitiveSchema,
avro.schema.FixedSchema,
avro.schema.EnumSchema,
),
):
# Emit non-AVRO field complex schemas(even optional unions that become primitives) and special-casing for extra union emission.
yield from fe_schema.emit()
if (
isinstance(actual_schema, avro.schema.RecordSchema)
and self._fields_stack
):
# We have encountered a nested record, emit the most-recently seen field.
last_field_schema = self._fields_stack[-1]
description = (
last_field_schema.doc
if last_field_schema.doc
else "No description available."
)
if last_field_schema.has_default:
description = f"{description}\nField default value: {last_field_schema.default}"
with AvroToMceSchemaConverter.SchemaFieldEmissionContextManager(
last_field_schema, last_field_schema, self, description
) as f_emit:
yield from f_emit.emit()
# Generate the nested sub-schemas under the most-recent field.
if recurse:
for sub_schema in self._get_sub_schemas(actual_schema):
yield from self._to_mce_fields(sub_schema)
yield from self._gen_from_last_field(actual_schema if recurse else None)
else:
# We are not yet in the context of any field. Generate all nested sub-schemas under the complex type.
if recurse:

View File

@ -98,8 +98,8 @@ def assret_field_paths_match(
)
def test_avro_schema_to_mce_fields_events_with_nullable_fields(schema):
fields = avro_schema_to_mce_fields(schema)
assert 2 == len(fields)
assert fields[1].nullable
assert 1 == len(fields)
assert fields[0].nullable
def test_avro_schema_to_mce_fields_sample_events_with_different_field_types():
@ -121,8 +121,6 @@ def test_avro_schema_to_mce_fields_sample_events_with_different_field_types():
"""
fields = avro_schema_to_mce_fields(schema)
expected_field_paths = [
"[version=2.0].[type=R]",
"[version=2.0].[type=R].[type=map]",
"[version=2.0].[type=R].[type=map].[type=long].a_map_of_longs_field",
]
assret_field_paths_match(fields, expected_field_paths)
@ -150,7 +148,6 @@ def test_avro_schema_to_mce_fields_record_with_two_fields():
"""
fields = avro_schema_to_mce_fields(schema)
expected_field_paths = [
"[version=2.0].[type=name]",
"[version=2.0].[type=name].[type=string].a",
"[version=2.0].[type=name].[type=string].b",
]
@ -186,11 +183,10 @@ def test_avro_schema_to_mce_fields_with_default():
"""
fields = avro_schema_to_mce_fields(schema)
expected_field_paths = [
"[version=2.0].[type=name]",
"[version=2.0].[type=name].[type=string].aStringField",
]
assret_field_paths_match(fields, expected_field_paths)
description = fields[1].description
description = fields[0].description
assert description and "custom, default value" in description
@ -213,10 +209,7 @@ def test_avro_schema_with_recursion():
"""
fields = avro_schema_to_mce_fields(schema)
expected_field_paths = [
"[version=2.0].[type=TreeNode]",
"[version=2.0].[type=TreeNode].[type=long].value",
"[version=2.0].[type=TreeNode].[type=array]",
"[version=2.0].[type=TreeNode].[type=array].[type=TreeNode]",
"[version=2.0].[type=TreeNode].[type=array].[type=TreeNode].children",
]
assret_field_paths_match(fields, expected_field_paths)
@ -263,11 +256,9 @@ def test_avro_sample_payment_schema_to_mce_fields_with_nesting():
"""
fields = avro_schema_to_mce_fields(schema)
expected_field_paths = [
"[version=2.0].[type=Payment]",
"[version=2.0].[type=Payment].[type=string].id",
"[version=2.0].[type=Payment].[type=double].amount",
"[version=2.0].[type=Payment].[type=string].name",
"[version=2.0].[type=Payment].[type=PhoneNumber]",
"[version=2.0].[type=Payment].[type=PhoneNumber].phoneNumber",
"[version=2.0].[type=Payment].[type=PhoneNumber].phoneNumber.[type=string].areaCode",
"[version=2.0].[type=Payment].[type=PhoneNumber].phoneNumber.[type=string].countryCode",
@ -302,13 +293,10 @@ def test_avro_schema_to_mce_fields_with_nesting_across_records():
fields = avro_schema_to_mce_fields(schema)
expected_field_paths = [
"[version=2.0].[type=union]",
"[version=2.0].[type=union].[type=Address]",
"[version=2.0].[type=union].[type=Address].[type=string].streetAddress",
"[version=2.0].[type=union].[type=Address].[type=string].city",
"[version=2.0].[type=union].[type=Person]",
"[version=2.0].[type=union].[type=Person].[type=string].firstname",
"[version=2.0].[type=union].[type=Person].[type=string].lastname",
"[version=2.0].[type=union].[type=Person].[type=Address]",
"[version=2.0].[type=union].[type=Person].[type=Address].address",
]
assret_field_paths_match(fields, expected_field_paths)
@ -328,7 +316,7 @@ def test_simple_record_with_primitive_types():
"name": "enumField",
"type": {
"type": "enum",
"name": "EnumField",
"name": "MyTestEnumField",
"symbols": [ "TEST", "TEST1" ],
"symbolDoc": {
"TEST": "test enum",
@ -341,7 +329,6 @@ def test_simple_record_with_primitive_types():
"""
fields = avro_schema_to_mce_fields(schema)
expected_field_paths = [
"[version=2.0].[type=Simple]",
"[version=2.0].[type=Simple].[type=string].stringField",
"[version=2.0].[type=Simple].[type=boolean].booleanField",
"[version=2.0].[type=Simple].[type=int].intField",
@ -371,8 +358,6 @@ def test_simple_nested_record_with_a_string_field_for_key_schema():
"""
fields = avro_schema_to_mce_fields(schema, True)
expected_field_paths: List[str] = [
"[version=2.0].[key=True].[type=SimpleNested]",
"[version=2.0].[key=True].[type=SimpleNested].[type=InnerRcd]",
"[version=2.0].[key=True].[type=SimpleNested].[type=InnerRcd].nestedRcd",
"[version=2.0].[key=True].[type=SimpleNested].[type=InnerRcd].nestedRcd.[type=string].aStringField",
]
@ -407,15 +392,17 @@ def test_union_with_nested_record_of_union():
"""
fields = avro_schema_to_mce_fields(schema)
expected_field_paths = [
"[version=2.0].[type=UnionSample]",
"[version=2.0].[type=UnionSample].[type=union]",
"[version=2.0].[type=UnionSample].[type=union].aUnion",
"[version=2.0].[type=UnionSample].[type=union].[type=boolean].aUnion",
"[version=2.0].[type=UnionSample].[type=union].[type=Rcd]",
"[version=2.0].[type=UnionSample].[type=union].[type=Rcd].aUnion",
"[version=2.0].[type=UnionSample].[type=union].[type=Rcd].aUnion.[type=string].aNullableStringField",
]
assret_field_paths_match(fields, expected_field_paths)
assert isinstance(fields[5].type.type, StringTypeClass)
assert isinstance(fields[3].type.type, StringTypeClass)
assert fields[0].nativeDataType == "union"
assert fields[1].nativeDataType == "boolean"
assert fields[2].nativeDataType == "Rcd"
assert fields[3].nativeDataType == "string"
def test_nested_arrays():
@ -448,10 +435,6 @@ def test_nested_arrays():
"""
fields = avro_schema_to_mce_fields(schema)
expected_field_paths: List[str] = [
"[version=2.0].[type=NestedArray]",
"[version=2.0].[type=NestedArray].[type=array]",
"[version=2.0].[type=NestedArray].[type=array].[type=array]",
"[version=2.0].[type=NestedArray].[type=array].[type=array].[type=Foo]",
"[version=2.0].[type=NestedArray].[type=array].[type=array].[type=Foo].ar",
"[version=2.0].[type=NestedArray].[type=array].[type=array].[type=Foo].ar.[type=long].a",
]
@ -485,13 +468,10 @@ def test_map_of_union_of_int_and_record_of_union():
"""
fields = avro_schema_to_mce_fields(schema)
expected_field_paths = [
"[version=2.0].[type=MapSample]",
"[version=2.0].[type=MapSample].[type=map]",
"[version=2.0].[type=MapSample].[type=map].[type=union]",
"[version=2.0].[type=MapSample].[type=map].[type=union].aMap",
"[version=2.0].[type=MapSample].[type=map].[type=union].[type=int].aMap",
"[version=2.0].[type=MapSample].[type=map].[type=union].[type=Rcd]",
"[version=2.0].[type=MapSample].[type=map].[type=union].[type=Rcd].aMap",
"[version=2.0].[type=MapSample].[type=map].[type=union].[type=Rcd].aMap.[type=union]",
"[version=2.0].[type=MapSample].[type=map].[type=union].[type=Rcd].aMap.[type=union].aUnion",
"[version=2.0].[type=MapSample].[type=map].[type=union].[type=Rcd].aMap.[type=union].[type=string].aUnion",
"[version=2.0].[type=MapSample].[type=map].[type=union].[type=Rcd].aMap.[type=union].[type=int].aUnion",
]
@ -519,11 +499,8 @@ def test_recursive_avro():
"""
fields = avro_schema_to_mce_fields(schema)
expected_field_paths = [
"[version=2.0].[type=Recursive]",
"[version=2.0].[type=Recursive].[type=R]",
"[version=2.0].[type=Recursive].[type=R].r",
"[version=2.0].[type=Recursive].[type=R].r.[type=int].anIntegerField",
"[version=2.0].[type=Recursive].[type=R].r.[type=R]",
"[version=2.0].[type=Recursive].[type=R].r.[type=R].aRecursiveField",
]
assret_field_paths_match(fields, expected_field_paths)
@ -564,17 +541,11 @@ def test_needs_disambiguation_nested_union_of_records_with_same_field_name():
"""
fields = avro_schema_to_mce_fields(schema)
expected_field_paths: List[str] = [
"[version=2.0].[type=ABFooUnion]",
"[version=2.0].[type=ABFooUnion].[type=union]",
"[version=2.0].[type=ABFooUnion].[type=union].[type=A]",
"[version=2.0].[type=ABFooUnion].[type=union].a",
"[version=2.0].[type=ABFooUnion].[type=union].[type=A].a",
"[version=2.0].[type=ABFooUnion].[type=union].[type=A].a.[type=string].f",
"[version=2.0].[type=ABFooUnion].[type=union].[type=B]",
"[version=2.0].[type=ABFooUnion].[type=union].[type=B].a",
"[version=2.0].[type=ABFooUnion].[type=union].[type=B].a.[type=string].f",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array]",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array]",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo]",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a.[type=long].f",
]
@ -637,17 +608,11 @@ def test_key_schema_handling():
"""
fields: List[SchemaField] = avro_schema_to_mce_fields(schema, is_key_schema=True)
expected_field_paths: List[str] = [
"[version=2.0].[key=True].[type=ABFooUnion]",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union]",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=A]",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].a",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=A].a",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=A].a.[type=string].f",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=B]",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=B].a",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=B].a.[type=string].f",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array]",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].[type=array]",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo]",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a.[type=long].f",
]