feat: Adding clarity around qualified unions and removing extra lines for structs (#3091)

Co-authored-by: Ravindra Lanka <rlanka@acryl.io>
This commit is contained in:
Gabe Lyons 2021-08-12 16:25:44 -07:00 committed by GitHub
parent 801e39bc40
commit b73d0ebb8b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 136 additions and 166 deletions

View File

@ -1,2 +1,4 @@
export const KEY_SCHEMA_PREFIX = '[key=True].'; export const KEY_SCHEMA_PREFIX = '[key=True].';
export const VERSION_PREFIX = '[version=2.0].'; export const VERSION_PREFIX = '[version=2.0].';
export const ARRAY_TOKEN = '[type=array]';
export const UNION_TOKEN = '[type=union]';

View File

@ -1,5 +1,4 @@
const ARRAY_TOKEN = '[type=array]'; import { ARRAY_TOKEN, UNION_TOKEN } from './constants';
const UNION_TOKEN = '[type=union]';
export default function translateFieldPathSegment(fieldPathSegment, i, fieldPathParts) { export default function translateFieldPathSegment(fieldPathSegment, i, fieldPathParts) {
// for each segment, convert its fieldPath representation into a human readable version // for each segment, convert its fieldPath representation into a human readable version

View File

@ -9,7 +9,7 @@ import {
} from '../../../../../../types.generated'; } from '../../../../../../types.generated';
import { convertTagsForUpdate } from '../../../../../shared/tags/utils/convertTagsForUpdate'; import { convertTagsForUpdate } from '../../../../../shared/tags/utils/convertTagsForUpdate';
import { SchemaDiffSummary } from '../components/SchemaVersionSummary'; import { SchemaDiffSummary } from '../components/SchemaVersionSummary';
import { KEY_SCHEMA_PREFIX, VERSION_PREFIX } from './constants'; import { KEY_SCHEMA_PREFIX, UNION_TOKEN, VERSION_PREFIX } from './constants';
import { ExtendedSchemaFields } from './types'; import { ExtendedSchemaFields } from './types';
export function convertEditableSchemaMeta( export function convertEditableSchemaMeta(
@ -85,9 +85,24 @@ export function groupByFieldPath(
const row = { children: undefined, ...rows[rowIndex] }; const row = { children: undefined, ...rows[rowIndex] };
for (let j = rowIndex - 1; j >= 0; j--) { for (let j = rowIndex - 1; j >= 0; j--) {
if (row.fieldPath.indexOf(rows[j].fieldPath) >= 0) { const rowTokens = row.fieldPath.split('.');
parentRow = outputRowByPath[rows[j].fieldPath]; const isQualifyingUnionField = rowTokens[rowTokens.length - 3] === UNION_TOKEN;
break; if (isQualifyingUnionField) {
// in the case of unions, parent will not be a subset of the child
rowTokens.splice(rowTokens.length - 2, 1);
const parentPath = rowTokens.join('.');
console.log({ parentPath });
if (rows[j].fieldPath === parentPath) {
parentRow = outputRowByPath[rows[j].fieldPath];
break;
}
} else if (!isQualifyingUnionField) {
// in the case of structs, arrays, etc, parent will be a subset
if (row.fieldPath.indexOf(rows[j].fieldPath) >= 0) {
parentRow = outputRowByPath[rows[j].fieldPath];
break;
}
} }
} }

View File

@ -14,7 +14,6 @@ usage stats and data profiles. Therefore, it must satisfy the following requirem
* must be unique across all fields within a schema. * must be unique across all fields within a schema.
* make schema navigation in the UI more intuitive. * make schema navigation in the UI more intuitive.
* allow for capturing and editing documentation of intermediate structs/records leading to a filed, and not just fields.
* allow for identifying the type of schema the field is part of, such as a `key-schema` or a `value-schema`. * allow for identifying the type of schema the field is part of, such as a `key-schema` or a `value-schema`.
* allow for future-evolution * allow for future-evolution
@ -49,7 +48,8 @@ the record type `A` or `B`.
The syntax for V2 encoding of the `fieldPath` is captured in the following grammar. The `FieldPathSpec` is essentially The syntax for V2 encoding of the `fieldPath` is captured in the following grammar. The `FieldPathSpec` is essentially
the type annotated path of the member, with each token along the path representing one level of nested member, the type annotated path of the member, with each token along the path representing one level of nested member,
starting from the most-enclosing type, leading up to the member. In the case of `unions` that have `one-of` semantics, starting from the most-enclosing type, leading up to the member. In the case of `unions` that have `one-of` semantics,
the corresponding field will be emitted once for each `member` of the union as its `type`. the corresponding field will be emitted once for each `member` of the union as its `type`, along with one path
corresponding to the `union` itself.
### Formal Spec: ### Formal Spec:
@ -58,9 +58,8 @@ the corresponding field will be emitted once for each `member` of the union as i
| <VersionToken>.<FieldPathSpec> // when part of a value schema | <VersionToken>.<FieldPathSpec> // when part of a value schema
<VersionToken> := [version=<VersionId>] // [version=2.0] for v2 <VersionToken> := [version=<VersionId>] // [version=2.0] for v2
<PartOfKeySchemaToken> := [key=True] // when part of a key schema <PartOfKeySchemaToken> := [key=True] // when part of a key schema
<FieldPathSpec> := <PathTokenPrefix>+ // this is the type prefixed path upto the intermediate type or a field. <FieldPathSpec> := <FieldToken>+ // this is the type prefixed path field (nested if repeats).
<PathTokenPrefix> := <TypePrefixToken> | <FieldToken> // intermeidate type, or a field <FieldToken> := <TypePrefixToken>.<name_of_the_field> // type prefixed path of a field.
<FieldToken> := <TypePrefixToken>.<name_of_the_field> // type of field + field name
<TypePrefixToken> := <NestedTypePrefixToken>.<SimpleTypeToken> | <SimpleTypeToken> <TypePrefixToken> := <NestedTypePrefixToken>.<SimpleTypeToken> | <SimpleTypeToken>
<NestedTypePrefixToken> := [type=<NestedType>] <NestedTypePrefixToken> := [type=<NestedType>]
<SimpleTypeToken> := [type=<SimpleType>] <SimpleTypeToken> := [type=<SimpleType>]
@ -68,16 +67,12 @@ the corresponding field will be emitted once for each `member` of the union as i
<SimpleType> := int | float | double | string | fixed | enum <SimpleType> := int | float | double | string | fixed | enum
``` ```
For the [example above](#example-ambiguous-field-path), this encoding would produce the following 5 unique field paths. For the [example above](#example-ambiguous-field-path), this encoding would produce the following 2 unique paths
Notice that there are 3 unique paths corresponding to the `union`, record `A`, and record `B` intermediate types, and 2 corresponding to the `A.f` and `B.f` fields.
unique paths corresponding to the `A.f` and `B.f` fields.
```python ```python
unique_v2_field_paths = [ unique_v2_field_paths = [
"[version=2.0].[type=union]",
"[version=2.0].[type=union].[type=A]",
"[version=2.0].[type=union].[type=A].[type=string].f", "[version=2.0].[type=union].[type=A].[type=string].f",
"[version=2.0].[type=union].[type=B]",
"[version=2.0].[type=union].[type=B].[type=string].f" "[version=2.0].[type=union].[type=B].[type=string].f"
] ]
``` ```
@ -86,9 +81,8 @@ NOTE:
- this encoding always ensures uniqueness within a schema since the full type annotation leading to a field is encoded - this encoding always ensures uniqueness within a schema since the full type annotation leading to a field is encoded
in the fieldPath itself. in the fieldPath itself.
- field paths that end with an intermediate type allow us to capture details at record/struct level as well, in addition - processing a fieldPath, such as from UI, gets simplified simply by walking each token along the path from
to the fields themselves. left-to-right.
- processing a fieldPath, such as from UI, gets simplified simply by walking each token along the path from left-to-right.
- adding PartOfKeySchemaToken allows for identifying if the field is part of key-schema. - adding PartOfKeySchemaToken allows for identifying if the field is part of key-schema.
- adding VersionToken allows for future evolvability. - adding VersionToken allows for future evolvability.
- to represent `optional` fields, which sometimes are modeled as `unions` in formats like `Avro`, instead of treating it - to represent `optional` fields, which sometimes are modeled as `unions` in formats like `Avro`, instead of treating it
@ -133,7 +127,6 @@ avro_schema = """
""" """
unique_v2_field_paths = [ unique_v2_field_paths = [
"[version=2.0].[type=E]", # this is the record E itself that we can maintain documentation for!
"[version=2.0].[type=E].[type=string].a", "[version=2.0].[type=E].[type=string].a",
"[version=2.0].[type=E].[type=string].b", "[version=2.0].[type=E].[type=string].b",
] ]
@ -160,8 +153,6 @@ avro_schema = """
""" """
unique_v2_field_paths = [ unique_v2_field_paths = [
"[version=2.0].[key=True].[type=SimpleNested]",
"[version=2.0].[key=True].[type=SimpleNested].[type=InnerRcd]",
"[version=2.0].[key=True].[type=SimpleNested].[type=InnerRcd].nestedRcd", "[version=2.0].[key=True].[type=SimpleNested].[type=InnerRcd].nestedRcd",
"[version=2.0].[key=True].[type=SimpleNested].[type=InnerRcd].nestedRcd.[type=string].aStringField", "[version=2.0].[key=True].[type=SimpleNested].[type=InnerRcd].nestedRcd.[type=string].aStringField",
] ]
@ -189,11 +180,8 @@ avro_schema = """
""" """
unique_v2_field_paths = [ unique_v2_field_paths = [
"[version=2.0].[type=Recursive]",
"[version=2.0].[type=Recursive].[type=R]",
"[version=2.0].[type=Recursive].[type=R].r", "[version=2.0].[type=Recursive].[type=R].r",
"[version=2.0].[type=Recursive].[type=R].r.[type=int].anIntegerField", "[version=2.0].[type=Recursive].[type=R].r.[type=int].anIntegerField",
"[version=2.0].[type=Recursive].[type=R].r.[type=R]",
"[version=2.0].[type=Recursive].[type=R].r.[type=R].aRecursiveField" "[version=2.0].[type=Recursive].[type=R].r.[type=R].aRecursiveField"
] ]
``` ```
@ -216,10 +204,7 @@ avro_schema ="""
} }
""" """
unique_v2_field_paths = [ unique_v2_field_paths = [
"[version=2.0].[type=TreeNode]",
"[version=2.0].[type=TreeNode].[type=long].value", "[version=2.0].[type=TreeNode].[type=long].value",
"[version=2.0].[type=TreeNode].[type=array]",
"[version=2.0].[type=TreeNode].[type=array].[type=TreeNode]",
"[version=2.0].[type=TreeNode].[type=array].[type=TreeNode].children", "[version=2.0].[type=TreeNode].[type=array].[type=TreeNode].children",
] ]
``` ```
@ -246,14 +231,11 @@ avro_schema = """
} }
""" """
unique_v2_field_paths: List[str] = [ unique_v2_field_paths: List[str] = [
"[version=2.0].[key=True].[type=ABFooUnion]", "[version=2.0].[key=True].[type=ABUnion].[type=union].a",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union]", "[version=2.0].[key=True].[type=ABUnion].[type=union].[type=A].a",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=A]", "[version=2.0].[key=True].[type=ABUnion].[type=union].[type=A].a.[type=string].f",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=A].a", "[version=2.0].[key=True].[type=ABUnion].[type=union].[type=B].a",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=A].a.[type=string].f", "[version=2.0].[key=True].[type=ABUnion].[type=union].[type=B].a.[type=string].f",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=B]",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=B].a",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=B].a.[type=string].f",
] ]
``` ```
### Arrays ### Arrays
@ -286,10 +268,6 @@ avro_schema = """
} }
""" """
unique_v2_field_paths: List[str] = [ unique_v2_field_paths: List[str] = [
"[version=2.0].[type=NestedArray]",
"[version=2.0].[type=NestedArray].[type=array]",
"[version=2.0].[type=NestedArray].[type=array].[type=array]",
"[version=2.0].[type=NestedArray].[type=array].[type=array].[type=Foo]",
"[version=2.0].[type=NestedArray].[type=array].[type=array].[type=Foo].ar", "[version=2.0].[type=NestedArray].[type=array].[type=array].[type=Foo].ar",
"[version=2.0].[type=NestedArray].[type=array].[type=array].[type=Foo].ar.[type=long].a", "[version=2.0].[type=NestedArray].[type=array].[type=array].[type=Foo].ar.[type=long].a",
] ]
@ -313,8 +291,6 @@ avro_schema = """
} }
""" """
unique_v2_field_paths = [ unique_v2_field_paths = [
"[version=2.0].[type=R]",
"[version=2.0].[type=R].[type=map]",
"[version=2.0].[type=R].[type=map].[type=long].a_map_of_longs_field", "[version=2.0].[type=R].[type=map].[type=long].a_map_of_longs_field",
] ]
@ -357,25 +333,18 @@ avro_schema = """
""" """
unique_v2_field_paths: List[str] = [ unique_v2_field_paths: List[str] = [
"[version=2.0].[type=ABFooUnion]", "[version=2.0].[type=ABFooUnion].[type=union].a",
"[version=2.0].[type=ABFooUnion].[type=union]",
"[version=2.0].[type=ABFooUnion].[type=union].[type=A]",
"[version=2.0].[type=ABFooUnion].[type=union].[type=A].a", "[version=2.0].[type=ABFooUnion].[type=union].[type=A].a",
"[version=2.0].[type=ABFooUnion].[type=union].[type=A].a.[type=string].f", "[version=2.0].[type=ABFooUnion].[type=union].[type=A].a.[type=string].f",
"[version=2.0].[type=ABFooUnion].[type=union].[type=B]",
"[version=2.0].[type=ABFooUnion].[type=union].[type=B].a", "[version=2.0].[type=ABFooUnion].[type=union].[type=B].a",
"[version=2.0].[type=ABFooUnion].[type=union].[type=B].a.[type=string].f", "[version=2.0].[type=ABFooUnion].[type=union].[type=B].a.[type=string].f",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array]",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array]",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo]",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a", "[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a.[type=long].f", "[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a.[type=long].f",
] ]
``` ```
For more examples, see For more examples, see
the [unit-tests for AvroToMceSchemaConverter](https://github.com/linkedin/datahub/blob/master/metadata-ingestion/tests/unit/test_schema_util.py) the [unit-tests for AvroToMceSchemaConverter](https://github.com/linkedin/datahub/blob/master/metadata-ingestion/tests/unit/test_schema_util.py).
.
### Backward-compatibility ### Backward-compatibility

View File

@ -129,25 +129,42 @@ class AvroToMceSchemaConverter:
return ".".join(self._prefix_name_stack) return ".".join(self._prefix_name_stack)
@staticmethod @staticmethod
def _get_type_annotation(schema: ExtendedAvroNestedSchemas) -> str: def _get_simple_native_type(schema: ExtendedAvroNestedSchemas) -> str:
if isinstance(schema, avro.schema.RecordSchema): if isinstance(schema, (avro.schema.RecordSchema, avro.schema.Field)):
return f"[type={schema.name}]" # For Records, fields, always return the name.
return schema.name
# For fields, just return the plain name.
if isinstance(schema, avro.schema.Field):
return f"{schema.name}"
# For optional, use the underlying non-null type # For optional, use the underlying non-null type
if isinstance(schema, avro.schema.UnionSchema) and len(schema.schemas) == 2: if isinstance(schema, avro.schema.UnionSchema) and len(schema.schemas) == 2:
# Optional types as unions in AVRO. Return underlying non-null sub-type. # Optional types as unions in AVRO. Return underlying non-null sub-type.
(first, second) = schema.schemas (first, second) = schema.schemas
if first.type == avro.schema.NULL: if first.type == avro.schema.NULL:
return f"[type={second.type}]" return second.type
elif second.type == avro.schema.NULL: elif second.type == avro.schema.NULL:
return f"[type={first.type}]" return first.type
# For everything else, use the schema's type # For everything else, use the schema's type
return f"[type={schema.type}]" return schema.type
@staticmethod
def _get_type_annotation(schema: ExtendedAvroNestedSchemas) -> str:
simple_native_type = AvroToMceSchemaConverter._get_simple_native_type(schema)
if isinstance(schema, avro.schema.Field):
return simple_native_type
else:
return f"[type={simple_native_type}]"
@staticmethod
def _get_underlying_type_if_option_as_union(
schema: AvroNestedSchemas, default: Optional[AvroNestedSchemas] = None
) -> AvroNestedSchemas:
if isinstance(schema, avro.schema.UnionSchema) and len(schema.schemas) == 2:
(first, second) = schema.schemas
if first.type == avro.schema.NULL:
return second
elif second.type == avro.schema.NULL:
return first
return default
class SchemaFieldEmissionContextManager: class SchemaFieldEmissionContextManager:
"""Context Manager for MCE SchemaFiled emission """Context Manager for MCE SchemaFiled emission
@ -178,32 +195,16 @@ class AvroToMceSchemaConverter:
avro.schema.ArraySchema, avro.schema.ArraySchema,
avro.schema.Field, avro.schema.Field,
avro.schema.MapSchema, avro.schema.MapSchema,
avro.schema.UnionSchema,
avro.schema.RecordSchema, avro.schema.RecordSchema,
), ),
) )
and self._converter._fields_stack and self._converter._fields_stack
): ):
# We are in the context of a non-nested(simple) field. # We are in the context of a non-nested(simple) field or the special-cased union.
last_field_schema = self._converter._fields_stack[-1] yield from self._converter._gen_from_last_field()
description = (
last_field_schema.doc
if last_field_schema.doc
else "No description available."
)
if last_field_schema.has_default:
description = f"{description}\nField default value: {last_field_schema.default}"
with AvroToMceSchemaConverter.SchemaFieldEmissionContextManager(
last_field_schema,
last_field_schema,
self._converter,
description,
) as field_emitter:
yield from field_emitter.emit()
else: else:
# Just emit the SchemaField from schema provided in the Ctor.
schema = self._schema schema = self._schema
actual_schema = self._actual_schema actual_schema = self._actual_schema
if isinstance(schema, avro.schema.Field): if isinstance(schema, avro.schema.Field):
@ -214,15 +215,24 @@ class AvroToMceSchemaConverter:
schema, schema schema, schema
) )
) )
# Emit the schema field provided in the Ctor.
description = self._description description = self._description
if description is None: if description is None:
description = schema.props.get("doc", None) description = schema.props.get("doc", None)
native_data_type = self._converter._prefix_name_stack[-1]
if isinstance(schema, (avro.schema.Field, avro.schema.UnionSchema)):
native_data_type = self._converter._prefix_name_stack[-2]
type_prefix = "[type="
if native_data_type.startswith(type_prefix):
native_data_type = native_data_type[
slice(len(type_prefix), len(native_data_type) - 1)
]
field = SchemaField( field = SchemaField(
fieldPath=self._converter._get_cur_field_path(), fieldPath=self._converter._get_cur_field_path(),
# Not populating this field for Avro, since this is blowing up the KAFKA message size. # Populate it with the simple native type for now.
nativeDataType="", nativeDataType=native_data_type,
type=self._converter._get_column_type(actual_schema.type), type=self._converter._get_column_type(actual_schema.type),
description=description, description=description,
recursive=False, recursive=False,
@ -234,18 +244,6 @@ class AvroToMceSchemaConverter:
def __exit__(self, exc_type, exc_val, exc_tb): def __exit__(self, exc_type, exc_val, exc_tb):
self._converter._prefix_name_stack.pop() self._converter._prefix_name_stack.pop()
@staticmethod
def _get_underlying_type_if_option_as_union(
schema: AvroNestedSchemas, default: Optional[AvroNestedSchemas] = None
) -> AvroNestedSchemas:
if isinstance(schema, avro.schema.UnionSchema) and len(schema.schemas) == 2:
(first, second) = schema.schemas
if first.type == avro.schema.NULL:
return second
elif second.type == avro.schema.NULL:
return first
return default
def _get_sub_schemas( def _get_sub_schemas(
self, schema: ExtendedAvroNestedSchemas self, schema: ExtendedAvroNestedSchemas
) -> Generator[avro.schema.Schema, None, None]: ) -> Generator[avro.schema.Schema, None, None]:
@ -290,13 +288,41 @@ class AvroToMceSchemaConverter:
) -> Generator[SchemaField, None, None]: ) -> Generator[SchemaField, None, None]:
"""Handles generation of MCE SchemaFields for an AVRO Field type.""" """Handles generation of MCE SchemaFields for an AVRO Field type."""
# NOTE: Here we only manage the field stack and trigger MCE Field generation from this field's type. # NOTE: Here we only manage the field stack and trigger MCE Field generation from this field's type.
# The actual emitting of a field happens # The actual emitting of a field happens when
# (a) when another nested record is encountered or (b) a non-nested type has been reached. # (a) another nested record is encountered or
# (b) a non-nested type has been reached or
# (c) during the special-casing for unions.
self._fields_stack.append(field) self._fields_stack.append(field)
for sub_schema in self._get_sub_schemas(field): for sub_schema in self._get_sub_schemas(field):
yield from self._to_mce_fields(sub_schema) yield from self._to_mce_fields(sub_schema)
self._fields_stack.pop() self._fields_stack.pop()
def _gen_from_last_field(
self, schema_to_recurse: Optional[AvroNestedSchemas] = None
) -> Generator[SchemaField, None, None]:
"""Emits the field most-recent field, optionally triggering sub-schema generation under the field."""
last_field_schema = self._fields_stack[-1]
# Generate the custom-description for the field.
description = (
last_field_schema.doc
if last_field_schema.doc
else "No description available."
)
if last_field_schema.has_default:
description = (
f"{description}\nField default value: {last_field_schema.default}"
)
with AvroToMceSchemaConverter.SchemaFieldEmissionContextManager(
last_field_schema, last_field_schema, self, description
) as f_emit:
yield from f_emit.emit()
if schema_to_recurse is not None:
# Generate the nested sub-schemas under the most-recent field.
for sub_schema in self._get_sub_schemas(schema_to_recurse):
yield from self._to_mce_fields(sub_schema)
def _gen_from_non_field_nested_schemas( def _gen_from_non_field_nested_schemas(
self, schema: AvroNestedSchemas self, schema: AvroNestedSchemas
) -> Generator[SchemaField, None, None]: ) -> Generator[SchemaField, None, None]:
@ -315,30 +341,24 @@ class AvroToMceSchemaConverter:
with AvroToMceSchemaConverter.SchemaFieldEmissionContextManager( with AvroToMceSchemaConverter.SchemaFieldEmissionContextManager(
schema, actual_schema, self schema, actual_schema, self
) as fe_schema: ) as fe_schema:
# Emit non-AVRO field complex schemas(even optional unions that become primitives). if isinstance(
yield from fe_schema.emit() actual_schema,
(
avro.schema.UnionSchema,
avro.schema.PrimitiveSchema,
avro.schema.FixedSchema,
avro.schema.EnumSchema,
),
):
# Emit non-AVRO field complex schemas(even optional unions that become primitives) and special-casing for extra union emission.
yield from fe_schema.emit()
if ( if (
isinstance(actual_schema, avro.schema.RecordSchema) isinstance(actual_schema, avro.schema.RecordSchema)
and self._fields_stack and self._fields_stack
): ):
# We have encountered a nested record, emit the most-recently seen field. # We have encountered a nested record, emit the most-recently seen field.
last_field_schema = self._fields_stack[-1] yield from self._gen_from_last_field(actual_schema if recurse else None)
description = (
last_field_schema.doc
if last_field_schema.doc
else "No description available."
)
if last_field_schema.has_default:
description = f"{description}\nField default value: {last_field_schema.default}"
with AvroToMceSchemaConverter.SchemaFieldEmissionContextManager(
last_field_schema, last_field_schema, self, description
) as f_emit:
yield from f_emit.emit()
# Generate the nested sub-schemas under the most-recent field.
if recurse:
for sub_schema in self._get_sub_schemas(actual_schema):
yield from self._to_mce_fields(sub_schema)
else: else:
# We are not yet in the context of any field. Generate all nested sub-schemas under the complex type. # We are not yet in the context of any field. Generate all nested sub-schemas under the complex type.
if recurse: if recurse:

View File

@ -98,8 +98,8 @@ def assret_field_paths_match(
) )
def test_avro_schema_to_mce_fields_events_with_nullable_fields(schema): def test_avro_schema_to_mce_fields_events_with_nullable_fields(schema):
fields = avro_schema_to_mce_fields(schema) fields = avro_schema_to_mce_fields(schema)
assert 2 == len(fields) assert 1 == len(fields)
assert fields[1].nullable assert fields[0].nullable
def test_avro_schema_to_mce_fields_sample_events_with_different_field_types(): def test_avro_schema_to_mce_fields_sample_events_with_different_field_types():
@ -121,8 +121,6 @@ def test_avro_schema_to_mce_fields_sample_events_with_different_field_types():
""" """
fields = avro_schema_to_mce_fields(schema) fields = avro_schema_to_mce_fields(schema)
expected_field_paths = [ expected_field_paths = [
"[version=2.0].[type=R]",
"[version=2.0].[type=R].[type=map]",
"[version=2.0].[type=R].[type=map].[type=long].a_map_of_longs_field", "[version=2.0].[type=R].[type=map].[type=long].a_map_of_longs_field",
] ]
assret_field_paths_match(fields, expected_field_paths) assret_field_paths_match(fields, expected_field_paths)
@ -150,7 +148,6 @@ def test_avro_schema_to_mce_fields_record_with_two_fields():
""" """
fields = avro_schema_to_mce_fields(schema) fields = avro_schema_to_mce_fields(schema)
expected_field_paths = [ expected_field_paths = [
"[version=2.0].[type=name]",
"[version=2.0].[type=name].[type=string].a", "[version=2.0].[type=name].[type=string].a",
"[version=2.0].[type=name].[type=string].b", "[version=2.0].[type=name].[type=string].b",
] ]
@ -186,11 +183,10 @@ def test_avro_schema_to_mce_fields_with_default():
""" """
fields = avro_schema_to_mce_fields(schema) fields = avro_schema_to_mce_fields(schema)
expected_field_paths = [ expected_field_paths = [
"[version=2.0].[type=name]",
"[version=2.0].[type=name].[type=string].aStringField", "[version=2.0].[type=name].[type=string].aStringField",
] ]
assret_field_paths_match(fields, expected_field_paths) assret_field_paths_match(fields, expected_field_paths)
description = fields[1].description description = fields[0].description
assert description and "custom, default value" in description assert description and "custom, default value" in description
@ -213,10 +209,7 @@ def test_avro_schema_with_recursion():
""" """
fields = avro_schema_to_mce_fields(schema) fields = avro_schema_to_mce_fields(schema)
expected_field_paths = [ expected_field_paths = [
"[version=2.0].[type=TreeNode]",
"[version=2.0].[type=TreeNode].[type=long].value", "[version=2.0].[type=TreeNode].[type=long].value",
"[version=2.0].[type=TreeNode].[type=array]",
"[version=2.0].[type=TreeNode].[type=array].[type=TreeNode]",
"[version=2.0].[type=TreeNode].[type=array].[type=TreeNode].children", "[version=2.0].[type=TreeNode].[type=array].[type=TreeNode].children",
] ]
assret_field_paths_match(fields, expected_field_paths) assret_field_paths_match(fields, expected_field_paths)
@ -263,11 +256,9 @@ def test_avro_sample_payment_schema_to_mce_fields_with_nesting():
""" """
fields = avro_schema_to_mce_fields(schema) fields = avro_schema_to_mce_fields(schema)
expected_field_paths = [ expected_field_paths = [
"[version=2.0].[type=Payment]",
"[version=2.0].[type=Payment].[type=string].id", "[version=2.0].[type=Payment].[type=string].id",
"[version=2.0].[type=Payment].[type=double].amount", "[version=2.0].[type=Payment].[type=double].amount",
"[version=2.0].[type=Payment].[type=string].name", "[version=2.0].[type=Payment].[type=string].name",
"[version=2.0].[type=Payment].[type=PhoneNumber]",
"[version=2.0].[type=Payment].[type=PhoneNumber].phoneNumber", "[version=2.0].[type=Payment].[type=PhoneNumber].phoneNumber",
"[version=2.0].[type=Payment].[type=PhoneNumber].phoneNumber.[type=string].areaCode", "[version=2.0].[type=Payment].[type=PhoneNumber].phoneNumber.[type=string].areaCode",
"[version=2.0].[type=Payment].[type=PhoneNumber].phoneNumber.[type=string].countryCode", "[version=2.0].[type=Payment].[type=PhoneNumber].phoneNumber.[type=string].countryCode",
@ -302,13 +293,10 @@ def test_avro_schema_to_mce_fields_with_nesting_across_records():
fields = avro_schema_to_mce_fields(schema) fields = avro_schema_to_mce_fields(schema)
expected_field_paths = [ expected_field_paths = [
"[version=2.0].[type=union]", "[version=2.0].[type=union]",
"[version=2.0].[type=union].[type=Address]",
"[version=2.0].[type=union].[type=Address].[type=string].streetAddress", "[version=2.0].[type=union].[type=Address].[type=string].streetAddress",
"[version=2.0].[type=union].[type=Address].[type=string].city", "[version=2.0].[type=union].[type=Address].[type=string].city",
"[version=2.0].[type=union].[type=Person]",
"[version=2.0].[type=union].[type=Person].[type=string].firstname", "[version=2.0].[type=union].[type=Person].[type=string].firstname",
"[version=2.0].[type=union].[type=Person].[type=string].lastname", "[version=2.0].[type=union].[type=Person].[type=string].lastname",
"[version=2.0].[type=union].[type=Person].[type=Address]",
"[version=2.0].[type=union].[type=Person].[type=Address].address", "[version=2.0].[type=union].[type=Person].[type=Address].address",
] ]
assret_field_paths_match(fields, expected_field_paths) assret_field_paths_match(fields, expected_field_paths)
@ -328,7 +316,7 @@ def test_simple_record_with_primitive_types():
"name": "enumField", "name": "enumField",
"type": { "type": {
"type": "enum", "type": "enum",
"name": "EnumField", "name": "MyTestEnumField",
"symbols": [ "TEST", "TEST1" ], "symbols": [ "TEST", "TEST1" ],
"symbolDoc": { "symbolDoc": {
"TEST": "test enum", "TEST": "test enum",
@ -341,7 +329,6 @@ def test_simple_record_with_primitive_types():
""" """
fields = avro_schema_to_mce_fields(schema) fields = avro_schema_to_mce_fields(schema)
expected_field_paths = [ expected_field_paths = [
"[version=2.0].[type=Simple]",
"[version=2.0].[type=Simple].[type=string].stringField", "[version=2.0].[type=Simple].[type=string].stringField",
"[version=2.0].[type=Simple].[type=boolean].booleanField", "[version=2.0].[type=Simple].[type=boolean].booleanField",
"[version=2.0].[type=Simple].[type=int].intField", "[version=2.0].[type=Simple].[type=int].intField",
@ -371,8 +358,6 @@ def test_simple_nested_record_with_a_string_field_for_key_schema():
""" """
fields = avro_schema_to_mce_fields(schema, True) fields = avro_schema_to_mce_fields(schema, True)
expected_field_paths: List[str] = [ expected_field_paths: List[str] = [
"[version=2.0].[key=True].[type=SimpleNested]",
"[version=2.0].[key=True].[type=SimpleNested].[type=InnerRcd]",
"[version=2.0].[key=True].[type=SimpleNested].[type=InnerRcd].nestedRcd", "[version=2.0].[key=True].[type=SimpleNested].[type=InnerRcd].nestedRcd",
"[version=2.0].[key=True].[type=SimpleNested].[type=InnerRcd].nestedRcd.[type=string].aStringField", "[version=2.0].[key=True].[type=SimpleNested].[type=InnerRcd].nestedRcd.[type=string].aStringField",
] ]
@ -407,15 +392,17 @@ def test_union_with_nested_record_of_union():
""" """
fields = avro_schema_to_mce_fields(schema) fields = avro_schema_to_mce_fields(schema)
expected_field_paths = [ expected_field_paths = [
"[version=2.0].[type=UnionSample]", "[version=2.0].[type=UnionSample].[type=union].aUnion",
"[version=2.0].[type=UnionSample].[type=union]",
"[version=2.0].[type=UnionSample].[type=union].[type=boolean].aUnion", "[version=2.0].[type=UnionSample].[type=union].[type=boolean].aUnion",
"[version=2.0].[type=UnionSample].[type=union].[type=Rcd]",
"[version=2.0].[type=UnionSample].[type=union].[type=Rcd].aUnion", "[version=2.0].[type=UnionSample].[type=union].[type=Rcd].aUnion",
"[version=2.0].[type=UnionSample].[type=union].[type=Rcd].aUnion.[type=string].aNullableStringField", "[version=2.0].[type=UnionSample].[type=union].[type=Rcd].aUnion.[type=string].aNullableStringField",
] ]
assret_field_paths_match(fields, expected_field_paths) assret_field_paths_match(fields, expected_field_paths)
assert isinstance(fields[5].type.type, StringTypeClass) assert isinstance(fields[3].type.type, StringTypeClass)
assert fields[0].nativeDataType == "union"
assert fields[1].nativeDataType == "boolean"
assert fields[2].nativeDataType == "Rcd"
assert fields[3].nativeDataType == "string"
def test_nested_arrays(): def test_nested_arrays():
@ -448,10 +435,6 @@ def test_nested_arrays():
""" """
fields = avro_schema_to_mce_fields(schema) fields = avro_schema_to_mce_fields(schema)
expected_field_paths: List[str] = [ expected_field_paths: List[str] = [
"[version=2.0].[type=NestedArray]",
"[version=2.0].[type=NestedArray].[type=array]",
"[version=2.0].[type=NestedArray].[type=array].[type=array]",
"[version=2.0].[type=NestedArray].[type=array].[type=array].[type=Foo]",
"[version=2.0].[type=NestedArray].[type=array].[type=array].[type=Foo].ar", "[version=2.0].[type=NestedArray].[type=array].[type=array].[type=Foo].ar",
"[version=2.0].[type=NestedArray].[type=array].[type=array].[type=Foo].ar.[type=long].a", "[version=2.0].[type=NestedArray].[type=array].[type=array].[type=Foo].ar.[type=long].a",
] ]
@ -485,13 +468,10 @@ def test_map_of_union_of_int_and_record_of_union():
""" """
fields = avro_schema_to_mce_fields(schema) fields = avro_schema_to_mce_fields(schema)
expected_field_paths = [ expected_field_paths = [
"[version=2.0].[type=MapSample]", "[version=2.0].[type=MapSample].[type=map].[type=union].aMap",
"[version=2.0].[type=MapSample].[type=map]",
"[version=2.0].[type=MapSample].[type=map].[type=union]",
"[version=2.0].[type=MapSample].[type=map].[type=union].[type=int].aMap", "[version=2.0].[type=MapSample].[type=map].[type=union].[type=int].aMap",
"[version=2.0].[type=MapSample].[type=map].[type=union].[type=Rcd]",
"[version=2.0].[type=MapSample].[type=map].[type=union].[type=Rcd].aMap", "[version=2.0].[type=MapSample].[type=map].[type=union].[type=Rcd].aMap",
"[version=2.0].[type=MapSample].[type=map].[type=union].[type=Rcd].aMap.[type=union]", "[version=2.0].[type=MapSample].[type=map].[type=union].[type=Rcd].aMap.[type=union].aUnion",
"[version=2.0].[type=MapSample].[type=map].[type=union].[type=Rcd].aMap.[type=union].[type=string].aUnion", "[version=2.0].[type=MapSample].[type=map].[type=union].[type=Rcd].aMap.[type=union].[type=string].aUnion",
"[version=2.0].[type=MapSample].[type=map].[type=union].[type=Rcd].aMap.[type=union].[type=int].aUnion", "[version=2.0].[type=MapSample].[type=map].[type=union].[type=Rcd].aMap.[type=union].[type=int].aUnion",
] ]
@ -519,11 +499,8 @@ def test_recursive_avro():
""" """
fields = avro_schema_to_mce_fields(schema) fields = avro_schema_to_mce_fields(schema)
expected_field_paths = [ expected_field_paths = [
"[version=2.0].[type=Recursive]",
"[version=2.0].[type=Recursive].[type=R]",
"[version=2.0].[type=Recursive].[type=R].r", "[version=2.0].[type=Recursive].[type=R].r",
"[version=2.0].[type=Recursive].[type=R].r.[type=int].anIntegerField", "[version=2.0].[type=Recursive].[type=R].r.[type=int].anIntegerField",
"[version=2.0].[type=Recursive].[type=R].r.[type=R]",
"[version=2.0].[type=Recursive].[type=R].r.[type=R].aRecursiveField", "[version=2.0].[type=Recursive].[type=R].r.[type=R].aRecursiveField",
] ]
assret_field_paths_match(fields, expected_field_paths) assret_field_paths_match(fields, expected_field_paths)
@ -564,17 +541,11 @@ def test_needs_disambiguation_nested_union_of_records_with_same_field_name():
""" """
fields = avro_schema_to_mce_fields(schema) fields = avro_schema_to_mce_fields(schema)
expected_field_paths: List[str] = [ expected_field_paths: List[str] = [
"[version=2.0].[type=ABFooUnion]", "[version=2.0].[type=ABFooUnion].[type=union].a",
"[version=2.0].[type=ABFooUnion].[type=union]",
"[version=2.0].[type=ABFooUnion].[type=union].[type=A]",
"[version=2.0].[type=ABFooUnion].[type=union].[type=A].a", "[version=2.0].[type=ABFooUnion].[type=union].[type=A].a",
"[version=2.0].[type=ABFooUnion].[type=union].[type=A].a.[type=string].f", "[version=2.0].[type=ABFooUnion].[type=union].[type=A].a.[type=string].f",
"[version=2.0].[type=ABFooUnion].[type=union].[type=B]",
"[version=2.0].[type=ABFooUnion].[type=union].[type=B].a", "[version=2.0].[type=ABFooUnion].[type=union].[type=B].a",
"[version=2.0].[type=ABFooUnion].[type=union].[type=B].a.[type=string].f", "[version=2.0].[type=ABFooUnion].[type=union].[type=B].a.[type=string].f",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array]",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array]",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo]",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a", "[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a.[type=long].f", "[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a.[type=long].f",
] ]
@ -637,17 +608,11 @@ def test_key_schema_handling():
""" """
fields: List[SchemaField] = avro_schema_to_mce_fields(schema, is_key_schema=True) fields: List[SchemaField] = avro_schema_to_mce_fields(schema, is_key_schema=True)
expected_field_paths: List[str] = [ expected_field_paths: List[str] = [
"[version=2.0].[key=True].[type=ABFooUnion]", "[version=2.0].[key=True].[type=ABFooUnion].[type=union].a",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union]",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=A]",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=A].a", "[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=A].a",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=A].a.[type=string].f", "[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=A].a.[type=string].f",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=B]",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=B].a", "[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=B].a",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=B].a.[type=string].f", "[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=B].a.[type=string].f",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array]",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].[type=array]",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo]",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a", "[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a.[type=long].f", "[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a.[type=long].f",
] ]