mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-03 12:16:10 +00:00
fix(ingest/json-schema): adding support descriptions for array (#9757)
This commit is contained in:
parent
6eb5f80a9a
commit
1b4f31bcfe
@ -417,15 +417,35 @@ class JsonSchemaTranslator:
|
||||
inner_field_path,
|
||||
)
|
||||
elif datahub_field_type == ArrayTypeClass:
|
||||
field_path = field_path.expand_type("array", schema)
|
||||
# default items schema is string
|
||||
field_path = field_path.expand_type(discriminated_type, schema)
|
||||
yield SchemaField(
|
||||
fieldPath=field_path.as_string(),
|
||||
type=type_override or SchemaFieldDataTypeClass(type=ArrayTypeClass()),
|
||||
nativeDataType=native_type_override
|
||||
or JsonSchemaTranslator._get_discriminated_type_from_schema(schema),
|
||||
description=JsonSchemaTranslator._get_description_from_any_schema(
|
||||
schema
|
||||
),
|
||||
nullable=nullable,
|
||||
jsonProps=JsonSchemaTranslator._get_jsonprops_for_any_schema(
|
||||
schema, required=required
|
||||
),
|
||||
isPartOfKey=field_path.is_key_schema,
|
||||
)
|
||||
|
||||
items_schema = schema.get("items", {"type": "string"})
|
||||
items_type = JsonSchemaTranslator._get_type_from_schema(items_schema)
|
||||
field_path._set_parent_type_if_not_exists(
|
||||
DataHubType(type=ArrayTypeClass, nested_type=items_type)
|
||||
field_name = items_schema.get("title", None)
|
||||
if not field_name:
|
||||
field_name = items_type
|
||||
inner_field_path = field_path.clone_plus(
|
||||
FieldElement(type=[], name=field_name, schema_types=[])
|
||||
)
|
||||
yield from JsonSchemaTranslator.get_fields(
|
||||
items_type, items_schema, required=False, base_field_path=field_path
|
||||
items_type,
|
||||
items_schema,
|
||||
required=False,
|
||||
base_field_path=inner_field_path,
|
||||
)
|
||||
|
||||
elif datahub_field_type == MapTypeClass:
|
||||
|
||||
@ -153,15 +153,20 @@ def test_json_schema_with_recursion():
|
||||
},
|
||||
}
|
||||
fields = list(JsonSchemaTranslator.get_fields_from_schema(schema))
|
||||
|
||||
expected_field_paths = [
|
||||
{
|
||||
"path": "[version=2.0].[type=TreeNode].[type=integer].value",
|
||||
"type": NumberTypeClass,
|
||||
},
|
||||
{
|
||||
"path": "[version=2.0].[type=TreeNode].[type=array].[type=TreeNode].children",
|
||||
"path": "[version=2.0].[type=TreeNode].[type=array].children",
|
||||
"type": ArrayTypeClass,
|
||||
},
|
||||
{
|
||||
"path": "[version=2.0].[type=TreeNode].[type=array].children.[type=TreeNode].TreeNode",
|
||||
"type": RecordTypeClass,
|
||||
},
|
||||
]
|
||||
assert_field_paths_match(fields, expected_field_paths)
|
||||
assert_fields_are_valid(fields)
|
||||
@ -372,8 +377,10 @@ def test_nested_arrays():
|
||||
|
||||
fields = list(JsonSchemaTranslator.get_fields_from_schema(schema))
|
||||
expected_field_paths: List[str] = [
|
||||
"[version=2.0].[type=NestedArray].[type=array].[type=array].[type=Foo].ar",
|
||||
"[version=2.0].[type=NestedArray].[type=array].[type=array].[type=Foo].ar.[type=integer].a",
|
||||
"[version=2.0].[type=NestedArray].[type=array].ar",
|
||||
"[version=2.0].[type=NestedArray].[type=array].ar.[type=array].array",
|
||||
"[version=2.0].[type=NestedArray].[type=array].ar.[type=array].array.[type=Foo].Foo",
|
||||
"[version=2.0].[type=NestedArray].[type=array].ar.[type=array].array.[type=Foo].Foo.[type=integer].a",
|
||||
]
|
||||
assert_field_paths_match(fields, expected_field_paths)
|
||||
assert isinstance(fields[0].type.type, ArrayTypeClass)
|
||||
@ -496,14 +503,17 @@ def test_needs_disambiguation_nested_union_of_records_with_same_field_name():
|
||||
},
|
||||
}
|
||||
fields = list(JsonSchemaTranslator.get_fields_from_schema(schema))
|
||||
|
||||
expected_field_paths: List[str] = [
|
||||
"[version=2.0].[type=ABFooUnion].[type=union].a",
|
||||
"[version=2.0].[type=ABFooUnion].[type=union].[type=A].a",
|
||||
"[version=2.0].[type=ABFooUnion].[type=union].[type=A].a.[type=string].f",
|
||||
"[version=2.0].[type=ABFooUnion].[type=union].[type=B].a",
|
||||
"[version=2.0].[type=ABFooUnion].[type=union].[type=B].a.[type=string].f",
|
||||
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a",
|
||||
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a.[type=integer].f",
|
||||
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].a",
|
||||
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].a.[type=array].array",
|
||||
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].a.[type=array].array.[type=Foo].Foo",
|
||||
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].a.[type=array].array.[type=Foo].Foo.[type=integer].f",
|
||||
]
|
||||
assert_field_paths_match(fields, expected_field_paths)
|
||||
|
||||
@ -578,8 +588,10 @@ def test_key_schema_handling():
|
||||
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=A].a.[type=string].f",
|
||||
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=B].a",
|
||||
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=B].a.[type=string].f",
|
||||
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a",
|
||||
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a.[type=number].f",
|
||||
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].a",
|
||||
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].a.[type=array].array",
|
||||
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].a.[type=array].array.[type=Foo].Foo",
|
||||
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].a.[type=array].array.[type=Foo].Foo.[type=number].f",
|
||||
]
|
||||
assert_field_paths_match(fields, expected_field_paths)
|
||||
for f in fields:
|
||||
@ -664,7 +676,8 @@ def test_simple_array():
|
||||
|
||||
fields = list(JsonSchemaTranslator.get_fields_from_schema(schema))
|
||||
expected_field_paths: List[str] = [
|
||||
"[version=2.0].[type=ObjectWithArray].[type=array].[type=string].ar",
|
||||
"[version=2.0].[type=ObjectWithArray].[type=array].ar",
|
||||
"[version=2.0].[type=ObjectWithArray].[type=array].ar.[type=string].string",
|
||||
]
|
||||
assert_field_paths_match(fields, expected_field_paths)
|
||||
assert isinstance(fields[0].type.type, ArrayTypeClass)
|
||||
@ -846,3 +859,31 @@ def test_top_level_trival_allof():
|
||||
assert json.loads(fields[1].jsonProps or "{}")["required"] is False
|
||||
assert json.loads(fields[2].jsonProps or "{}")["required"] is True
|
||||
assert json.loads(fields[3].jsonProps or "{}")["required"] is False
|
||||
|
||||
|
||||
def test_description_extraction():
    """Check that an array property's ``description`` is emitted on the array field.

    The schema declares a single array-of-strings property ``bar`` with the
    description ``"XYZ"``. Two fields are expected: one for the array itself
    and one for its string items. The description is asserted on the array
    field only.
    """
    array_path = "[version=2.0].[type=object].[type=array].bar"
    bar_property = {
        "type": "array",
        "items": {"type": "string"},
        "description": "XYZ",
    }
    schema = {
        "$id": "test",
        "$schema": "http://json-schema.org/draft-07/schema#",
        "properties": {"bar": bar_property},
    }

    fields = list(JsonSchemaTranslator.get_fields_from_schema(schema))

    expected_field_paths: List[str] = [
        array_path,
        "[version=2.0].[type=object].[type=array].bar.[type=string].string",
    ]
    assert_field_paths_match(fields, expected_field_paths)
    assert_fields_are_valid(fields)

    # Additional check for the description extraction: pick out the array
    # field itself (not its item field) and verify its description.
    array_field = next(filter(lambda f: f.fieldPath == array_path, fields))
    assert array_field.description == "XYZ"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user