mirror of
				https://github.com/datahub-project/datahub.git
				synced 2025-11-04 04:39:10 +00:00 
			
		
		
		
	fix(ingest/json-schema): adding support descriptions for array (#9757)
This commit is contained in:
		
							parent
							
								
									6eb5f80a9a
								
							
						
					
					
						commit
						1b4f31bcfe
					
				@ -417,15 +417,35 @@ class JsonSchemaTranslator:
 | 
				
			|||||||
                    inner_field_path,
 | 
					                    inner_field_path,
 | 
				
			||||||
                )
 | 
					                )
 | 
				
			||||||
        elif datahub_field_type == ArrayTypeClass:
 | 
					        elif datahub_field_type == ArrayTypeClass:
 | 
				
			||||||
            field_path = field_path.expand_type("array", schema)
 | 
					            field_path = field_path.expand_type(discriminated_type, schema)
 | 
				
			||||||
            # default items schema is string
 | 
					            yield SchemaField(
 | 
				
			||||||
 | 
					                fieldPath=field_path.as_string(),
 | 
				
			||||||
 | 
					                type=type_override or SchemaFieldDataTypeClass(type=ArrayTypeClass()),
 | 
				
			||||||
 | 
					                nativeDataType=native_type_override
 | 
				
			||||||
 | 
					                or JsonSchemaTranslator._get_discriminated_type_from_schema(schema),
 | 
				
			||||||
 | 
					                description=JsonSchemaTranslator._get_description_from_any_schema(
 | 
				
			||||||
 | 
					                    schema
 | 
				
			||||||
 | 
					                ),
 | 
				
			||||||
 | 
					                nullable=nullable,
 | 
				
			||||||
 | 
					                jsonProps=JsonSchemaTranslator._get_jsonprops_for_any_schema(
 | 
				
			||||||
 | 
					                    schema, required=required
 | 
				
			||||||
 | 
					                ),
 | 
				
			||||||
 | 
					                isPartOfKey=field_path.is_key_schema,
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            items_schema = schema.get("items", {"type": "string"})
 | 
					            items_schema = schema.get("items", {"type": "string"})
 | 
				
			||||||
            items_type = JsonSchemaTranslator._get_type_from_schema(items_schema)
 | 
					            items_type = JsonSchemaTranslator._get_type_from_schema(items_schema)
 | 
				
			||||||
            field_path._set_parent_type_if_not_exists(
 | 
					            field_name = items_schema.get("title", None)
 | 
				
			||||||
                DataHubType(type=ArrayTypeClass, nested_type=items_type)
 | 
					            if not field_name:
 | 
				
			||||||
 | 
					                field_name = items_type
 | 
				
			||||||
 | 
					            inner_field_path = field_path.clone_plus(
 | 
				
			||||||
 | 
					                FieldElement(type=[], name=field_name, schema_types=[])
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
            yield from JsonSchemaTranslator.get_fields(
 | 
					            yield from JsonSchemaTranslator.get_fields(
 | 
				
			||||||
                items_type, items_schema, required=False, base_field_path=field_path
 | 
					                items_type,
 | 
				
			||||||
 | 
					                items_schema,
 | 
				
			||||||
 | 
					                required=False,
 | 
				
			||||||
 | 
					                base_field_path=inner_field_path,
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        elif datahub_field_type == MapTypeClass:
 | 
					        elif datahub_field_type == MapTypeClass:
 | 
				
			||||||
 | 
				
			|||||||
@ -153,15 +153,20 @@ def test_json_schema_with_recursion():
 | 
				
			|||||||
        },
 | 
					        },
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    fields = list(JsonSchemaTranslator.get_fields_from_schema(schema))
 | 
					    fields = list(JsonSchemaTranslator.get_fields_from_schema(schema))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    expected_field_paths = [
 | 
					    expected_field_paths = [
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
            "path": "[version=2.0].[type=TreeNode].[type=integer].value",
 | 
					            "path": "[version=2.0].[type=TreeNode].[type=integer].value",
 | 
				
			||||||
            "type": NumberTypeClass,
 | 
					            "type": NumberTypeClass,
 | 
				
			||||||
        },
 | 
					        },
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
            "path": "[version=2.0].[type=TreeNode].[type=array].[type=TreeNode].children",
 | 
					            "path": "[version=2.0].[type=TreeNode].[type=array].children",
 | 
				
			||||||
            "type": ArrayTypeClass,
 | 
					            "type": ArrayTypeClass,
 | 
				
			||||||
        },
 | 
					        },
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            "path": "[version=2.0].[type=TreeNode].[type=array].children.[type=TreeNode].TreeNode",
 | 
				
			||||||
 | 
					            "type": RecordTypeClass,
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
    ]
 | 
					    ]
 | 
				
			||||||
    assert_field_paths_match(fields, expected_field_paths)
 | 
					    assert_field_paths_match(fields, expected_field_paths)
 | 
				
			||||||
    assert_fields_are_valid(fields)
 | 
					    assert_fields_are_valid(fields)
 | 
				
			||||||
@ -372,8 +377,10 @@ def test_nested_arrays():
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    fields = list(JsonSchemaTranslator.get_fields_from_schema(schema))
 | 
					    fields = list(JsonSchemaTranslator.get_fields_from_schema(schema))
 | 
				
			||||||
    expected_field_paths: List[str] = [
 | 
					    expected_field_paths: List[str] = [
 | 
				
			||||||
        "[version=2.0].[type=NestedArray].[type=array].[type=array].[type=Foo].ar",
 | 
					        "[version=2.0].[type=NestedArray].[type=array].ar",
 | 
				
			||||||
        "[version=2.0].[type=NestedArray].[type=array].[type=array].[type=Foo].ar.[type=integer].a",
 | 
					        "[version=2.0].[type=NestedArray].[type=array].ar.[type=array].array",
 | 
				
			||||||
 | 
					        "[version=2.0].[type=NestedArray].[type=array].ar.[type=array].array.[type=Foo].Foo",
 | 
				
			||||||
 | 
					        "[version=2.0].[type=NestedArray].[type=array].ar.[type=array].array.[type=Foo].Foo.[type=integer].a",
 | 
				
			||||||
    ]
 | 
					    ]
 | 
				
			||||||
    assert_field_paths_match(fields, expected_field_paths)
 | 
					    assert_field_paths_match(fields, expected_field_paths)
 | 
				
			||||||
    assert isinstance(fields[0].type.type, ArrayTypeClass)
 | 
					    assert isinstance(fields[0].type.type, ArrayTypeClass)
 | 
				
			||||||
@ -496,14 +503,17 @@ def test_needs_disambiguation_nested_union_of_records_with_same_field_name():
 | 
				
			|||||||
        },
 | 
					        },
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    fields = list(JsonSchemaTranslator.get_fields_from_schema(schema))
 | 
					    fields = list(JsonSchemaTranslator.get_fields_from_schema(schema))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    expected_field_paths: List[str] = [
 | 
					    expected_field_paths: List[str] = [
 | 
				
			||||||
        "[version=2.0].[type=ABFooUnion].[type=union].a",
 | 
					        "[version=2.0].[type=ABFooUnion].[type=union].a",
 | 
				
			||||||
        "[version=2.0].[type=ABFooUnion].[type=union].[type=A].a",
 | 
					        "[version=2.0].[type=ABFooUnion].[type=union].[type=A].a",
 | 
				
			||||||
        "[version=2.0].[type=ABFooUnion].[type=union].[type=A].a.[type=string].f",
 | 
					        "[version=2.0].[type=ABFooUnion].[type=union].[type=A].a.[type=string].f",
 | 
				
			||||||
        "[version=2.0].[type=ABFooUnion].[type=union].[type=B].a",
 | 
					        "[version=2.0].[type=ABFooUnion].[type=union].[type=B].a",
 | 
				
			||||||
        "[version=2.0].[type=ABFooUnion].[type=union].[type=B].a.[type=string].f",
 | 
					        "[version=2.0].[type=ABFooUnion].[type=union].[type=B].a.[type=string].f",
 | 
				
			||||||
        "[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a",
 | 
					        "[version=2.0].[type=ABFooUnion].[type=union].[type=array].a",
 | 
				
			||||||
        "[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a.[type=integer].f",
 | 
					        "[version=2.0].[type=ABFooUnion].[type=union].[type=array].a.[type=array].array",
 | 
				
			||||||
 | 
					        "[version=2.0].[type=ABFooUnion].[type=union].[type=array].a.[type=array].array.[type=Foo].Foo",
 | 
				
			||||||
 | 
					        "[version=2.0].[type=ABFooUnion].[type=union].[type=array].a.[type=array].array.[type=Foo].Foo.[type=integer].f",
 | 
				
			||||||
    ]
 | 
					    ]
 | 
				
			||||||
    assert_field_paths_match(fields, expected_field_paths)
 | 
					    assert_field_paths_match(fields, expected_field_paths)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -578,8 +588,10 @@ def test_key_schema_handling():
 | 
				
			|||||||
        "[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=A].a.[type=string].f",
 | 
					        "[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=A].a.[type=string].f",
 | 
				
			||||||
        "[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=B].a",
 | 
					        "[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=B].a",
 | 
				
			||||||
        "[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=B].a.[type=string].f",
 | 
					        "[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=B].a.[type=string].f",
 | 
				
			||||||
        "[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a",
 | 
					        "[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].a",
 | 
				
			||||||
        "[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a.[type=number].f",
 | 
					        "[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].a.[type=array].array",
 | 
				
			||||||
 | 
					        "[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].a.[type=array].array.[type=Foo].Foo",
 | 
				
			||||||
 | 
					        "[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].a.[type=array].array.[type=Foo].Foo.[type=number].f",
 | 
				
			||||||
    ]
 | 
					    ]
 | 
				
			||||||
    assert_field_paths_match(fields, expected_field_paths)
 | 
					    assert_field_paths_match(fields, expected_field_paths)
 | 
				
			||||||
    for f in fields:
 | 
					    for f in fields:
 | 
				
			||||||
@ -664,7 +676,8 @@ def test_simple_array():
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    fields = list(JsonSchemaTranslator.get_fields_from_schema(schema))
 | 
					    fields = list(JsonSchemaTranslator.get_fields_from_schema(schema))
 | 
				
			||||||
    expected_field_paths: List[str] = [
 | 
					    expected_field_paths: List[str] = [
 | 
				
			||||||
        "[version=2.0].[type=ObjectWithArray].[type=array].[type=string].ar",
 | 
					        "[version=2.0].[type=ObjectWithArray].[type=array].ar",
 | 
				
			||||||
 | 
					        "[version=2.0].[type=ObjectWithArray].[type=array].ar.[type=string].string",
 | 
				
			||||||
    ]
 | 
					    ]
 | 
				
			||||||
    assert_field_paths_match(fields, expected_field_paths)
 | 
					    assert_field_paths_match(fields, expected_field_paths)
 | 
				
			||||||
    assert isinstance(fields[0].type.type, ArrayTypeClass)
 | 
					    assert isinstance(fields[0].type.type, ArrayTypeClass)
 | 
				
			||||||
@ -846,3 +859,31 @@ def test_top_level_trival_allof():
 | 
				
			|||||||
    assert json.loads(fields[1].jsonProps or "{}")["required"] is False
 | 
					    assert json.loads(fields[1].jsonProps or "{}")["required"] is False
 | 
				
			||||||
    assert json.loads(fields[2].jsonProps or "{}")["required"] is True
 | 
					    assert json.loads(fields[2].jsonProps or "{}")["required"] is True
 | 
				
			||||||
    assert json.loads(fields[3].jsonProps or "{}")["required"] is False
 | 
					    assert json.loads(fields[3].jsonProps or "{}")["required"] is False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def test_description_extraction():
 | 
				
			||||||
 | 
					    schema = {
 | 
				
			||||||
 | 
					        "$id": "test",
 | 
				
			||||||
 | 
					        "$schema": "http://json-schema.org/draft-07/schema#",
 | 
				
			||||||
 | 
					        "properties": {
 | 
				
			||||||
 | 
					            "bar": {
 | 
				
			||||||
 | 
					                "type": "array",
 | 
				
			||||||
 | 
					                "items": {"type": "string"},
 | 
				
			||||||
 | 
					                "description": "XYZ",
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    fields = list(JsonSchemaTranslator.get_fields_from_schema(schema))
 | 
				
			||||||
 | 
					    expected_field_paths: List[str] = [
 | 
				
			||||||
 | 
					        "[version=2.0].[type=object].[type=array].bar",
 | 
				
			||||||
 | 
					        "[version=2.0].[type=object].[type=array].bar.[type=string].string",
 | 
				
			||||||
 | 
					    ]
 | 
				
			||||||
 | 
					    assert_field_paths_match(fields, expected_field_paths)
 | 
				
			||||||
 | 
					    assert_fields_are_valid(fields)
 | 
				
			||||||
 | 
					    # Additional check for the description extraction
 | 
				
			||||||
 | 
					    array_field = next(
 | 
				
			||||||
 | 
					        field
 | 
				
			||||||
 | 
					        for field in fields
 | 
				
			||||||
 | 
					        if field.fieldPath == "[version=2.0].[type=object].[type=array].bar"
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					    assert array_field.description == "XYZ"
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user