mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-12-12 15:57:44 +00:00
Add array supp for json schema parser (#18255)
This commit is contained in:
parent
69a58d2556
commit
2ee015e426
@ -16,7 +16,7 @@ Utils module to parse the jsonschema
|
||||
import json
|
||||
import traceback
|
||||
from enum import Enum
|
||||
from typing import List, Optional, Type
|
||||
from typing import List, Optional, Tuple, Type
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
@ -66,6 +66,49 @@ def parse_json_schema(
|
||||
return None
|
||||
|
||||
|
||||
def get_child_models(key, value, field_models, cls: Type[BaseModel] = FieldModel):
|
||||
"""
|
||||
Method to parse the child objects in the json schema
|
||||
"""
|
||||
try:
|
||||
cls_obj = cls(
|
||||
name=key,
|
||||
displayName=value.get("title"),
|
||||
dataType=JsonSchemaDataTypes(value.get("type", "unknown")).name,
|
||||
description=value.get("description"),
|
||||
)
|
||||
children = None
|
||||
if value.get("type") == JsonSchemaDataTypes.RECORD.value:
|
||||
children = get_json_schema_fields(value.get("properties"), cls=cls)
|
||||
if value.get("type") == JsonSchemaDataTypes.ARRAY.value:
|
||||
datatype_display, children = get_json_schema_array_fields(
|
||||
value.get("items"), cls=cls
|
||||
)
|
||||
cls_obj.dataTypeDisplay = f"ARRAY<{datatype_display}>"
|
||||
cls_obj.children = children
|
||||
field_models.append(cls_obj)
|
||||
except Exception as exc: # pylint: disable=broad-except
|
||||
logger.debug(traceback.format_exc())
|
||||
logger.warning(f"Unable to parse the json schema into models: {exc}")
|
||||
|
||||
|
||||
def get_json_schema_array_fields(
|
||||
array_items, cls: Type[BaseModel] = FieldModel
|
||||
) -> Optional[Tuple[str, List[FieldModel]]]:
|
||||
"""
|
||||
Recursively convert the parsed array schema into required models
|
||||
"""
|
||||
field_models = []
|
||||
if array_items.get("type") == JsonSchemaDataTypes.RECORD.value:
|
||||
for key, value in array_items.get("properties", {}).items():
|
||||
get_child_models(key, value, field_models, cls)
|
||||
|
||||
return (
|
||||
JsonSchemaDataTypes(array_items.get("type", "unknown")).name,
|
||||
field_models or None,
|
||||
)
|
||||
|
||||
|
||||
def get_json_schema_fields(
|
||||
properties, cls: Type[BaseModel] = FieldModel
|
||||
) -> Optional[List[FieldModel]]:
|
||||
@ -74,20 +117,6 @@ def get_json_schema_fields(
|
||||
"""
|
||||
field_models = []
|
||||
for key, value in properties.items():
|
||||
try:
|
||||
field_models.append(
|
||||
cls(
|
||||
name=key,
|
||||
displayName=value.get("title"),
|
||||
dataType=JsonSchemaDataTypes(value.get("type", "unknown")).name,
|
||||
description=value.get("description"),
|
||||
children=get_json_schema_fields(value.get("properties"), cls=cls)
|
||||
if value.get("type") == "object"
|
||||
else None,
|
||||
)
|
||||
)
|
||||
except Exception as exc: # pylint: disable=broad-except
|
||||
logger.debug(traceback.format_exc())
|
||||
logger.warning(f"Unable to parse the json schema into models: {exc}")
|
||||
get_child_models(key, value, field_models, cls)
|
||||
|
||||
return field_models
|
||||
|
||||
@ -91,8 +91,81 @@ class JsonSchemaParserTests(TestCase):
|
||||
}
|
||||
}"""
|
||||
|
||||
sample_array_json_schema = """
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"firstName": {
|
||||
"type": "string"
|
||||
},
|
||||
"lastName": {
|
||||
"type": "string"
|
||||
},
|
||||
"age": {
|
||||
"type": "integer"
|
||||
},
|
||||
"address": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"streetAddress": {
|
||||
"type": "string"
|
||||
},
|
||||
"city": {
|
||||
"type": "string"
|
||||
},
|
||||
"state": {
|
||||
"type": "string"
|
||||
},
|
||||
"postalCode": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"streetAddress",
|
||||
"city",
|
||||
"state",
|
||||
"postalCode"
|
||||
]
|
||||
},
|
||||
"phoneNumbers": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string"
|
||||
},
|
||||
"number": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"type",
|
||||
"number"
|
||||
]
|
||||
}
|
||||
},
|
||||
"hobbies": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"firstName",
|
||||
"lastName",
|
||||
"age",
|
||||
"address",
|
||||
"phoneNumbers"
|
||||
]
|
||||
}
|
||||
"""
|
||||
|
||||
parsed_schema = parse_json_schema(sample_json_schema)
|
||||
parsed_postgres_schema = parse_json_schema(sample_postgres_json_schema, Column)
|
||||
parsed_array_schema = parse_json_schema(sample_array_json_schema)
|
||||
|
||||
def test_schema_name(self):
|
||||
self.assertEqual(self.parsed_schema[0].name.root, "Person")
|
||||
@ -143,3 +216,37 @@ class JsonSchemaParserTests(TestCase):
|
||||
)
|
||||
self.assertEqual(len(self.parsed_postgres_schema[0].children), 3)
|
||||
self.assertEqual(len(self.parsed_postgres_schema[0].children[1].children), 4)
|
||||
|
||||
def test_parse_postgres_json_fields(self):
|
||||
self.assertEqual(self.parsed_array_schema[0].name.root, "default")
|
||||
self.assertEqual(len(self.parsed_array_schema[0].children), 6)
|
||||
|
||||
# Validate the complex array datatype
|
||||
self.assertEqual(
|
||||
self.parsed_array_schema[0].children[4].name.root, "phoneNumbers"
|
||||
)
|
||||
self.assertEqual(self.parsed_array_schema[0].children[4].dataType.name, "ARRAY")
|
||||
self.assertEqual(
|
||||
self.parsed_array_schema[0].children[4].dataTypeDisplay, "ARRAY<RECORD>"
|
||||
)
|
||||
self.assertEqual(len(self.parsed_array_schema[0].children[4].children), 2)
|
||||
self.assertEqual(
|
||||
self.parsed_array_schema[0].children[4].children[0].name.root, "type"
|
||||
)
|
||||
self.assertEqual(
|
||||
self.parsed_array_schema[0].children[4].children[0].dataType.name, "STRING"
|
||||
)
|
||||
self.assertEqual(
|
||||
self.parsed_array_schema[0].children[4].children[1].name.root, "number"
|
||||
)
|
||||
self.assertEqual(
|
||||
self.parsed_array_schema[0].children[4].children[1].dataType.name, "STRING"
|
||||
)
|
||||
|
||||
# Validate the primitive array datatype
|
||||
self.assertEqual(self.parsed_array_schema[0].children[5].name.root, "hobbies")
|
||||
self.assertEqual(self.parsed_array_schema[0].children[5].dataType.name, "ARRAY")
|
||||
self.assertEqual(
|
||||
self.parsed_array_schema[0].children[5].dataTypeDisplay, "ARRAY<STRING>"
|
||||
)
|
||||
self.assertIsNone(self.parsed_array_schema[0].children[5].children)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user