mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-12-13 16:46:59 +00:00
Add array supp for json schema parser (#18255)
This commit is contained in:
parent
69a58d2556
commit
2ee015e426
@ -16,7 +16,7 @@ Utils module to parse the jsonschema
|
|||||||
import json
|
import json
|
||||||
import traceback
|
import traceback
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import List, Optional, Type
|
from typing import List, Optional, Tuple, Type
|
||||||
|
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
@ -66,6 +66,49 @@ def parse_json_schema(
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_child_models(key, value, field_models, cls: Type[BaseModel] = FieldModel):
|
||||||
|
"""
|
||||||
|
Method to parse the child objects in the json schema
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
cls_obj = cls(
|
||||||
|
name=key,
|
||||||
|
displayName=value.get("title"),
|
||||||
|
dataType=JsonSchemaDataTypes(value.get("type", "unknown")).name,
|
||||||
|
description=value.get("description"),
|
||||||
|
)
|
||||||
|
children = None
|
||||||
|
if value.get("type") == JsonSchemaDataTypes.RECORD.value:
|
||||||
|
children = get_json_schema_fields(value.get("properties"), cls=cls)
|
||||||
|
if value.get("type") == JsonSchemaDataTypes.ARRAY.value:
|
||||||
|
datatype_display, children = get_json_schema_array_fields(
|
||||||
|
value.get("items"), cls=cls
|
||||||
|
)
|
||||||
|
cls_obj.dataTypeDisplay = f"ARRAY<{datatype_display}>"
|
||||||
|
cls_obj.children = children
|
||||||
|
field_models.append(cls_obj)
|
||||||
|
except Exception as exc: # pylint: disable=broad-except
|
||||||
|
logger.debug(traceback.format_exc())
|
||||||
|
logger.warning(f"Unable to parse the json schema into models: {exc}")
|
||||||
|
|
||||||
|
|
||||||
|
def get_json_schema_array_fields(
|
||||||
|
array_items, cls: Type[BaseModel] = FieldModel
|
||||||
|
) -> Optional[Tuple[str, List[FieldModel]]]:
|
||||||
|
"""
|
||||||
|
Recursively convert the parsed array schema into required models
|
||||||
|
"""
|
||||||
|
field_models = []
|
||||||
|
if array_items.get("type") == JsonSchemaDataTypes.RECORD.value:
|
||||||
|
for key, value in array_items.get("properties", {}).items():
|
||||||
|
get_child_models(key, value, field_models, cls)
|
||||||
|
|
||||||
|
return (
|
||||||
|
JsonSchemaDataTypes(array_items.get("type", "unknown")).name,
|
||||||
|
field_models or None,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def get_json_schema_fields(
|
def get_json_schema_fields(
|
||||||
properties, cls: Type[BaseModel] = FieldModel
|
properties, cls: Type[BaseModel] = FieldModel
|
||||||
) -> Optional[List[FieldModel]]:
|
) -> Optional[List[FieldModel]]:
|
||||||
@ -74,20 +117,6 @@ def get_json_schema_fields(
|
|||||||
"""
|
"""
|
||||||
field_models = []
|
field_models = []
|
||||||
for key, value in properties.items():
|
for key, value in properties.items():
|
||||||
try:
|
get_child_models(key, value, field_models, cls)
|
||||||
field_models.append(
|
|
||||||
cls(
|
|
||||||
name=key,
|
|
||||||
displayName=value.get("title"),
|
|
||||||
dataType=JsonSchemaDataTypes(value.get("type", "unknown")).name,
|
|
||||||
description=value.get("description"),
|
|
||||||
children=get_json_schema_fields(value.get("properties"), cls=cls)
|
|
||||||
if value.get("type") == "object"
|
|
||||||
else None,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
except Exception as exc: # pylint: disable=broad-except
|
|
||||||
logger.debug(traceback.format_exc())
|
|
||||||
logger.warning(f"Unable to parse the json schema into models: {exc}")
|
|
||||||
|
|
||||||
return field_models
|
return field_models
|
||||||
|
|||||||
@ -91,8 +91,81 @@ class JsonSchemaParserTests(TestCase):
|
|||||||
}
|
}
|
||||||
}"""
|
}"""
|
||||||
|
|
||||||
|
sample_array_json_schema = """
|
||||||
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"firstName": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"lastName": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"age": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"address": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"streetAddress": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"city": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"state": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"postalCode": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": [
|
||||||
|
"streetAddress",
|
||||||
|
"city",
|
||||||
|
"state",
|
||||||
|
"postalCode"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"phoneNumbers": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"type": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"number": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": [
|
||||||
|
"type",
|
||||||
|
"number"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"hobbies": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": [
|
||||||
|
"firstName",
|
||||||
|
"lastName",
|
||||||
|
"age",
|
||||||
|
"address",
|
||||||
|
"phoneNumbers"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
parsed_schema = parse_json_schema(sample_json_schema)
|
parsed_schema = parse_json_schema(sample_json_schema)
|
||||||
parsed_postgres_schema = parse_json_schema(sample_postgres_json_schema, Column)
|
parsed_postgres_schema = parse_json_schema(sample_postgres_json_schema, Column)
|
||||||
|
parsed_array_schema = parse_json_schema(sample_array_json_schema)
|
||||||
|
|
||||||
def test_schema_name(self):
|
def test_schema_name(self):
|
||||||
self.assertEqual(self.parsed_schema[0].name.root, "Person")
|
self.assertEqual(self.parsed_schema[0].name.root, "Person")
|
||||||
@ -143,3 +216,37 @@ class JsonSchemaParserTests(TestCase):
|
|||||||
)
|
)
|
||||||
self.assertEqual(len(self.parsed_postgres_schema[0].children), 3)
|
self.assertEqual(len(self.parsed_postgres_schema[0].children), 3)
|
||||||
self.assertEqual(len(self.parsed_postgres_schema[0].children[1].children), 4)
|
self.assertEqual(len(self.parsed_postgres_schema[0].children[1].children), 4)
|
||||||
|
|
||||||
|
def test_parse_postgres_json_fields(self):
|
||||||
|
self.assertEqual(self.parsed_array_schema[0].name.root, "default")
|
||||||
|
self.assertEqual(len(self.parsed_array_schema[0].children), 6)
|
||||||
|
|
||||||
|
# Validate the complex array datatype
|
||||||
|
self.assertEqual(
|
||||||
|
self.parsed_array_schema[0].children[4].name.root, "phoneNumbers"
|
||||||
|
)
|
||||||
|
self.assertEqual(self.parsed_array_schema[0].children[4].dataType.name, "ARRAY")
|
||||||
|
self.assertEqual(
|
||||||
|
self.parsed_array_schema[0].children[4].dataTypeDisplay, "ARRAY<RECORD>"
|
||||||
|
)
|
||||||
|
self.assertEqual(len(self.parsed_array_schema[0].children[4].children), 2)
|
||||||
|
self.assertEqual(
|
||||||
|
self.parsed_array_schema[0].children[4].children[0].name.root, "type"
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
self.parsed_array_schema[0].children[4].children[0].dataType.name, "STRING"
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
self.parsed_array_schema[0].children[4].children[1].name.root, "number"
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
self.parsed_array_schema[0].children[4].children[1].dataType.name, "STRING"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Validate the primitive array datatype
|
||||||
|
self.assertEqual(self.parsed_array_schema[0].children[5].name.root, "hobbies")
|
||||||
|
self.assertEqual(self.parsed_array_schema[0].children[5].dataType.name, "ARRAY")
|
||||||
|
self.assertEqual(
|
||||||
|
self.parsed_array_schema[0].children[5].dataTypeDisplay, "ARRAY<STRING>"
|
||||||
|
)
|
||||||
|
self.assertIsNone(self.parsed_array_schema[0].children[5].children)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user