Add array supp for json schema parser (#18255)

This commit is contained in:
Onkar Ravgan 2024-10-15 07:30:16 +05:30 committed by GitHub
parent 69a58d2556
commit 2ee015e426
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 152 additions and 16 deletions

View File

@ -16,7 +16,7 @@ Utils module to parse the jsonschema
import json
import traceback
from enum import Enum
from typing import List, Optional, Type
from typing import List, Optional, Tuple, Type
from pydantic import BaseModel
@ -66,6 +66,49 @@ def parse_json_schema(
return None
def get_child_models(key, value, field_models, cls: Type[BaseModel] = FieldModel):
"""
Method to parse the child objects in the json schema
"""
try:
cls_obj = cls(
name=key,
displayName=value.get("title"),
dataType=JsonSchemaDataTypes(value.get("type", "unknown")).name,
description=value.get("description"),
)
children = None
if value.get("type") == JsonSchemaDataTypes.RECORD.value:
children = get_json_schema_fields(value.get("properties"), cls=cls)
if value.get("type") == JsonSchemaDataTypes.ARRAY.value:
datatype_display, children = get_json_schema_array_fields(
value.get("items"), cls=cls
)
cls_obj.dataTypeDisplay = f"ARRAY<{datatype_display}>"
cls_obj.children = children
field_models.append(cls_obj)
except Exception as exc: # pylint: disable=broad-except
logger.debug(traceback.format_exc())
logger.warning(f"Unable to parse the json schema into models: {exc}")
def get_json_schema_array_fields(
array_items, cls: Type[BaseModel] = FieldModel
) -> Optional[Tuple[str, List[FieldModel]]]:
"""
Recursively convert the parsed array schema into required models
"""
field_models = []
if array_items.get("type") == JsonSchemaDataTypes.RECORD.value:
for key, value in array_items.get("properties", {}).items():
get_child_models(key, value, field_models, cls)
return (
JsonSchemaDataTypes(array_items.get("type", "unknown")).name,
field_models or None,
)
def get_json_schema_fields(
properties, cls: Type[BaseModel] = FieldModel
) -> Optional[List[FieldModel]]:
@ -74,20 +117,6 @@ def get_json_schema_fields(
"""
field_models = []
for key, value in properties.items():
try:
field_models.append(
cls(
name=key,
displayName=value.get("title"),
dataType=JsonSchemaDataTypes(value.get("type", "unknown")).name,
description=value.get("description"),
children=get_json_schema_fields(value.get("properties"), cls=cls)
if value.get("type") == "object"
else None,
)
)
except Exception as exc: # pylint: disable=broad-except
logger.debug(traceback.format_exc())
logger.warning(f"Unable to parse the json schema into models: {exc}")
get_child_models(key, value, field_models, cls)
return field_models

View File

@ -91,8 +91,81 @@ class JsonSchemaParserTests(TestCase):
}
}"""
sample_array_json_schema = """
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"firstName": {
"type": "string"
},
"lastName": {
"type": "string"
},
"age": {
"type": "integer"
},
"address": {
"type": "object",
"properties": {
"streetAddress": {
"type": "string"
},
"city": {
"type": "string"
},
"state": {
"type": "string"
},
"postalCode": {
"type": "string"
}
},
"required": [
"streetAddress",
"city",
"state",
"postalCode"
]
},
"phoneNumbers": {
"type": "array",
"items": {
"type": "object",
"properties": {
"type": {
"type": "string"
},
"number": {
"type": "string"
}
},
"required": [
"type",
"number"
]
}
},
"hobbies": {
"type": "array",
"items": {
"type": "string"
}
}
},
"required": [
"firstName",
"lastName",
"age",
"address",
"phoneNumbers"
]
}
"""
parsed_schema = parse_json_schema(sample_json_schema)
parsed_postgres_schema = parse_json_schema(sample_postgres_json_schema, Column)
parsed_array_schema = parse_json_schema(sample_array_json_schema)
def test_schema_name(self):
self.assertEqual(self.parsed_schema[0].name.root, "Person")
@ -143,3 +216,37 @@ class JsonSchemaParserTests(TestCase):
)
self.assertEqual(len(self.parsed_postgres_schema[0].children), 3)
self.assertEqual(len(self.parsed_postgres_schema[0].children[1].children), 4)
def test_parse_postgres_json_fields(self):
self.assertEqual(self.parsed_array_schema[0].name.root, "default")
self.assertEqual(len(self.parsed_array_schema[0].children), 6)
# Validate the complex array datatype
self.assertEqual(
self.parsed_array_schema[0].children[4].name.root, "phoneNumbers"
)
self.assertEqual(self.parsed_array_schema[0].children[4].dataType.name, "ARRAY")
self.assertEqual(
self.parsed_array_schema[0].children[4].dataTypeDisplay, "ARRAY<RECORD>"
)
self.assertEqual(len(self.parsed_array_schema[0].children[4].children), 2)
self.assertEqual(
self.parsed_array_schema[0].children[4].children[0].name.root, "type"
)
self.assertEqual(
self.parsed_array_schema[0].children[4].children[0].dataType.name, "STRING"
)
self.assertEqual(
self.parsed_array_schema[0].children[4].children[1].name.root, "number"
)
self.assertEqual(
self.parsed_array_schema[0].children[4].children[1].dataType.name, "STRING"
)
# Validate the primitive array datatype
self.assertEqual(self.parsed_array_schema[0].children[5].name.root, "hobbies")
self.assertEqual(self.parsed_array_schema[0].children[5].dataType.name, "ARRAY")
self.assertEqual(
self.parsed_array_schema[0].children[5].dataTypeDisplay, "ARRAY<STRING>"
)
self.assertIsNone(self.parsed_array_schema[0].children[5].children)