mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-10-09 15:56:33 +00:00
Add OpenAPI YAML format support for REST API ingestion (#22304)
* Initial plan * Implement OpenAPI YAML support with backward JSON compatibility Co-authored-by: harshach <38649+harshach@users.noreply.github.com> * fix tests & lint --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: harshach <38649+harshach@users.noreply.github.com> Co-authored-by: ulixius9 <mayursingal9@gmail.com>
This commit is contained in:
parent
13d73de29f
commit
a4da357892
@ -28,6 +28,11 @@ from metadata.generated.schema.entity.services.connections.testConnectionResult
|
|||||||
)
|
)
|
||||||
from metadata.ingestion.connections.test_connections import test_connection_steps
|
from metadata.ingestion.connections.test_connections import test_connection_steps
|
||||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||||
|
from metadata.ingestion.source.api.rest.parser import (
|
||||||
|
OpenAPIParseError,
|
||||||
|
parse_openapi_schema,
|
||||||
|
validate_openapi_schema,
|
||||||
|
)
|
||||||
from metadata.utils.constants import THREE_MIN
|
from metadata.utils.constants import THREE_MIN
|
||||||
|
|
||||||
|
|
||||||
@ -75,16 +80,17 @@ def test_connection(
|
|||||||
|
|
||||||
def custom_schema_exec():
|
def custom_schema_exec():
|
||||||
try:
|
try:
|
||||||
if client.json() is not None and client.json().get("openapi") is not None:
|
schema = parse_openapi_schema(client)
|
||||||
|
if validate_openapi_schema(schema):
|
||||||
return []
|
return []
|
||||||
|
|
||||||
raise InvalidOpenAPISchemaError(
|
raise InvalidOpenAPISchemaError(
|
||||||
"Provided schema is not valid OpenAPI JSON schema"
|
"Provided schema is not valid OpenAPI specification"
|
||||||
)
|
|
||||||
except:
|
|
||||||
raise InvalidOpenAPISchemaError(
|
|
||||||
"Provided schema is not valid OpenAPI JSON schema"
|
|
||||||
)
|
)
|
||||||
|
except OpenAPIParseError as e:
|
||||||
|
raise InvalidOpenAPISchemaError(f"Failed to parse OpenAPI schema: {e}")
|
||||||
|
except Exception as e:
|
||||||
|
raise InvalidOpenAPISchemaError(f"Error validating OpenAPI schema: {e}")
|
||||||
|
|
||||||
test_fn = {"CheckURL": custom_url_exec, "CheckSchema": custom_schema_exec}
|
test_fn = {"CheckURL": custom_url_exec, "CheckSchema": custom_schema_exec}
|
||||||
|
|
||||||
|
@ -40,6 +40,7 @@ from metadata.ingestion.api.steps import InvalidSourceException
|
|||||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||||
from metadata.ingestion.source.api.api_service import ApiServiceSource
|
from metadata.ingestion.source.api.api_service import ApiServiceSource
|
||||||
from metadata.ingestion.source.api.rest.models import RESTCollection, RESTEndpoint
|
from metadata.ingestion.source.api.rest.models import RESTCollection, RESTEndpoint
|
||||||
|
from metadata.ingestion.source.api.rest.parser import parse_openapi_schema
|
||||||
from metadata.utils import fqn
|
from metadata.utils import fqn
|
||||||
from metadata.utils.filters import filter_by_collection
|
from metadata.utils.filters import filter_by_collection
|
||||||
from metadata.utils.helpers import clean_uri
|
from metadata.utils.helpers import clean_uri
|
||||||
@ -77,7 +78,7 @@ class RestSource(ApiServiceSource):
|
|||||||
Here is where filtering happens
|
Here is where filtering happens
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
self.json_response = self.connection.json()
|
self.json_response = parse_openapi_schema(self.connection)
|
||||||
collections_list = []
|
collections_list = []
|
||||||
tags_collection_set = set()
|
tags_collection_set = set()
|
||||||
if self.json_response.get("tags", []):
|
if self.json_response.get("tags", []):
|
||||||
|
103
ingestion/src/metadata/ingestion/source/api/rest/parser.py
Normal file
103
ingestion/src/metadata/ingestion/source/api/rest/parser.py
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
# Copyright 2024 Collate
|
||||||
|
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
"""
|
||||||
|
OpenAPI schema parser for both JSON and YAML formats
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
from typing import Any, Dict
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
from requests.models import Response
|
||||||
|
|
||||||
|
from metadata.utils.logger import ingestion_logger
|
||||||
|
|
||||||
|
logger = ingestion_logger()
|
||||||
|
|
||||||
|
|
||||||
|
class OpenAPIParseError(Exception):
|
||||||
|
"""
|
||||||
|
Exception raised when OpenAPI schema cannot be parsed as either JSON or YAML
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def parse_openapi_schema(response: Response) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Parse OpenAPI schema from HTTP response.
|
||||||
|
Supports both JSON and YAML formats.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
response: HTTP response containing OpenAPI schema
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Parsed OpenAPI schema as dictionary
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
OpenAPIParseError: If content cannot be parsed as either JSON or YAML
|
||||||
|
"""
|
||||||
|
content = response.text
|
||||||
|
content_type = response.headers.get("content-type", "").lower()
|
||||||
|
|
||||||
|
# Try to determine format from content-type header
|
||||||
|
if "json" in content_type:
|
||||||
|
try:
|
||||||
|
return json.loads(content)
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
logger.warning(f"Failed to parse as JSON despite content-type: {e}")
|
||||||
|
elif "yaml" in content_type or "yml" in content_type:
|
||||||
|
try:
|
||||||
|
return yaml.safe_load(content)
|
||||||
|
except yaml.YAMLError as e:
|
||||||
|
logger.warning(f"Failed to parse as YAML despite content-type: {e}")
|
||||||
|
|
||||||
|
# If content-type is not definitive or parsing failed, try both formats
|
||||||
|
|
||||||
|
# First try JSON (backward compatibility)
|
||||||
|
try:
|
||||||
|
parsed = json.loads(content)
|
||||||
|
logger.debug("Successfully parsed OpenAPI schema as JSON")
|
||||||
|
return parsed
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logger.debug("Content is not valid JSON, trying YAML")
|
||||||
|
|
||||||
|
# Then try YAML
|
||||||
|
try:
|
||||||
|
parsed = yaml.safe_load(content)
|
||||||
|
if parsed is None:
|
||||||
|
raise OpenAPIParseError("YAML parsing returned None")
|
||||||
|
logger.debug("Successfully parsed OpenAPI schema as YAML")
|
||||||
|
return parsed
|
||||||
|
except yaml.YAMLError as e:
|
||||||
|
logger.error(f"Failed to parse as YAML: {e}")
|
||||||
|
|
||||||
|
# If both formats fail, raise an error
|
||||||
|
raise OpenAPIParseError(
|
||||||
|
"Failed to parse OpenAPI schema as either JSON or YAML. "
|
||||||
|
"Please ensure the schema is valid OpenAPI specification."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def validate_openapi_schema(schema: Dict[str, Any]) -> bool:
|
||||||
|
"""
|
||||||
|
Validate that the parsed schema is a valid OpenAPI specification.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
schema: Parsed schema dictionary
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if schema appears to be valid OpenAPI, False otherwise
|
||||||
|
"""
|
||||||
|
# Check for required OpenAPI fields
|
||||||
|
if not isinstance(schema, dict):
|
||||||
|
return False
|
||||||
|
|
||||||
|
# OpenAPI 3.x uses "openapi" field, OpenAPI 2.x uses "swagger" field
|
||||||
|
return schema.get("openapi") is not None or schema.get("swagger") is not None
|
200
ingestion/tests/unit/topology/api/test_openapi_parser.py
Normal file
200
ingestion/tests/unit/topology/api/test_openapi_parser.py
Normal file
@ -0,0 +1,200 @@
|
|||||||
|
# Copyright 2024 Collate
|
||||||
|
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
"""
|
||||||
|
Test OpenAPI schema parser for both JSON and YAML formats
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
from unittest import TestCase
|
||||||
|
from unittest.mock import Mock
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
from metadata.ingestion.source.api.rest.parser import (
|
||||||
|
OpenAPIParseError,
|
||||||
|
parse_openapi_schema,
|
||||||
|
validate_openapi_schema,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestOpenAPIParser(TestCase):
|
||||||
|
"""Test cases for OpenAPI schema parser"""
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
"""Set up test data"""
|
||||||
|
self.openapi_data = {
|
||||||
|
"openapi": "3.0.3",
|
||||||
|
"info": {"title": "Test API", "version": "1.0.0"},
|
||||||
|
"paths": {
|
||||||
|
"/pets": {
|
||||||
|
"get": {
|
||||||
|
"tags": ["pets"],
|
||||||
|
"summary": "List all pets",
|
||||||
|
"operationId": "listPets",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tags": [{"name": "pets", "description": "Everything about pets"}],
|
||||||
|
}
|
||||||
|
|
||||||
|
self.swagger_data = {
|
||||||
|
"swagger": "2.0",
|
||||||
|
"info": {"title": "Test API", "version": "1.0.0"},
|
||||||
|
"paths": {
|
||||||
|
"/pets": {
|
||||||
|
"get": {
|
||||||
|
"tags": ["pets"],
|
||||||
|
"summary": "List all pets",
|
||||||
|
"operationId": "listPets",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tags": [{"name": "pets", "description": "Everything about pets"}],
|
||||||
|
}
|
||||||
|
|
||||||
|
def test_parse_json_openapi_schema(self):
|
||||||
|
"""Test parsing valid JSON OpenAPI schema"""
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_response.text = json.dumps(self.openapi_data)
|
||||||
|
mock_response.headers = {"content-type": "application/json"}
|
||||||
|
|
||||||
|
result = parse_openapi_schema(mock_response)
|
||||||
|
|
||||||
|
self.assertEqual(result, self.openapi_data)
|
||||||
|
|
||||||
|
def test_parse_yaml_openapi_schema(self):
|
||||||
|
"""Test parsing valid YAML OpenAPI schema"""
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_response.text = yaml.dump(self.openapi_data)
|
||||||
|
mock_response.headers = {"content-type": "application/yaml"}
|
||||||
|
|
||||||
|
result = parse_openapi_schema(mock_response)
|
||||||
|
|
||||||
|
self.assertEqual(result, self.openapi_data)
|
||||||
|
|
||||||
|
def test_parse_json_without_content_type(self):
|
||||||
|
"""Test parsing JSON when content-type is not specified"""
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_response.text = json.dumps(self.openapi_data)
|
||||||
|
mock_response.headers = {}
|
||||||
|
|
||||||
|
result = parse_openapi_schema(mock_response)
|
||||||
|
|
||||||
|
self.assertEqual(result, self.openapi_data)
|
||||||
|
|
||||||
|
def test_parse_yaml_without_content_type(self):
|
||||||
|
"""Test parsing YAML when content-type is not specified"""
|
||||||
|
mock_response = Mock()
|
||||||
|
# Use YAML format that's not valid JSON to ensure YAML parser is used
|
||||||
|
yaml_content = """
|
||||||
|
openapi: 3.0.3
|
||||||
|
info:
|
||||||
|
title: Test API
|
||||||
|
version: 1.0.0
|
||||||
|
paths:
|
||||||
|
/pets:
|
||||||
|
get:
|
||||||
|
tags:
|
||||||
|
- pets
|
||||||
|
summary: List all pets
|
||||||
|
operationId: listPets
|
||||||
|
tags:
|
||||||
|
- name: pets
|
||||||
|
description: Everything about pets
|
||||||
|
"""
|
||||||
|
mock_response.text = yaml_content
|
||||||
|
mock_response.headers = {}
|
||||||
|
|
||||||
|
result = parse_openapi_schema(mock_response)
|
||||||
|
|
||||||
|
self.assertEqual(result["openapi"], "3.0.3")
|
||||||
|
self.assertEqual(result["info"]["title"], "Test API")
|
||||||
|
|
||||||
|
def test_parse_yml_content_type(self):
|
||||||
|
"""Test parsing with content-type: application/yml"""
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_response.text = yaml.dump(self.openapi_data)
|
||||||
|
mock_response.headers = {"content-type": "application/yml"}
|
||||||
|
|
||||||
|
result = parse_openapi_schema(mock_response)
|
||||||
|
|
||||||
|
self.assertEqual(result, self.openapi_data)
|
||||||
|
|
||||||
|
def test_parse_text_yaml_content_type(self):
|
||||||
|
"""Test parsing with content-type: text/yaml"""
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_response.text = yaml.dump(self.openapi_data)
|
||||||
|
mock_response.headers = {"content-type": "text/yaml"}
|
||||||
|
|
||||||
|
result = parse_openapi_schema(mock_response)
|
||||||
|
|
||||||
|
self.assertEqual(result, self.openapi_data)
|
||||||
|
|
||||||
|
def test_parse_empty_content(self):
|
||||||
|
"""Test parsing empty content"""
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_response.text = ""
|
||||||
|
mock_response.headers = {}
|
||||||
|
|
||||||
|
with self.assertRaises(OpenAPIParseError):
|
||||||
|
parse_openapi_schema(mock_response)
|
||||||
|
|
||||||
|
def test_parse_none_yaml_content(self):
|
||||||
|
"""Test parsing YAML that returns None"""
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_response.text = "# This is just a comment\n"
|
||||||
|
mock_response.headers = {}
|
||||||
|
|
||||||
|
with self.assertRaises(OpenAPIParseError):
|
||||||
|
parse_openapi_schema(mock_response)
|
||||||
|
|
||||||
|
def test_validate_openapi_schema_valid_openapi3(self):
|
||||||
|
"""Test validation of valid OpenAPI 3.x schema"""
|
||||||
|
self.assertTrue(validate_openapi_schema(self.openapi_data))
|
||||||
|
|
||||||
|
def test_validate_openapi_schema_valid_swagger2(self):
|
||||||
|
"""Test validation of valid Swagger 2.0 schema"""
|
||||||
|
self.assertTrue(validate_openapi_schema(self.swagger_data))
|
||||||
|
|
||||||
|
def test_validate_openapi_schema_invalid_not_dict(self):
|
||||||
|
"""Test validation of invalid schema (not a dictionary)"""
|
||||||
|
self.assertFalse(validate_openapi_schema("not a dict"))
|
||||||
|
self.assertFalse(validate_openapi_schema(123))
|
||||||
|
self.assertFalse(validate_openapi_schema([]))
|
||||||
|
|
||||||
|
def test_validate_openapi_schema_invalid_missing_fields(self):
|
||||||
|
"""Test validation of invalid schema (missing required fields)"""
|
||||||
|
invalid_schema = {"info": {"title": "Test API", "version": "1.0.0"}}
|
||||||
|
self.assertFalse(validate_openapi_schema(invalid_schema))
|
||||||
|
|
||||||
|
def test_parse_json_with_wrong_content_type_fallback(self):
|
||||||
|
"""Test parsing JSON with wrong content-type, should fallback to JSON parser"""
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_response.text = json.dumps(self.openapi_data)
|
||||||
|
mock_response.headers = {"content-type": "text/plain"} # Wrong content-type
|
||||||
|
|
||||||
|
result = parse_openapi_schema(mock_response)
|
||||||
|
|
||||||
|
self.assertEqual(result, self.openapi_data)
|
||||||
|
|
||||||
|
def test_parse_yaml_with_wrong_content_type_fallback(self):
|
||||||
|
"""Test parsing YAML with wrong content-type, should fallback to YAML parser"""
|
||||||
|
mock_response = Mock()
|
||||||
|
# YAML that's not valid JSON
|
||||||
|
yaml_content = "openapi: '3.0.3'\ninfo:\n title: Test\n version: '1.0'"
|
||||||
|
mock_response.text = yaml_content
|
||||||
|
mock_response.headers = {"content-type": "text/plain"} # Wrong content-type
|
||||||
|
|
||||||
|
result = parse_openapi_schema(mock_response)
|
||||||
|
|
||||||
|
self.assertEqual(result["openapi"], "3.0.3")
|
||||||
|
self.assertEqual(result["info"]["title"], "Test")
|
@ -276,7 +276,7 @@ class RESTTest(TestCase):
|
|||||||
):
|
):
|
||||||
collections = list(self.rest_source.get_api_collections())
|
collections = list(self.rest_source.get_api_collections())
|
||||||
MOCK_COLLECTIONS_COPY = deepcopy(MOCK_COLLECTIONS)
|
MOCK_COLLECTIONS_COPY = deepcopy(MOCK_COLLECTIONS)
|
||||||
MOCK_COLLECTIONS_COPY[2].description = None
|
MOCK_COLLECTIONS_COPY[2].description = Markdown(root="Operations about user")
|
||||||
assert collections == MOCK_COLLECTIONS_COPY
|
assert collections == MOCK_COLLECTIONS_COPY
|
||||||
|
|
||||||
def test_generate_collection_url(self):
|
def test_generate_collection_url(self):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user