mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-10-08 15:26:39 +00:00
Add OpenAPI YAML format support for REST API ingestion (#22304)
* Initial plan * Implement OpenAPI YAML support with backward JSON compatibility Co-authored-by: harshach <38649+harshach@users.noreply.github.com> * fix tests & lint --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: harshach <38649+harshach@users.noreply.github.com> Co-authored-by: ulixius9 <mayursingal9@gmail.com>
This commit is contained in:
parent
13d73de29f
commit
a4da357892
@ -28,6 +28,11 @@ from metadata.generated.schema.entity.services.connections.testConnectionResult
|
||||
)
|
||||
from metadata.ingestion.connections.test_connections import test_connection_steps
|
||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||
from metadata.ingestion.source.api.rest.parser import (
|
||||
OpenAPIParseError,
|
||||
parse_openapi_schema,
|
||||
validate_openapi_schema,
|
||||
)
|
||||
from metadata.utils.constants import THREE_MIN
|
||||
|
||||
|
||||
@ -75,16 +80,17 @@ def test_connection(
|
||||
|
||||
def custom_schema_exec():
|
||||
try:
|
||||
if client.json() is not None and client.json().get("openapi") is not None:
|
||||
schema = parse_openapi_schema(client)
|
||||
if validate_openapi_schema(schema):
|
||||
return []
|
||||
|
||||
raise InvalidOpenAPISchemaError(
|
||||
"Provided schema is not valid OpenAPI JSON schema"
|
||||
)
|
||||
except:
|
||||
raise InvalidOpenAPISchemaError(
|
||||
"Provided schema is not valid OpenAPI JSON schema"
|
||||
"Provided schema is not valid OpenAPI specification"
|
||||
)
|
||||
except OpenAPIParseError as e:
|
||||
raise InvalidOpenAPISchemaError(f"Failed to parse OpenAPI schema: {e}")
|
||||
except Exception as e:
|
||||
raise InvalidOpenAPISchemaError(f"Error validating OpenAPI schema: {e}")
|
||||
|
||||
test_fn = {"CheckURL": custom_url_exec, "CheckSchema": custom_schema_exec}
|
||||
|
||||
|
@ -40,6 +40,7 @@ from metadata.ingestion.api.steps import InvalidSourceException
|
||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||
from metadata.ingestion.source.api.api_service import ApiServiceSource
|
||||
from metadata.ingestion.source.api.rest.models import RESTCollection, RESTEndpoint
|
||||
from metadata.ingestion.source.api.rest.parser import parse_openapi_schema
|
||||
from metadata.utils import fqn
|
||||
from metadata.utils.filters import filter_by_collection
|
||||
from metadata.utils.helpers import clean_uri
|
||||
@ -77,7 +78,7 @@ class RestSource(ApiServiceSource):
|
||||
Here is where filtering happens
|
||||
"""
|
||||
try:
|
||||
self.json_response = self.connection.json()
|
||||
self.json_response = parse_openapi_schema(self.connection)
|
||||
collections_list = []
|
||||
tags_collection_set = set()
|
||||
if self.json_response.get("tags", []):
|
||||
|
103
ingestion/src/metadata/ingestion/source/api/rest/parser.py
Normal file
103
ingestion/src/metadata/ingestion/source/api/rest/parser.py
Normal file
@ -0,0 +1,103 @@
|
||||
# Copyright 2024 Collate
|
||||
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
OpenAPI schema parser for both JSON and YAML formats
|
||||
"""
|
||||
import json
|
||||
from typing import Any, Dict
|
||||
|
||||
import yaml
|
||||
from requests.models import Response
|
||||
|
||||
from metadata.utils.logger import ingestion_logger
|
||||
|
||||
logger = ingestion_logger()
|
||||
|
||||
|
||||
class OpenAPIParseError(Exception):
|
||||
"""
|
||||
Exception raised when OpenAPI schema cannot be parsed as either JSON or YAML
|
||||
"""
|
||||
|
||||
|
||||
def parse_openapi_schema(response: Response) -> Dict[str, Any]:
|
||||
"""
|
||||
Parse OpenAPI schema from HTTP response.
|
||||
Supports both JSON and YAML formats.
|
||||
|
||||
Args:
|
||||
response: HTTP response containing OpenAPI schema
|
||||
|
||||
Returns:
|
||||
Parsed OpenAPI schema as dictionary
|
||||
|
||||
Raises:
|
||||
OpenAPIParseError: If content cannot be parsed as either JSON or YAML
|
||||
"""
|
||||
content = response.text
|
||||
content_type = response.headers.get("content-type", "").lower()
|
||||
|
||||
# Try to determine format from content-type header
|
||||
if "json" in content_type:
|
||||
try:
|
||||
return json.loads(content)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Failed to parse as JSON despite content-type: {e}")
|
||||
elif "yaml" in content_type or "yml" in content_type:
|
||||
try:
|
||||
return yaml.safe_load(content)
|
||||
except yaml.YAMLError as e:
|
||||
logger.warning(f"Failed to parse as YAML despite content-type: {e}")
|
||||
|
||||
# If content-type is not definitive or parsing failed, try both formats
|
||||
|
||||
# First try JSON (backward compatibility)
|
||||
try:
|
||||
parsed = json.loads(content)
|
||||
logger.debug("Successfully parsed OpenAPI schema as JSON")
|
||||
return parsed
|
||||
except json.JSONDecodeError:
|
||||
logger.debug("Content is not valid JSON, trying YAML")
|
||||
|
||||
# Then try YAML
|
||||
try:
|
||||
parsed = yaml.safe_load(content)
|
||||
if parsed is None:
|
||||
raise OpenAPIParseError("YAML parsing returned None")
|
||||
logger.debug("Successfully parsed OpenAPI schema as YAML")
|
||||
return parsed
|
||||
except yaml.YAMLError as e:
|
||||
logger.error(f"Failed to parse as YAML: {e}")
|
||||
|
||||
# If both formats fail, raise an error
|
||||
raise OpenAPIParseError(
|
||||
"Failed to parse OpenAPI schema as either JSON or YAML. "
|
||||
"Please ensure the schema is valid OpenAPI specification."
|
||||
)
|
||||
|
||||
|
||||
def validate_openapi_schema(schema: Dict[str, Any]) -> bool:
|
||||
"""
|
||||
Validate that the parsed schema is a valid OpenAPI specification.
|
||||
|
||||
Args:
|
||||
schema: Parsed schema dictionary
|
||||
|
||||
Returns:
|
||||
True if schema appears to be valid OpenAPI, False otherwise
|
||||
"""
|
||||
# Check for required OpenAPI fields
|
||||
if not isinstance(schema, dict):
|
||||
return False
|
||||
|
||||
# OpenAPI 3.x uses "openapi" field, OpenAPI 2.x uses "swagger" field
|
||||
return schema.get("openapi") is not None or schema.get("swagger") is not None
|
200
ingestion/tests/unit/topology/api/test_openapi_parser.py
Normal file
200
ingestion/tests/unit/topology/api/test_openapi_parser.py
Normal file
@ -0,0 +1,200 @@
|
||||
# Copyright 2024 Collate
|
||||
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
Test OpenAPI schema parser for both JSON and YAML formats
|
||||
"""
|
||||
|
||||
import json
|
||||
from unittest import TestCase
|
||||
from unittest.mock import Mock
|
||||
|
||||
import yaml
|
||||
|
||||
from metadata.ingestion.source.api.rest.parser import (
|
||||
OpenAPIParseError,
|
||||
parse_openapi_schema,
|
||||
validate_openapi_schema,
|
||||
)
|
||||
|
||||
|
||||
class TestOpenAPIParser(TestCase):
|
||||
"""Test cases for OpenAPI schema parser"""
|
||||
|
||||
def setUp(self):
|
||||
"""Set up test data"""
|
||||
self.openapi_data = {
|
||||
"openapi": "3.0.3",
|
||||
"info": {"title": "Test API", "version": "1.0.0"},
|
||||
"paths": {
|
||||
"/pets": {
|
||||
"get": {
|
||||
"tags": ["pets"],
|
||||
"summary": "List all pets",
|
||||
"operationId": "listPets",
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": [{"name": "pets", "description": "Everything about pets"}],
|
||||
}
|
||||
|
||||
self.swagger_data = {
|
||||
"swagger": "2.0",
|
||||
"info": {"title": "Test API", "version": "1.0.0"},
|
||||
"paths": {
|
||||
"/pets": {
|
||||
"get": {
|
||||
"tags": ["pets"],
|
||||
"summary": "List all pets",
|
||||
"operationId": "listPets",
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": [{"name": "pets", "description": "Everything about pets"}],
|
||||
}
|
||||
|
||||
def test_parse_json_openapi_schema(self):
|
||||
"""Test parsing valid JSON OpenAPI schema"""
|
||||
mock_response = Mock()
|
||||
mock_response.text = json.dumps(self.openapi_data)
|
||||
mock_response.headers = {"content-type": "application/json"}
|
||||
|
||||
result = parse_openapi_schema(mock_response)
|
||||
|
||||
self.assertEqual(result, self.openapi_data)
|
||||
|
||||
def test_parse_yaml_openapi_schema(self):
|
||||
"""Test parsing valid YAML OpenAPI schema"""
|
||||
mock_response = Mock()
|
||||
mock_response.text = yaml.dump(self.openapi_data)
|
||||
mock_response.headers = {"content-type": "application/yaml"}
|
||||
|
||||
result = parse_openapi_schema(mock_response)
|
||||
|
||||
self.assertEqual(result, self.openapi_data)
|
||||
|
||||
def test_parse_json_without_content_type(self):
|
||||
"""Test parsing JSON when content-type is not specified"""
|
||||
mock_response = Mock()
|
||||
mock_response.text = json.dumps(self.openapi_data)
|
||||
mock_response.headers = {}
|
||||
|
||||
result = parse_openapi_schema(mock_response)
|
||||
|
||||
self.assertEqual(result, self.openapi_data)
|
||||
|
||||
def test_parse_yaml_without_content_type(self):
|
||||
"""Test parsing YAML when content-type is not specified"""
|
||||
mock_response = Mock()
|
||||
# Use YAML format that's not valid JSON to ensure YAML parser is used
|
||||
yaml_content = """
|
||||
openapi: 3.0.3
|
||||
info:
|
||||
title: Test API
|
||||
version: 1.0.0
|
||||
paths:
|
||||
/pets:
|
||||
get:
|
||||
tags:
|
||||
- pets
|
||||
summary: List all pets
|
||||
operationId: listPets
|
||||
tags:
|
||||
- name: pets
|
||||
description: Everything about pets
|
||||
"""
|
||||
mock_response.text = yaml_content
|
||||
mock_response.headers = {}
|
||||
|
||||
result = parse_openapi_schema(mock_response)
|
||||
|
||||
self.assertEqual(result["openapi"], "3.0.3")
|
||||
self.assertEqual(result["info"]["title"], "Test API")
|
||||
|
||||
def test_parse_yml_content_type(self):
|
||||
"""Test parsing with content-type: application/yml"""
|
||||
mock_response = Mock()
|
||||
mock_response.text = yaml.dump(self.openapi_data)
|
||||
mock_response.headers = {"content-type": "application/yml"}
|
||||
|
||||
result = parse_openapi_schema(mock_response)
|
||||
|
||||
self.assertEqual(result, self.openapi_data)
|
||||
|
||||
def test_parse_text_yaml_content_type(self):
|
||||
"""Test parsing with content-type: text/yaml"""
|
||||
mock_response = Mock()
|
||||
mock_response.text = yaml.dump(self.openapi_data)
|
||||
mock_response.headers = {"content-type": "text/yaml"}
|
||||
|
||||
result = parse_openapi_schema(mock_response)
|
||||
|
||||
self.assertEqual(result, self.openapi_data)
|
||||
|
||||
def test_parse_empty_content(self):
|
||||
"""Test parsing empty content"""
|
||||
mock_response = Mock()
|
||||
mock_response.text = ""
|
||||
mock_response.headers = {}
|
||||
|
||||
with self.assertRaises(OpenAPIParseError):
|
||||
parse_openapi_schema(mock_response)
|
||||
|
||||
def test_parse_none_yaml_content(self):
|
||||
"""Test parsing YAML that returns None"""
|
||||
mock_response = Mock()
|
||||
mock_response.text = "# This is just a comment\n"
|
||||
mock_response.headers = {}
|
||||
|
||||
with self.assertRaises(OpenAPIParseError):
|
||||
parse_openapi_schema(mock_response)
|
||||
|
||||
def test_validate_openapi_schema_valid_openapi3(self):
|
||||
"""Test validation of valid OpenAPI 3.x schema"""
|
||||
self.assertTrue(validate_openapi_schema(self.openapi_data))
|
||||
|
||||
def test_validate_openapi_schema_valid_swagger2(self):
|
||||
"""Test validation of valid Swagger 2.0 schema"""
|
||||
self.assertTrue(validate_openapi_schema(self.swagger_data))
|
||||
|
||||
def test_validate_openapi_schema_invalid_not_dict(self):
|
||||
"""Test validation of invalid schema (not a dictionary)"""
|
||||
self.assertFalse(validate_openapi_schema("not a dict"))
|
||||
self.assertFalse(validate_openapi_schema(123))
|
||||
self.assertFalse(validate_openapi_schema([]))
|
||||
|
||||
def test_validate_openapi_schema_invalid_missing_fields(self):
|
||||
"""Test validation of invalid schema (missing required fields)"""
|
||||
invalid_schema = {"info": {"title": "Test API", "version": "1.0.0"}}
|
||||
self.assertFalse(validate_openapi_schema(invalid_schema))
|
||||
|
||||
def test_parse_json_with_wrong_content_type_fallback(self):
|
||||
"""Test parsing JSON with wrong content-type, should fallback to JSON parser"""
|
||||
mock_response = Mock()
|
||||
mock_response.text = json.dumps(self.openapi_data)
|
||||
mock_response.headers = {"content-type": "text/plain"} # Wrong content-type
|
||||
|
||||
result = parse_openapi_schema(mock_response)
|
||||
|
||||
self.assertEqual(result, self.openapi_data)
|
||||
|
||||
def test_parse_yaml_with_wrong_content_type_fallback(self):
|
||||
"""Test parsing YAML with wrong content-type, should fallback to YAML parser"""
|
||||
mock_response = Mock()
|
||||
# YAML that's not valid JSON
|
||||
yaml_content = "openapi: '3.0.3'\ninfo:\n title: Test\n version: '1.0'"
|
||||
mock_response.text = yaml_content
|
||||
mock_response.headers = {"content-type": "text/plain"} # Wrong content-type
|
||||
|
||||
result = parse_openapi_schema(mock_response)
|
||||
|
||||
self.assertEqual(result["openapi"], "3.0.3")
|
||||
self.assertEqual(result["info"]["title"], "Test")
|
@ -276,7 +276,7 @@ class RESTTest(TestCase):
|
||||
):
|
||||
collections = list(self.rest_source.get_api_collections())
|
||||
MOCK_COLLECTIONS_COPY = deepcopy(MOCK_COLLECTIONS)
|
||||
MOCK_COLLECTIONS_COPY[2].description = None
|
||||
MOCK_COLLECTIONS_COPY[2].description = Markdown(root="Operations about user")
|
||||
assert collections == MOCK_COLLECTIONS_COPY
|
||||
|
||||
def test_generate_collection_url(self):
|
||||
|
Loading…
x
Reference in New Issue
Block a user