dify/api/tests/unit_tests/services/test_metadata_bug_complete.py
lyzno1 0ea010d7ee
fix: metadata API nullable validation consistency issue (#23133)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2025-07-30 10:33:24 +08:00

190 lines
8.4 KiB
Python

from unittest.mock import Mock, patch
import pytest
from flask_restful import reqparse
from werkzeug.exceptions import BadRequest
from services.entities.knowledge_entities.knowledge_entities import MetadataArgs
from services.metadata_service import MetadataService
class TestMetadataBugCompleteValidation:
"""Complete test suite to verify the metadata nullable bug and its fix."""
def test_1_pydantic_layer_validation(self):
"""Test Layer 1: Pydantic model validation correctly rejects None values."""
# Pydantic should reject None values for required fields
with pytest.raises((ValueError, TypeError)):
MetadataArgs(type=None, name=None)
with pytest.raises((ValueError, TypeError)):
MetadataArgs(type="string", name=None)
with pytest.raises((ValueError, TypeError)):
MetadataArgs(type=None, name="test")
# Valid values should work
valid_args = MetadataArgs(type="string", name="test_name")
assert valid_args.type == "string"
assert valid_args.name == "test_name"
def test_2_business_logic_layer_crashes_on_none(self):
"""Test Layer 2: Business logic crashes when None values slip through."""
# Create mock that bypasses Pydantic validation
mock_metadata_args = Mock()
mock_metadata_args.name = None
mock_metadata_args.type = "string"
with patch("services.metadata_service.current_user") as mock_user:
mock_user.current_tenant_id = "tenant-123"
mock_user.id = "user-456"
# Should crash with TypeError
with pytest.raises(TypeError, match="object of type 'NoneType' has no len"):
MetadataService.create_metadata("dataset-123", mock_metadata_args)
# Test update method as well
with patch("services.metadata_service.current_user") as mock_user:
mock_user.current_tenant_id = "tenant-123"
mock_user.id = "user-456"
with pytest.raises(TypeError, match="object of type 'NoneType' has no len"):
MetadataService.update_metadata_name("dataset-123", "metadata-456", None)
def test_3_database_constraints_verification(self):
"""Test Layer 3: Verify database model has nullable=False constraints."""
from sqlalchemy import inspect
from models.dataset import DatasetMetadata
# Get table info
mapper = inspect(DatasetMetadata)
# Check that type and name columns are not nullable
type_column = mapper.columns["type"]
name_column = mapper.columns["name"]
assert type_column.nullable is False, "type column should be nullable=False"
assert name_column.nullable is False, "name column should be nullable=False"
def test_4_fixed_api_layer_rejects_null(self, app):
"""Test Layer 4: Fixed API configuration properly rejects null values."""
# Test Console API create endpoint (fixed)
parser = reqparse.RequestParser()
parser.add_argument("type", type=str, required=True, nullable=False, location="json")
parser.add_argument("name", type=str, required=True, nullable=False, location="json")
with app.test_request_context(json={"type": None, "name": None}, content_type="application/json"):
with pytest.raises(BadRequest):
parser.parse_args()
# Test with just name being null
with app.test_request_context(json={"type": "string", "name": None}, content_type="application/json"):
with pytest.raises(BadRequest):
parser.parse_args()
# Test with just type being null
with app.test_request_context(json={"type": None, "name": "test"}, content_type="application/json"):
with pytest.raises(BadRequest):
parser.parse_args()
def test_5_fixed_api_accepts_valid_values(self, app):
"""Test that fixed API still accepts valid non-null values."""
parser = reqparse.RequestParser()
parser.add_argument("type", type=str, required=True, nullable=False, location="json")
parser.add_argument("name", type=str, required=True, nullable=False, location="json")
with app.test_request_context(json={"type": "string", "name": "valid_name"}, content_type="application/json"):
args = parser.parse_args()
assert args["type"] == "string"
assert args["name"] == "valid_name"
def test_6_simulated_buggy_behavior(self, app):
"""Test simulating the original buggy behavior with nullable=True."""
# Simulate the old buggy configuration
buggy_parser = reqparse.RequestParser()
buggy_parser.add_argument("type", type=str, required=True, nullable=True, location="json")
buggy_parser.add_argument("name", type=str, required=True, nullable=True, location="json")
with app.test_request_context(json={"type": None, "name": None}, content_type="application/json"):
# This would pass in the buggy version
args = buggy_parser.parse_args()
assert args["type"] is None
assert args["name"] is None
# But would crash when trying to create MetadataArgs
with pytest.raises((ValueError, TypeError)):
MetadataArgs(**args)
def test_7_end_to_end_validation_layers(self):
"""Test all validation layers work together correctly."""
# Layer 1: API should reject null at parameter level (with fix)
# Layer 2: Pydantic should reject null at model level
# Layer 3: Business logic expects non-null
# Layer 4: Database enforces non-null
# Test that valid data flows through all layers
valid_data = {"type": "string", "name": "test_metadata"}
# Should create valid Pydantic object
metadata_args = MetadataArgs(**valid_data)
assert metadata_args.type == "string"
assert metadata_args.name == "test_metadata"
# Should not crash in business logic length check
assert len(metadata_args.name) <= 255 # This should not crash
assert len(metadata_args.type) > 0 # This should not crash
def test_8_verify_specific_fix_locations(self):
"""Verify that the specific locations mentioned in bug report are fixed."""
# Read the actual files to verify fixes
import os
# Console API create
console_create_file = "api/controllers/console/datasets/metadata.py"
if os.path.exists(console_create_file):
with open(console_create_file) as f:
content = f.read()
# Should contain nullable=False, not nullable=True
assert "nullable=True" not in content.split("class DatasetMetadataCreateApi")[1].split("class")[0]
# Service API create
service_create_file = "api/controllers/service_api/dataset/metadata.py"
if os.path.exists(service_create_file):
with open(service_create_file) as f:
content = f.read()
# Should contain nullable=False, not nullable=True
create_api_section = content.split("class DatasetMetadataCreateServiceApi")[1].split("class")[0]
assert "nullable=True" not in create_api_section
class TestMetadataValidationSummary:
"""Summary tests that demonstrate the complete validation architecture."""
def test_validation_layer_architecture(self):
"""Document and test the 4-layer validation architecture."""
# Layer 1: API Parameter Validation (Flask-RESTful reqparse)
# - Role: First line of defense, validates HTTP request parameters
# - Fixed: nullable=False ensures null values are rejected at API boundary
# Layer 2: Pydantic Model Validation
# - Role: Validates data structure and types before business logic
# - Working: Required fields without Optional[] reject None values
# Layer 3: Business Logic Validation
# - Role: Domain-specific validation (length checks, uniqueness, etc.)
# - Vulnerable: Direct len() calls crash on None values
# Layer 4: Database Constraints
# - Role: Final data integrity enforcement
# - Working: nullable=False prevents None values in database
# The bug was: Layer 1 allowed None, but Layers 2-4 expected non-None
# The fix: Make Layer 1 consistent with Layers 2-4
assert True # This test documents the architecture
if __name__ == "__main__":
pytest.main([__file__, "-v"])