refactor: extract common url validator for config_entity.py (#21934)

Signed-off-by: neatguycoding <15627489+NeatGuyCoding@users.noreply.github.com>
This commit is contained in:
NeatGuyCoding 2025-07-07 09:34:13 +08:00 committed by GitHub
parent 8288145ee4
commit ac69b8b191
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 576 additions and 65 deletions

View File

@ -2,6 +2,8 @@ from enum import StrEnum
from pydantic import BaseModel, ValidationInfo, field_validator
from core.ops.utils import validate_project_name, validate_url, validate_url_with_path
class TracingProviderEnum(StrEnum):
ARIZE = "arize"
@ -15,10 +17,36 @@ class TracingProviderEnum(StrEnum):
class BaseTracingConfig(BaseModel):
"""
Base model class for tracing
Base model class for tracing configurations
"""
...
@classmethod
def validate_endpoint_url(cls, v: str, default_url: str) -> str:
"""
Common endpoint URL validation logic
Args:
v: URL value to validate
default_url: Default URL to use if input is None or empty
Returns:
Validated and normalized URL
"""
return validate_url(v, default_url)
@classmethod
def validate_project_field(cls, v: str, default_name: str) -> str:
"""
Common project name validation logic
Args:
v: Project name to validate
default_name: Default name to use if input is None or empty
Returns:
Validated project name
"""
return validate_project_name(v, default_name)
class ArizeConfig(BaseTracingConfig):
@ -34,23 +62,12 @@ class ArizeConfig(BaseTracingConfig):
@field_validator("project")
@classmethod
def project_validator(cls, v, info: ValidationInfo):
if v is None or v == "":
v = "default"
return v
return cls.validate_project_field(v, "default")
@field_validator("endpoint")
@classmethod
def endpoint_validator(cls, v, info: ValidationInfo):
if v is None or v == "":
v = "https://otlp.arize.com"
if not v.startswith(("https://", "http://")):
raise ValueError("endpoint must start with https:// or http://")
if "/" in v[8:]:
parts = v.split("/")
v = parts[0] + "//" + parts[2]
return v
return cls.validate_endpoint_url(v, "https://otlp.arize.com")
class PhoenixConfig(BaseTracingConfig):
@ -65,23 +82,12 @@ class PhoenixConfig(BaseTracingConfig):
@field_validator("project")
@classmethod
def project_validator(cls, v, info: ValidationInfo):
if v is None or v == "":
v = "default"
return v
return cls.validate_project_field(v, "default")
@field_validator("endpoint")
@classmethod
def endpoint_validator(cls, v, info: ValidationInfo):
if v is None or v == "":
v = "https://app.phoenix.arize.com"
if not v.startswith(("https://", "http://")):
raise ValueError("endpoint must start with https:// or http://")
if "/" in v[8:]:
parts = v.split("/")
v = parts[0] + "//" + parts[2]
return v
return cls.validate_endpoint_url(v, "https://app.phoenix.arize.com")
class LangfuseConfig(BaseTracingConfig):
@ -95,13 +101,8 @@ class LangfuseConfig(BaseTracingConfig):
@field_validator("host")
@classmethod
def set_value(cls, v, info: ValidationInfo):
if v is None or v == "":
v = "https://api.langfuse.com"
if not v.startswith("https://") and not v.startswith("http://"):
raise ValueError("host must start with https:// or http://")
return v
def host_validator(cls, v, info: ValidationInfo):
return cls.validate_endpoint_url(v, "https://api.langfuse.com")
class LangSmithConfig(BaseTracingConfig):
@ -115,13 +116,9 @@ class LangSmithConfig(BaseTracingConfig):
@field_validator("endpoint")
@classmethod
def set_value(cls, v, info: ValidationInfo):
if v is None or v == "":
v = "https://api.smith.langchain.com"
if not v.startswith("https://"):
raise ValueError("endpoint must start with https://")
return v
def endpoint_validator(cls, v, info: ValidationInfo):
# LangSmith only allows HTTPS
return validate_url(v, "https://api.smith.langchain.com", allowed_schemes=("https",))
class OpikConfig(BaseTracingConfig):
@ -137,22 +134,12 @@ class OpikConfig(BaseTracingConfig):
@field_validator("project")
@classmethod
def project_validator(cls, v, info: ValidationInfo):
if v is None or v == "":
v = "Default Project"
return v
return cls.validate_project_field(v, "Default Project")
@field_validator("url")
@classmethod
def url_validator(cls, v, info: ValidationInfo):
if v is None or v == "":
v = "https://www.comet.com/opik/api/"
if not v.startswith(("https://", "http://")):
raise ValueError("url must start with https:// or http://")
if not v.endswith("/api/"):
raise ValueError("url should ends with /api/")
return v
return validate_url_with_path(v, "https://www.comet.com/opik/api/", required_suffix="/api/")
class WeaveConfig(BaseTracingConfig):
@ -168,20 +155,15 @@ class WeaveConfig(BaseTracingConfig):
@field_validator("endpoint")
@classmethod
def set_value(cls, v, info: ValidationInfo):
if v is None or v == "":
v = "https://trace.wandb.ai"
if not v.startswith("https://"):
raise ValueError("endpoint must start with https://")
return v
def endpoint_validator(cls, v, info: ValidationInfo):
# Weave only allows HTTPS for endpoint
return validate_url(v, "https://trace.wandb.ai", allowed_schemes=("https",))
@field_validator("host")
@classmethod
def validate_host(cls, v, info: ValidationInfo):
if v is not None and v != "":
if not v.startswith(("https://", "http://")):
raise ValueError("host must start with https:// or http://")
def host_validator(cls, v, info: ValidationInfo):
if v is not None and v.strip() != "":
return validate_url(v, v, allowed_schemes=("https", "http"))
return v

View File

@ -1,6 +1,7 @@
from contextlib import contextmanager
from datetime import datetime
from typing import Optional, Union
from urllib.parse import urlparse
from extensions.ext_database import db
from models.model import Message
@ -60,3 +61,83 @@ def generate_dotted_order(
return current_segment
return f"{parent_dotted_order}.{current_segment}"
def validate_url(url: str, default_url: str, allowed_schemes: tuple = ("https", "http")) -> str:
"""
Validate and normalize URL with proper error handling
Args:
url: The URL to validate
default_url: Default URL to use if input is None or empty
allowed_schemes: Tuple of allowed URL schemes (default: https, http)
Returns:
Normalized URL string
Raises:
ValueError: If URL format is invalid or scheme not allowed
"""
if not url or url.strip() == "":
return default_url
# Parse URL to validate format
parsed = urlparse(url)
# Check if scheme is allowed
if parsed.scheme not in allowed_schemes:
raise ValueError(f"URL scheme must be one of: {', '.join(allowed_schemes)}")
# Reconstruct URL with only scheme, netloc (removing path, query, fragment)
normalized_url = f"{parsed.scheme}://{parsed.netloc}"
return normalized_url
def validate_url_with_path(url: str, default_url: str, required_suffix: str | None = None) -> str:
"""
Validate URL that may include path components
Args:
url: The URL to validate
default_url: Default URL to use if input is None or empty
required_suffix: Optional suffix that URL must end with
Returns:
Validated URL string
Raises:
ValueError: If URL format is invalid or doesn't match required suffix
"""
if not url or url.strip() == "":
return default_url
# Parse URL to validate format
parsed = urlparse(url)
# Check if scheme is allowed
if parsed.scheme not in ("https", "http"):
raise ValueError("URL must start with https:// or http://")
# Check required suffix if specified
if required_suffix and not url.endswith(required_suffix):
raise ValueError(f"URL should end with {required_suffix}")
return url
def validate_project_name(project: str, default_name: str) -> str:
"""
Validate and normalize project name
Args:
project: Project name to validate
default_name: Default name to use if input is None or empty
Returns:
Normalized project name
"""
if not project or project.strip() == "":
return default_name
return project.strip()

View File

@ -0,0 +1 @@
# Unit tests for core ops module

View File

@ -0,0 +1,309 @@
import pytest
from pydantic import ValidationError
from core.ops.entities.config_entity import (
ArizeConfig,
LangfuseConfig,
LangSmithConfig,
OpikConfig,
PhoenixConfig,
TracingProviderEnum,
WeaveConfig,
)
class TestTracingProviderEnum:
"""Test cases for TracingProviderEnum"""
def test_enum_values(self):
"""Test that all expected enum values are present"""
assert TracingProviderEnum.ARIZE == "arize"
assert TracingProviderEnum.PHOENIX == "phoenix"
assert TracingProviderEnum.LANGFUSE == "langfuse"
assert TracingProviderEnum.LANGSMITH == "langsmith"
assert TracingProviderEnum.OPIK == "opik"
assert TracingProviderEnum.WEAVE == "weave"
class TestArizeConfig:
"""Test cases for ArizeConfig"""
def test_valid_config(self):
"""Test valid Arize configuration"""
config = ArizeConfig(
api_key="test_key", space_id="test_space", project="test_project", endpoint="https://custom.arize.com"
)
assert config.api_key == "test_key"
assert config.space_id == "test_space"
assert config.project == "test_project"
assert config.endpoint == "https://custom.arize.com"
def test_default_values(self):
"""Test default values are set correctly"""
config = ArizeConfig()
assert config.api_key is None
assert config.space_id is None
assert config.project is None
assert config.endpoint == "https://otlp.arize.com"
def test_project_validation_empty(self):
"""Test project validation with empty value"""
config = ArizeConfig(project="")
assert config.project == "default"
def test_project_validation_none(self):
"""Test project validation with None value"""
config = ArizeConfig(project=None)
assert config.project == "default"
def test_endpoint_validation_empty(self):
"""Test endpoint validation with empty value"""
config = ArizeConfig(endpoint="")
assert config.endpoint == "https://otlp.arize.com"
def test_endpoint_validation_with_path(self):
"""Test endpoint validation normalizes URL by removing path"""
config = ArizeConfig(endpoint="https://custom.arize.com/api/v1")
assert config.endpoint == "https://custom.arize.com"
def test_endpoint_validation_invalid_scheme(self):
"""Test endpoint validation rejects invalid schemes"""
with pytest.raises(ValidationError, match="URL scheme must be one of"):
ArizeConfig(endpoint="ftp://invalid.com")
def test_endpoint_validation_no_scheme(self):
"""Test endpoint validation rejects URLs without scheme"""
with pytest.raises(ValidationError, match="URL scheme must be one of"):
ArizeConfig(endpoint="invalid.com")
class TestPhoenixConfig:
"""Test cases for PhoenixConfig"""
def test_valid_config(self):
"""Test valid Phoenix configuration"""
config = PhoenixConfig(api_key="test_key", project="test_project", endpoint="https://custom.phoenix.com")
assert config.api_key == "test_key"
assert config.project == "test_project"
assert config.endpoint == "https://custom.phoenix.com"
def test_default_values(self):
"""Test default values are set correctly"""
config = PhoenixConfig()
assert config.api_key is None
assert config.project is None
assert config.endpoint == "https://app.phoenix.arize.com"
def test_project_validation_empty(self):
"""Test project validation with empty value"""
config = PhoenixConfig(project="")
assert config.project == "default"
def test_endpoint_validation_with_path(self):
"""Test endpoint validation normalizes URL by removing path"""
config = PhoenixConfig(endpoint="https://custom.phoenix.com/api/v1")
assert config.endpoint == "https://custom.phoenix.com"
class TestLangfuseConfig:
"""Test cases for LangfuseConfig"""
def test_valid_config(self):
"""Test valid Langfuse configuration"""
config = LangfuseConfig(public_key="public_key", secret_key="secret_key", host="https://custom.langfuse.com")
assert config.public_key == "public_key"
assert config.secret_key == "secret_key"
assert config.host == "https://custom.langfuse.com"
def test_default_values(self):
"""Test default values are set correctly"""
config = LangfuseConfig(public_key="public", secret_key="secret")
assert config.host == "https://api.langfuse.com"
def test_missing_required_fields(self):
"""Test that required fields are enforced"""
with pytest.raises(ValidationError):
LangfuseConfig()
with pytest.raises(ValidationError):
LangfuseConfig(public_key="public")
with pytest.raises(ValidationError):
LangfuseConfig(secret_key="secret")
def test_host_validation_empty(self):
"""Test host validation with empty value"""
config = LangfuseConfig(public_key="public", secret_key="secret", host="")
assert config.host == "https://api.langfuse.com"
class TestLangSmithConfig:
"""Test cases for LangSmithConfig"""
def test_valid_config(self):
"""Test valid LangSmith configuration"""
config = LangSmithConfig(api_key="test_key", project="test_project", endpoint="https://custom.smith.com")
assert config.api_key == "test_key"
assert config.project == "test_project"
assert config.endpoint == "https://custom.smith.com"
def test_default_values(self):
"""Test default values are set correctly"""
config = LangSmithConfig(api_key="key", project="project")
assert config.endpoint == "https://api.smith.langchain.com"
def test_missing_required_fields(self):
"""Test that required fields are enforced"""
with pytest.raises(ValidationError):
LangSmithConfig()
with pytest.raises(ValidationError):
LangSmithConfig(api_key="key")
with pytest.raises(ValidationError):
LangSmithConfig(project="project")
def test_endpoint_validation_https_only(self):
"""Test endpoint validation only allows HTTPS"""
with pytest.raises(ValidationError, match="URL scheme must be one of"):
LangSmithConfig(api_key="key", project="project", endpoint="http://insecure.com")
class TestOpikConfig:
"""Test cases for OpikConfig"""
def test_valid_config(self):
"""Test valid Opik configuration"""
config = OpikConfig(
api_key="test_key",
project="test_project",
workspace="test_workspace",
url="https://custom.comet.com/opik/api/",
)
assert config.api_key == "test_key"
assert config.project == "test_project"
assert config.workspace == "test_workspace"
assert config.url == "https://custom.comet.com/opik/api/"
def test_default_values(self):
"""Test default values are set correctly"""
config = OpikConfig()
assert config.api_key is None
assert config.project is None
assert config.workspace is None
assert config.url == "https://www.comet.com/opik/api/"
def test_project_validation_empty(self):
"""Test project validation with empty value"""
config = OpikConfig(project="")
assert config.project == "Default Project"
def test_url_validation_empty(self):
"""Test URL validation with empty value"""
config = OpikConfig(url="")
assert config.url == "https://www.comet.com/opik/api/"
def test_url_validation_missing_suffix(self):
"""Test URL validation requires /api/ suffix"""
with pytest.raises(ValidationError, match="URL should end with /api/"):
OpikConfig(url="https://custom.comet.com/opik/")
def test_url_validation_invalid_scheme(self):
"""Test URL validation rejects invalid schemes"""
with pytest.raises(ValidationError, match="URL must start with https:// or http://"):
OpikConfig(url="ftp://custom.comet.com/opik/api/")
class TestWeaveConfig:
"""Test cases for WeaveConfig"""
def test_valid_config(self):
"""Test valid Weave configuration"""
config = WeaveConfig(
api_key="test_key",
entity="test_entity",
project="test_project",
endpoint="https://custom.wandb.ai",
host="https://custom.host.com",
)
assert config.api_key == "test_key"
assert config.entity == "test_entity"
assert config.project == "test_project"
assert config.endpoint == "https://custom.wandb.ai"
assert config.host == "https://custom.host.com"
def test_default_values(self):
"""Test default values are set correctly"""
config = WeaveConfig(api_key="key", project="project")
assert config.entity is None
assert config.endpoint == "https://trace.wandb.ai"
assert config.host is None
def test_missing_required_fields(self):
"""Test that required fields are enforced"""
with pytest.raises(ValidationError):
WeaveConfig()
with pytest.raises(ValidationError):
WeaveConfig(api_key="key")
with pytest.raises(ValidationError):
WeaveConfig(project="project")
def test_endpoint_validation_https_only(self):
"""Test endpoint validation only allows HTTPS"""
with pytest.raises(ValidationError, match="URL scheme must be one of"):
WeaveConfig(api_key="key", project="project", endpoint="http://insecure.wandb.ai")
def test_host_validation_optional(self):
"""Test host validation is optional but validates when provided"""
config = WeaveConfig(api_key="key", project="project", host=None)
assert config.host is None
config = WeaveConfig(api_key="key", project="project", host="")
assert config.host == ""
config = WeaveConfig(api_key="key", project="project", host="https://valid.host.com")
assert config.host == "https://valid.host.com"
def test_host_validation_invalid_scheme(self):
"""Test host validation rejects invalid schemes when provided"""
with pytest.raises(ValidationError, match="URL scheme must be one of"):
WeaveConfig(api_key="key", project="project", host="ftp://invalid.host.com")
class TestConfigIntegration:
"""Integration tests for configuration classes"""
def test_all_configs_can_be_instantiated(self):
"""Test that all config classes can be instantiated with valid data"""
configs = [
ArizeConfig(api_key="key"),
PhoenixConfig(api_key="key"),
LangfuseConfig(public_key="public", secret_key="secret"),
LangSmithConfig(api_key="key", project="project"),
OpikConfig(api_key="key"),
WeaveConfig(api_key="key", project="project"),
]
for config in configs:
assert config is not None
def test_url_normalization_consistency(self):
"""Test that URL normalization works consistently across configs"""
# Test that paths are removed from endpoints
arize_config = ArizeConfig(endpoint="https://arize.com/api/v1/test")
phoenix_config = PhoenixConfig(endpoint="https://phoenix.com/api/v2/")
assert arize_config.endpoint == "https://arize.com"
assert phoenix_config.endpoint == "https://phoenix.com"
def test_project_default_values(self):
"""Test that project default values are set correctly"""
arize_config = ArizeConfig(project="")
phoenix_config = PhoenixConfig(project="")
opik_config = OpikConfig(project="")
assert arize_config.project == "default"
assert phoenix_config.project == "default"
assert opik_config.project == "Default Project"

View File

@ -0,0 +1,138 @@
import pytest
from core.ops.utils import validate_project_name, validate_url, validate_url_with_path
class TestValidateUrl:
"""Test cases for validate_url function"""
def test_valid_https_url(self):
"""Test valid HTTPS URL"""
result = validate_url("https://example.com", "https://default.com")
assert result == "https://example.com"
def test_valid_http_url(self):
"""Test valid HTTP URL"""
result = validate_url("http://example.com", "https://default.com")
assert result == "http://example.com"
def test_url_with_path_removed(self):
"""Test that URL path is removed during normalization"""
result = validate_url("https://example.com/api/v1/test", "https://default.com")
assert result == "https://example.com"
def test_url_with_query_removed(self):
"""Test that URL query parameters are removed"""
result = validate_url("https://example.com?param=value", "https://default.com")
assert result == "https://example.com"
def test_url_with_fragment_removed(self):
"""Test that URL fragments are removed"""
result = validate_url("https://example.com#section", "https://default.com")
assert result == "https://example.com"
def test_empty_url_returns_default(self):
"""Test empty URL returns default"""
result = validate_url("", "https://default.com")
assert result == "https://default.com"
def test_none_url_returns_default(self):
"""Test None URL returns default"""
result = validate_url(None, "https://default.com")
assert result == "https://default.com"
def test_whitespace_url_returns_default(self):
"""Test whitespace URL returns default"""
result = validate_url(" ", "https://default.com")
assert result == "https://default.com"
def test_invalid_scheme_raises_error(self):
"""Test invalid scheme raises ValueError"""
with pytest.raises(ValueError, match="URL scheme must be one of"):
validate_url("ftp://example.com", "https://default.com")
def test_no_scheme_raises_error(self):
"""Test URL without scheme raises ValueError"""
with pytest.raises(ValueError, match="URL scheme must be one of"):
validate_url("example.com", "https://default.com")
def test_custom_allowed_schemes(self):
"""Test custom allowed schemes"""
result = validate_url("https://example.com", "https://default.com", allowed_schemes=("https",))
assert result == "https://example.com"
with pytest.raises(ValueError, match="URL scheme must be one of"):
validate_url("http://example.com", "https://default.com", allowed_schemes=("https",))
class TestValidateUrlWithPath:
"""Test cases for validate_url_with_path function"""
def test_valid_url_with_path(self):
"""Test valid URL with path"""
result = validate_url_with_path("https://example.com/api/v1", "https://default.com")
assert result == "https://example.com/api/v1"
def test_valid_url_with_required_suffix(self):
"""Test valid URL with required suffix"""
result = validate_url_with_path("https://example.com/api/", "https://default.com", required_suffix="/api/")
assert result == "https://example.com/api/"
def test_url_without_required_suffix_raises_error(self):
"""Test URL without required suffix raises error"""
with pytest.raises(ValueError, match="URL should end with /api/"):
validate_url_with_path("https://example.com/api", "https://default.com", required_suffix="/api/")
def test_empty_url_returns_default(self):
"""Test empty URL returns default"""
result = validate_url_with_path("", "https://default.com")
assert result == "https://default.com"
def test_none_url_returns_default(self):
"""Test None URL returns default"""
result = validate_url_with_path(None, "https://default.com")
assert result == "https://default.com"
def test_invalid_scheme_raises_error(self):
"""Test invalid scheme raises ValueError"""
with pytest.raises(ValueError, match="URL must start with https:// or http://"):
validate_url_with_path("ftp://example.com", "https://default.com")
def test_no_scheme_raises_error(self):
"""Test URL without scheme raises ValueError"""
with pytest.raises(ValueError, match="URL must start with https:// or http://"):
validate_url_with_path("example.com", "https://default.com")
class TestValidateProjectName:
"""Test cases for validate_project_name function"""
def test_valid_project_name(self):
"""Test valid project name"""
result = validate_project_name("my-project", "default")
assert result == "my-project"
def test_empty_project_name_returns_default(self):
"""Test empty project name returns default"""
result = validate_project_name("", "default")
assert result == "default"
def test_none_project_name_returns_default(self):
"""Test None project name returns default"""
result = validate_project_name(None, "default")
assert result == "default"
def test_whitespace_project_name_returns_default(self):
"""Test whitespace project name returns default"""
result = validate_project_name(" ", "default")
assert result == "default"
def test_project_name_with_whitespace_trimmed(self):
"""Test project name with whitespace is trimmed"""
result = validate_project_name(" my-project ", "default")
assert result == "my-project"
def test_custom_default_name(self):
"""Test custom default name"""
result = validate_project_name("", "Custom Default")
assert result == "Custom Default"