refactor: extract common url validator for config_entity.py (#21934)

Signed-off-by: neatguycoding <15627489+NeatGuyCoding@users.noreply.github.com>
This commit is contained in:
NeatGuyCoding 2025-07-07 09:34:13 +08:00 committed by GitHub
parent 8288145ee4
commit ac69b8b191
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 576 additions and 65 deletions

View File

@ -2,6 +2,8 @@ from enum import StrEnum
from pydantic import BaseModel, ValidationInfo, field_validator from pydantic import BaseModel, ValidationInfo, field_validator
from core.ops.utils import validate_project_name, validate_url, validate_url_with_path
class TracingProviderEnum(StrEnum): class TracingProviderEnum(StrEnum):
ARIZE = "arize" ARIZE = "arize"
@ -15,10 +17,36 @@ class TracingProviderEnum(StrEnum):
class BaseTracingConfig(BaseModel): class BaseTracingConfig(BaseModel):
""" """
Base model class for tracing Base model class for tracing configurations
""" """
... @classmethod
def validate_endpoint_url(cls, v: str, default_url: str) -> str:
"""
Common endpoint URL validation logic
Args:
v: URL value to validate
default_url: Default URL to use if input is None or empty
Returns:
Validated and normalized URL
"""
return validate_url(v, default_url)
@classmethod
def validate_project_field(cls, v: str, default_name: str) -> str:
"""
Common project name validation logic
Args:
v: Project name to validate
default_name: Default name to use if input is None or empty
Returns:
Validated project name
"""
return validate_project_name(v, default_name)
class ArizeConfig(BaseTracingConfig): class ArizeConfig(BaseTracingConfig):
@ -34,23 +62,12 @@ class ArizeConfig(BaseTracingConfig):
@field_validator("project") @field_validator("project")
@classmethod @classmethod
def project_validator(cls, v, info: ValidationInfo): def project_validator(cls, v, info: ValidationInfo):
if v is None or v == "": return cls.validate_project_field(v, "default")
v = "default"
return v
@field_validator("endpoint") @field_validator("endpoint")
@classmethod @classmethod
def endpoint_validator(cls, v, info: ValidationInfo): def endpoint_validator(cls, v, info: ValidationInfo):
if v is None or v == "": return cls.validate_endpoint_url(v, "https://otlp.arize.com")
v = "https://otlp.arize.com"
if not v.startswith(("https://", "http://")):
raise ValueError("endpoint must start with https:// or http://")
if "/" in v[8:]:
parts = v.split("/")
v = parts[0] + "//" + parts[2]
return v
class PhoenixConfig(BaseTracingConfig): class PhoenixConfig(BaseTracingConfig):
@ -65,23 +82,12 @@ class PhoenixConfig(BaseTracingConfig):
@field_validator("project") @field_validator("project")
@classmethod @classmethod
def project_validator(cls, v, info: ValidationInfo): def project_validator(cls, v, info: ValidationInfo):
if v is None or v == "": return cls.validate_project_field(v, "default")
v = "default"
return v
@field_validator("endpoint") @field_validator("endpoint")
@classmethod @classmethod
def endpoint_validator(cls, v, info: ValidationInfo): def endpoint_validator(cls, v, info: ValidationInfo):
if v is None or v == "": return cls.validate_endpoint_url(v, "https://app.phoenix.arize.com")
v = "https://app.phoenix.arize.com"
if not v.startswith(("https://", "http://")):
raise ValueError("endpoint must start with https:// or http://")
if "/" in v[8:]:
parts = v.split("/")
v = parts[0] + "//" + parts[2]
return v
class LangfuseConfig(BaseTracingConfig): class LangfuseConfig(BaseTracingConfig):
@ -95,13 +101,8 @@ class LangfuseConfig(BaseTracingConfig):
@field_validator("host") @field_validator("host")
@classmethod @classmethod
def set_value(cls, v, info: ValidationInfo): def host_validator(cls, v, info: ValidationInfo):
if v is None or v == "": return cls.validate_endpoint_url(v, "https://api.langfuse.com")
v = "https://api.langfuse.com"
if not v.startswith("https://") and not v.startswith("http://"):
raise ValueError("host must start with https:// or http://")
return v
class LangSmithConfig(BaseTracingConfig): class LangSmithConfig(BaseTracingConfig):
@ -115,13 +116,9 @@ class LangSmithConfig(BaseTracingConfig):
@field_validator("endpoint") @field_validator("endpoint")
@classmethod @classmethod
def set_value(cls, v, info: ValidationInfo): def endpoint_validator(cls, v, info: ValidationInfo):
if v is None or v == "": # LangSmith only allows HTTPS
v = "https://api.smith.langchain.com" return validate_url(v, "https://api.smith.langchain.com", allowed_schemes=("https",))
if not v.startswith("https://"):
raise ValueError("endpoint must start with https://")
return v
class OpikConfig(BaseTracingConfig): class OpikConfig(BaseTracingConfig):
@ -137,22 +134,12 @@ class OpikConfig(BaseTracingConfig):
@field_validator("project") @field_validator("project")
@classmethod @classmethod
def project_validator(cls, v, info: ValidationInfo): def project_validator(cls, v, info: ValidationInfo):
if v is None or v == "": return cls.validate_project_field(v, "Default Project")
v = "Default Project"
return v
@field_validator("url") @field_validator("url")
@classmethod @classmethod
def url_validator(cls, v, info: ValidationInfo): def url_validator(cls, v, info: ValidationInfo):
if v is None or v == "": return validate_url_with_path(v, "https://www.comet.com/opik/api/", required_suffix="/api/")
v = "https://www.comet.com/opik/api/"
if not v.startswith(("https://", "http://")):
raise ValueError("url must start with https:// or http://")
if not v.endswith("/api/"):
raise ValueError("url should ends with /api/")
return v
class WeaveConfig(BaseTracingConfig): class WeaveConfig(BaseTracingConfig):
@ -168,20 +155,15 @@ class WeaveConfig(BaseTracingConfig):
@field_validator("endpoint") @field_validator("endpoint")
@classmethod @classmethod
def set_value(cls, v, info: ValidationInfo): def endpoint_validator(cls, v, info: ValidationInfo):
if v is None or v == "": # Weave only allows HTTPS for endpoint
v = "https://trace.wandb.ai" return validate_url(v, "https://trace.wandb.ai", allowed_schemes=("https",))
if not v.startswith("https://"):
raise ValueError("endpoint must start with https://")
return v
@field_validator("host") @field_validator("host")
@classmethod @classmethod
def validate_host(cls, v, info: ValidationInfo): def host_validator(cls, v, info: ValidationInfo):
if v is not None and v != "": if v is not None and v.strip() != "":
if not v.startswith(("https://", "http://")): return validate_url(v, v, allowed_schemes=("https", "http"))
raise ValueError("host must start with https:// or http://")
return v return v

View File

@ -1,6 +1,7 @@
from contextlib import contextmanager from contextlib import contextmanager
from datetime import datetime from datetime import datetime
from typing import Optional, Union from typing import Optional, Union
from urllib.parse import urlparse
from extensions.ext_database import db from extensions.ext_database import db
from models.model import Message from models.model import Message
@ -60,3 +61,83 @@ def generate_dotted_order(
return current_segment return current_segment
return f"{parent_dotted_order}.{current_segment}" return f"{parent_dotted_order}.{current_segment}"
def validate_url(url: str, default_url: str, allowed_schemes: tuple = ("https", "http")) -> str:
"""
Validate and normalize URL with proper error handling
Args:
url: The URL to validate
default_url: Default URL to use if input is None or empty
allowed_schemes: Tuple of allowed URL schemes (default: https, http)
Returns:
Normalized URL string
Raises:
ValueError: If URL format is invalid or scheme not allowed
"""
if not url or url.strip() == "":
return default_url
# Parse URL to validate format
parsed = urlparse(url)
# Check if scheme is allowed
if parsed.scheme not in allowed_schemes:
raise ValueError(f"URL scheme must be one of: {', '.join(allowed_schemes)}")
# Reconstruct URL with only scheme, netloc (removing path, query, fragment)
normalized_url = f"{parsed.scheme}://{parsed.netloc}"
return normalized_url
def validate_url_with_path(url: str, default_url: str, required_suffix: str | None = None) -> str:
"""
Validate URL that may include path components
Args:
url: The URL to validate
default_url: Default URL to use if input is None or empty
required_suffix: Optional suffix that URL must end with
Returns:
Validated URL string
Raises:
ValueError: If URL format is invalid or doesn't match required suffix
"""
if not url or url.strip() == "":
return default_url
# Parse URL to validate format
parsed = urlparse(url)
# Check if scheme is allowed
if parsed.scheme not in ("https", "http"):
raise ValueError("URL must start with https:// or http://")
# Check required suffix if specified
if required_suffix and not url.endswith(required_suffix):
raise ValueError(f"URL should end with {required_suffix}")
return url
def validate_project_name(project: str, default_name: str) -> str:
"""
Validate and normalize project name
Args:
project: Project name to validate
default_name: Default name to use if input is None or empty
Returns:
Normalized project name
"""
if not project or project.strip() == "":
return default_name
return project.strip()

View File

@ -0,0 +1 @@
# Unit tests for core ops module

View File

@ -0,0 +1,309 @@
import pytest
from pydantic import ValidationError
from core.ops.entities.config_entity import (
ArizeConfig,
LangfuseConfig,
LangSmithConfig,
OpikConfig,
PhoenixConfig,
TracingProviderEnum,
WeaveConfig,
)
class TestTracingProviderEnum:
"""Test cases for TracingProviderEnum"""
def test_enum_values(self):
"""Test that all expected enum values are present"""
assert TracingProviderEnum.ARIZE == "arize"
assert TracingProviderEnum.PHOENIX == "phoenix"
assert TracingProviderEnum.LANGFUSE == "langfuse"
assert TracingProviderEnum.LANGSMITH == "langsmith"
assert TracingProviderEnum.OPIK == "opik"
assert TracingProviderEnum.WEAVE == "weave"
class TestArizeConfig:
"""Test cases for ArizeConfig"""
def test_valid_config(self):
"""Test valid Arize configuration"""
config = ArizeConfig(
api_key="test_key", space_id="test_space", project="test_project", endpoint="https://custom.arize.com"
)
assert config.api_key == "test_key"
assert config.space_id == "test_space"
assert config.project == "test_project"
assert config.endpoint == "https://custom.arize.com"
def test_default_values(self):
"""Test default values are set correctly"""
config = ArizeConfig()
assert config.api_key is None
assert config.space_id is None
assert config.project is None
assert config.endpoint == "https://otlp.arize.com"
def test_project_validation_empty(self):
"""Test project validation with empty value"""
config = ArizeConfig(project="")
assert config.project == "default"
def test_project_validation_none(self):
"""Test project validation with None value"""
config = ArizeConfig(project=None)
assert config.project == "default"
def test_endpoint_validation_empty(self):
"""Test endpoint validation with empty value"""
config = ArizeConfig(endpoint="")
assert config.endpoint == "https://otlp.arize.com"
def test_endpoint_validation_with_path(self):
"""Test endpoint validation normalizes URL by removing path"""
config = ArizeConfig(endpoint="https://custom.arize.com/api/v1")
assert config.endpoint == "https://custom.arize.com"
def test_endpoint_validation_invalid_scheme(self):
"""Test endpoint validation rejects invalid schemes"""
with pytest.raises(ValidationError, match="URL scheme must be one of"):
ArizeConfig(endpoint="ftp://invalid.com")
def test_endpoint_validation_no_scheme(self):
"""Test endpoint validation rejects URLs without scheme"""
with pytest.raises(ValidationError, match="URL scheme must be one of"):
ArizeConfig(endpoint="invalid.com")
class TestPhoenixConfig:
"""Test cases for PhoenixConfig"""
def test_valid_config(self):
"""Test valid Phoenix configuration"""
config = PhoenixConfig(api_key="test_key", project="test_project", endpoint="https://custom.phoenix.com")
assert config.api_key == "test_key"
assert config.project == "test_project"
assert config.endpoint == "https://custom.phoenix.com"
def test_default_values(self):
"""Test default values are set correctly"""
config = PhoenixConfig()
assert config.api_key is None
assert config.project is None
assert config.endpoint == "https://app.phoenix.arize.com"
def test_project_validation_empty(self):
"""Test project validation with empty value"""
config = PhoenixConfig(project="")
assert config.project == "default"
def test_endpoint_validation_with_path(self):
"""Test endpoint validation normalizes URL by removing path"""
config = PhoenixConfig(endpoint="https://custom.phoenix.com/api/v1")
assert config.endpoint == "https://custom.phoenix.com"
class TestLangfuseConfig:
"""Test cases for LangfuseConfig"""
def test_valid_config(self):
"""Test valid Langfuse configuration"""
config = LangfuseConfig(public_key="public_key", secret_key="secret_key", host="https://custom.langfuse.com")
assert config.public_key == "public_key"
assert config.secret_key == "secret_key"
assert config.host == "https://custom.langfuse.com"
def test_default_values(self):
"""Test default values are set correctly"""
config = LangfuseConfig(public_key="public", secret_key="secret")
assert config.host == "https://api.langfuse.com"
def test_missing_required_fields(self):
"""Test that required fields are enforced"""
with pytest.raises(ValidationError):
LangfuseConfig()
with pytest.raises(ValidationError):
LangfuseConfig(public_key="public")
with pytest.raises(ValidationError):
LangfuseConfig(secret_key="secret")
def test_host_validation_empty(self):
"""Test host validation with empty value"""
config = LangfuseConfig(public_key="public", secret_key="secret", host="")
assert config.host == "https://api.langfuse.com"
class TestLangSmithConfig:
"""Test cases for LangSmithConfig"""
def test_valid_config(self):
"""Test valid LangSmith configuration"""
config = LangSmithConfig(api_key="test_key", project="test_project", endpoint="https://custom.smith.com")
assert config.api_key == "test_key"
assert config.project == "test_project"
assert config.endpoint == "https://custom.smith.com"
def test_default_values(self):
"""Test default values are set correctly"""
config = LangSmithConfig(api_key="key", project="project")
assert config.endpoint == "https://api.smith.langchain.com"
def test_missing_required_fields(self):
"""Test that required fields are enforced"""
with pytest.raises(ValidationError):
LangSmithConfig()
with pytest.raises(ValidationError):
LangSmithConfig(api_key="key")
with pytest.raises(ValidationError):
LangSmithConfig(project="project")
def test_endpoint_validation_https_only(self):
"""Test endpoint validation only allows HTTPS"""
with pytest.raises(ValidationError, match="URL scheme must be one of"):
LangSmithConfig(api_key="key", project="project", endpoint="http://insecure.com")
class TestOpikConfig:
"""Test cases for OpikConfig"""
def test_valid_config(self):
"""Test valid Opik configuration"""
config = OpikConfig(
api_key="test_key",
project="test_project",
workspace="test_workspace",
url="https://custom.comet.com/opik/api/",
)
assert config.api_key == "test_key"
assert config.project == "test_project"
assert config.workspace == "test_workspace"
assert config.url == "https://custom.comet.com/opik/api/"
def test_default_values(self):
"""Test default values are set correctly"""
config = OpikConfig()
assert config.api_key is None
assert config.project is None
assert config.workspace is None
assert config.url == "https://www.comet.com/opik/api/"
def test_project_validation_empty(self):
"""Test project validation with empty value"""
config = OpikConfig(project="")
assert config.project == "Default Project"
def test_url_validation_empty(self):
"""Test URL validation with empty value"""
config = OpikConfig(url="")
assert config.url == "https://www.comet.com/opik/api/"
def test_url_validation_missing_suffix(self):
"""Test URL validation requires /api/ suffix"""
with pytest.raises(ValidationError, match="URL should end with /api/"):
OpikConfig(url="https://custom.comet.com/opik/")
def test_url_validation_invalid_scheme(self):
"""Test URL validation rejects invalid schemes"""
with pytest.raises(ValidationError, match="URL must start with https:// or http://"):
OpikConfig(url="ftp://custom.comet.com/opik/api/")
class TestWeaveConfig:
"""Test cases for WeaveConfig"""
def test_valid_config(self):
"""Test valid Weave configuration"""
config = WeaveConfig(
api_key="test_key",
entity="test_entity",
project="test_project",
endpoint="https://custom.wandb.ai",
host="https://custom.host.com",
)
assert config.api_key == "test_key"
assert config.entity == "test_entity"
assert config.project == "test_project"
assert config.endpoint == "https://custom.wandb.ai"
assert config.host == "https://custom.host.com"
def test_default_values(self):
"""Test default values are set correctly"""
config = WeaveConfig(api_key="key", project="project")
assert config.entity is None
assert config.endpoint == "https://trace.wandb.ai"
assert config.host is None
def test_missing_required_fields(self):
"""Test that required fields are enforced"""
with pytest.raises(ValidationError):
WeaveConfig()
with pytest.raises(ValidationError):
WeaveConfig(api_key="key")
with pytest.raises(ValidationError):
WeaveConfig(project="project")
def test_endpoint_validation_https_only(self):
"""Test endpoint validation only allows HTTPS"""
with pytest.raises(ValidationError, match="URL scheme must be one of"):
WeaveConfig(api_key="key", project="project", endpoint="http://insecure.wandb.ai")
def test_host_validation_optional(self):
"""Test host validation is optional but validates when provided"""
config = WeaveConfig(api_key="key", project="project", host=None)
assert config.host is None
config = WeaveConfig(api_key="key", project="project", host="")
assert config.host == ""
config = WeaveConfig(api_key="key", project="project", host="https://valid.host.com")
assert config.host == "https://valid.host.com"
def test_host_validation_invalid_scheme(self):
"""Test host validation rejects invalid schemes when provided"""
with pytest.raises(ValidationError, match="URL scheme must be one of"):
WeaveConfig(api_key="key", project="project", host="ftp://invalid.host.com")
class TestConfigIntegration:
"""Integration tests for configuration classes"""
def test_all_configs_can_be_instantiated(self):
"""Test that all config classes can be instantiated with valid data"""
configs = [
ArizeConfig(api_key="key"),
PhoenixConfig(api_key="key"),
LangfuseConfig(public_key="public", secret_key="secret"),
LangSmithConfig(api_key="key", project="project"),
OpikConfig(api_key="key"),
WeaveConfig(api_key="key", project="project"),
]
for config in configs:
assert config is not None
def test_url_normalization_consistency(self):
"""Test that URL normalization works consistently across configs"""
# Test that paths are removed from endpoints
arize_config = ArizeConfig(endpoint="https://arize.com/api/v1/test")
phoenix_config = PhoenixConfig(endpoint="https://phoenix.com/api/v2/")
assert arize_config.endpoint == "https://arize.com"
assert phoenix_config.endpoint == "https://phoenix.com"
def test_project_default_values(self):
"""Test that project default values are set correctly"""
arize_config = ArizeConfig(project="")
phoenix_config = PhoenixConfig(project="")
opik_config = OpikConfig(project="")
assert arize_config.project == "default"
assert phoenix_config.project == "default"
assert opik_config.project == "Default Project"

View File

@ -0,0 +1,138 @@
import pytest
from core.ops.utils import validate_project_name, validate_url, validate_url_with_path
class TestValidateUrl:
"""Test cases for validate_url function"""
def test_valid_https_url(self):
"""Test valid HTTPS URL"""
result = validate_url("https://example.com", "https://default.com")
assert result == "https://example.com"
def test_valid_http_url(self):
"""Test valid HTTP URL"""
result = validate_url("http://example.com", "https://default.com")
assert result == "http://example.com"
def test_url_with_path_removed(self):
"""Test that URL path is removed during normalization"""
result = validate_url("https://example.com/api/v1/test", "https://default.com")
assert result == "https://example.com"
def test_url_with_query_removed(self):
"""Test that URL query parameters are removed"""
result = validate_url("https://example.com?param=value", "https://default.com")
assert result == "https://example.com"
def test_url_with_fragment_removed(self):
"""Test that URL fragments are removed"""
result = validate_url("https://example.com#section", "https://default.com")
assert result == "https://example.com"
def test_empty_url_returns_default(self):
"""Test empty URL returns default"""
result = validate_url("", "https://default.com")
assert result == "https://default.com"
def test_none_url_returns_default(self):
"""Test None URL returns default"""
result = validate_url(None, "https://default.com")
assert result == "https://default.com"
def test_whitespace_url_returns_default(self):
"""Test whitespace URL returns default"""
result = validate_url(" ", "https://default.com")
assert result == "https://default.com"
def test_invalid_scheme_raises_error(self):
"""Test invalid scheme raises ValueError"""
with pytest.raises(ValueError, match="URL scheme must be one of"):
validate_url("ftp://example.com", "https://default.com")
def test_no_scheme_raises_error(self):
"""Test URL without scheme raises ValueError"""
with pytest.raises(ValueError, match="URL scheme must be one of"):
validate_url("example.com", "https://default.com")
def test_custom_allowed_schemes(self):
"""Test custom allowed schemes"""
result = validate_url("https://example.com", "https://default.com", allowed_schemes=("https",))
assert result == "https://example.com"
with pytest.raises(ValueError, match="URL scheme must be one of"):
validate_url("http://example.com", "https://default.com", allowed_schemes=("https",))
class TestValidateUrlWithPath:
"""Test cases for validate_url_with_path function"""
def test_valid_url_with_path(self):
"""Test valid URL with path"""
result = validate_url_with_path("https://example.com/api/v1", "https://default.com")
assert result == "https://example.com/api/v1"
def test_valid_url_with_required_suffix(self):
"""Test valid URL with required suffix"""
result = validate_url_with_path("https://example.com/api/", "https://default.com", required_suffix="/api/")
assert result == "https://example.com/api/"
def test_url_without_required_suffix_raises_error(self):
"""Test URL without required suffix raises error"""
with pytest.raises(ValueError, match="URL should end with /api/"):
validate_url_with_path("https://example.com/api", "https://default.com", required_suffix="/api/")
def test_empty_url_returns_default(self):
"""Test empty URL returns default"""
result = validate_url_with_path("", "https://default.com")
assert result == "https://default.com"
def test_none_url_returns_default(self):
"""Test None URL returns default"""
result = validate_url_with_path(None, "https://default.com")
assert result == "https://default.com"
def test_invalid_scheme_raises_error(self):
"""Test invalid scheme raises ValueError"""
with pytest.raises(ValueError, match="URL must start with https:// or http://"):
validate_url_with_path("ftp://example.com", "https://default.com")
def test_no_scheme_raises_error(self):
"""Test URL without scheme raises ValueError"""
with pytest.raises(ValueError, match="URL must start with https:// or http://"):
validate_url_with_path("example.com", "https://default.com")
class TestValidateProjectName:
"""Test cases for validate_project_name function"""
def test_valid_project_name(self):
"""Test valid project name"""
result = validate_project_name("my-project", "default")
assert result == "my-project"
def test_empty_project_name_returns_default(self):
"""Test empty project name returns default"""
result = validate_project_name("", "default")
assert result == "default"
def test_none_project_name_returns_default(self):
"""Test None project name returns default"""
result = validate_project_name(None, "default")
assert result == "default"
def test_whitespace_project_name_returns_default(self):
"""Test whitespace project name returns default"""
result = validate_project_name(" ", "default")
assert result == "default"
def test_project_name_with_whitespace_trimmed(self):
"""Test project name with whitespace is trimmed"""
result = validate_project_name(" my-project ", "default")
assert result == "my-project"
def test_custom_default_name(self):
"""Test custom default name"""
result = validate_project_name("", "Custom Default")
assert result == "Custom Default"