2025-04-29 16:53:57 +08:00
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
2025-05-16 10:16:43 +08:00
from collections import Counter
2025-04-29 16:53:57 +08:00
from enum import auto
2025-05-06 17:38:06 +08:00
from typing import Annotated , Any
2025-05-20 09:58:26 +08:00
from uuid import UUID
2025-04-29 16:53:57 +08:00
2025-05-06 17:38:06 +08:00
from flask import Request
2025-05-20 09:58:26 +08:00
from pydantic import BaseModel , Field , StringConstraints , ValidationError , field_validator
2025-05-16 10:16:43 +08:00
from pydantic_core import PydanticCustomError
2025-04-29 16:53:57 +08:00
from strenum import StrEnum
2025-05-06 17:38:06 +08:00
from werkzeug . exceptions import BadRequest , UnsupportedMediaType
from api . constants import DATASET_NAME_LIMIT
2025-05-09 19:17:08 +08:00
def validate_and_parse_json_request ( request : Request , validator : type [ BaseModel ] , * , extras : dict [ str , Any ] | None = None , exclude_unset : bool = False ) - > tuple [ dict [ str , Any ] | None , str | None ] :
"""
Validates and parses JSON requests through a multi - stage validation pipeline .
2025-05-06 17:38:06 +08:00
2025-05-09 19:17:08 +08:00
Implements a four - stage validation process :
2025-05-06 17:38:06 +08:00
1. Content - Type verification ( must be application / json )
2. JSON syntax validation
3. Payload structure type checking
4. Pydantic model validation with error formatting
Args :
request ( Request ) : Flask request object containing HTTP payload
2025-05-09 19:17:08 +08:00
validator ( type [ BaseModel ] ) : Pydantic model class for data validation
extras ( dict [ str , Any ] | None ) : Additional fields to merge into payload
before validation . These fields will be removed from the final output
exclude_unset ( bool ) : Whether to exclude fields that have not been explicitly set
2025-05-06 17:38:06 +08:00
Returns :
tuple [ Dict [ str , Any ] | None , str | None ] :
- First element :
- Validated dictionary on success
- None on validation failure
- Second element :
- None on success
- Diagnostic error message on failure
Raises :
2025-05-09 19:17:08 +08:00
UnsupportedMediaType : When Content - Type header is not application / json
2025-05-06 17:38:06 +08:00
BadRequest : For structural JSON syntax errors
ValidationError : When payload violates Pydantic schema rules
Examples :
2025-05-09 19:17:08 +08:00
>> > validate_and_parse_json_request ( valid_request , DatasetSchema )
( { " name " : " Dataset1 " , " format " : " csv " } , None )
>> > validate_and_parse_json_request ( xml_request , DatasetSchema )
( None , " Unsupported content type: Expected application/json, got text/xml " )
>> > validate_and_parse_json_request ( bad_json_request , DatasetSchema )
( None , " Malformed JSON syntax: Missing commas/brackets or invalid encoding " )
Notes :
1. Validation Priority :
- Content - Type verification precedes JSON parsing
- Structural validation occurs before schema validation
2. Extra fields added via ` extras ` parameter are automatically removed
from the final output after validation
2025-05-06 17:38:06 +08:00
"""
try :
payload = request . get_json ( ) or { }
except UnsupportedMediaType :
return None , f " Unsupported content type: Expected application/json, got { request . content_type } "
except BadRequest :
return None , " Malformed JSON syntax: Missing commas/brackets or invalid encoding "
if not isinstance ( payload , dict ) :
return None , f " Invalid request payload: expected object, got { type ( payload ) . __name__ } "
try :
2025-05-09 19:17:08 +08:00
if extras is not None :
payload . update ( extras )
2025-05-06 17:38:06 +08:00
validated_request = validator ( * * payload )
except ValidationError as e :
return None , format_validation_error_message ( e )
2025-05-09 19:17:08 +08:00
parsed_payload = validated_request . model_dump ( by_alias = True , exclude_unset = exclude_unset )
if extras is not None :
for key in list ( parsed_payload . keys ( ) ) :
if key in extras :
del parsed_payload [ key ]
2025-05-06 17:38:06 +08:00
return parsed_payload , None
2025-04-29 16:53:57 +08:00
2025-05-20 09:58:26 +08:00
def validate_and_parse_request_args ( request : Request , validator : type [ BaseModel ] , * , extras : dict [ str , Any ] | None = None ) - > tuple [ dict [ str , Any ] | None , str | None ] :
"""
Validates and parses request arguments against a Pydantic model .
This function performs a complete request validation workflow :
1. Extracts query parameters from the request
2. Merges with optional extra values ( if provided )
3. Validates against the specified Pydantic model
4. Cleans the output by removing extra values
5. Returns either parsed data or an error message
Args :
request ( Request ) : Web framework request object containing query parameters
validator ( type [ BaseModel ] ) : Pydantic model class for validation
extras ( dict [ str , Any ] | None ) : Optional additional values to include in validation
but exclude from final output . Defaults to None .
Returns :
tuple [ dict [ str , Any ] | None , str | None ] :
- First element : Validated / parsed arguments as dict if successful , None otherwise
- Second element : Formatted error message if validation failed , None otherwise
Behavior :
- Query parameters are merged with extras before validation
- Extras are automatically removed from the final output
- All validation errors are formatted into a human - readable string
Raises :
TypeError : If validator is not a Pydantic BaseModel subclass
Examples :
Successful validation :
>> > validate_and_parse_request_args ( request , MyValidator )
( { ' param1 ' : ' value ' } , None )
Failed validation :
>> > validate_and_parse_request_args ( request , MyValidator )
( None , " param1: Field required " )
With extras :
>> > validate_and_parse_request_args ( request , MyValidator , extras = { ' internal_id ' : 123 } )
( { ' param1 ' : ' value ' } , None ) # internal_id removed from output
Notes :
- Uses request . args . to_dict ( ) for Flask - compatible parameter extraction
- Maintains immutability of original request arguments
- Preserves type conversion from Pydantic validation
"""
args = request . args . to_dict ( flat = True )
try :
if extras is not None :
args . update ( extras )
validated_args = validator ( * * args )
except ValidationError as e :
return None , format_validation_error_message ( e )
parsed_args = validated_args . model_dump ( )
if extras is not None :
for key in list ( parsed_args . keys ( ) ) :
if key in extras :
del parsed_args [ key ]
return parsed_args , None
2025-04-30 14:50:23 +08:00
def format_validation_error_message ( e : ValidationError ) - > str :
2025-05-09 19:17:08 +08:00
"""
Formats validation errors into a standardized string format .
2025-05-06 17:38:06 +08:00
Processes pydantic ValidationError objects to create human - readable error messages
containing field locations , error descriptions , and input values .
Args :
e ( ValidationError ) : The validation error instance containing error details
Returns :
str : Formatted error messages joined by newlines . Each line contains :
- Field path ( dot - separated )
- Error message
- Truncated input value ( max 128 chars )
Example :
>> > try :
. . . UserModel ( name = 123 , email = " invalid " )
. . . except ValidationError as e :
. . . print ( format_validation_error_message ( e ) )
Field : < name > - Message : < Input should be a valid string > - Value : < 123 >
Field : < email > - Message : < value is not a valid email address > - Value : < invalid >
"""
2025-04-29 16:53:57 +08:00
error_messages = [ ]
for error in e . errors ( ) :
field = " . " . join ( map ( str , error [ " loc " ] ) )
msg = error [ " msg " ]
input_val = error [ " input " ]
input_str = str ( input_val )
if len ( input_str ) > 128 :
input_str = input_str [ : 125 ] + " ... "
error_msg = f " Field: < { field } > - Message: < { msg } > - Value: < { input_str } > "
error_messages . append ( error_msg )
return " \n " . join ( error_messages )
2025-05-20 09:58:26 +08:00
def normalize_str ( v : Any ) - > Any :
"""
Normalizes string values to a standard format while preserving non - string inputs .
Performs the following transformations when input is a string :
1. Trims leading / trailing whitespace ( str . strip ( ) )
2. Converts to lowercase ( str . lower ( ) )
Non - string inputs are returned unchanged , making this function safe for mixed - type
processing pipelines .
Args :
v ( Any ) : Input value to normalize . Accepts any Python object .
Returns :
Any : Normalized string if input was string - type , original value otherwise .
Behavior Examples :
String Input : " Admin " → " admin "
Empty String : " " → " " ( empty string )
Non - String :
- 123 → 123
- None → None
- [ " User " ] → [ " User " ]
Typical Use Cases :
- Standardizing user input
- Preparing data for case - insensitive comparison
- Cleaning API parameters
- Normalizing configuration values
Edge Cases :
- Unicode whitespace is handled by str . strip ( )
- Locale - independent lowercasing ( str . lower ( ) )
- Preserves falsy values ( 0 , False , etc . )
Example :
>> > normalize_str ( " ReadOnly " )
' readonly '
>> > normalize_str ( 42 )
42
"""
if isinstance ( v , str ) :
stripped = v . strip ( )
normalized = stripped . lower ( )
return normalized
return v
def validate_uuid1_hex ( v : Any ) - > str :
"""
Validates and converts input to a UUID version 1 hexadecimal string .
This function performs strict validation and normalization :
1. Accepts either UUID objects or UUID - formatted strings
2. Verifies the UUID is version 1 ( time - based )
3. Returns the 32 - character hexadecimal representation
Args :
v ( Any ) : Input value to validate . Can be :
- UUID object ( must be version 1 )
- String in UUID format ( e . g . " 550e8400-e29b-41d4-a716-446655440000 " )
Returns :
str : 32 - character lowercase hexadecimal string without hyphens
Example : " 550e8400e29b41d4a716446655440000 "
Raises :
PydanticCustomError : With code " invalid_UUID1_format " when :
- Input is not a UUID object or valid UUID string
- UUID version is not 1
- String doesn ' t match UUID format
Examples :
Valid cases :
>> > validate_uuid1_hex ( " 550e8400-e29b-41d4-a716-446655440000 " )
' 550e8400e29b41d4a716446655440000 '
>> > validate_uuid1_hex ( UUID ( ' 550e8400-e29b-41d4-a716-446655440000 ' ) )
' 550e8400e29b41d4a716446655440000 '
Invalid cases :
>> > validate_uuid1_hex ( " not-a-uuid " ) # raises PydanticCustomError
>> > validate_uuid1_hex ( 12345 ) # raises PydanticCustomError
>> > validate_uuid1_hex ( UUID ( int = 0 ) ) # v4, raises PydanticCustomError
Notes :
- Uses Python ' s built-in UUID parser for format validation
- Version check prevents accidental use of other UUID versions
- Hyphens in input strings are automatically removed in output
"""
try :
uuid_obj = UUID ( v ) if isinstance ( v , str ) else v
if uuid_obj . version != 1 :
raise PydanticCustomError ( " invalid_UUID1_format " , " Must be a UUID1 format " )
return uuid_obj . hex
except ( AttributeError , ValueError , TypeError ) :
raise PydanticCustomError ( " invalid_UUID1_format " , " Invalid UUID1 format " )
2025-04-29 16:53:57 +08:00
class PermissionEnum ( StrEnum ) :
me = auto ( )
team = auto ( )
2025-06-18 09:41:09 +08:00
class ChunkMethodEnum ( StrEnum ) :
2025-04-29 16:53:57 +08:00
naive = auto ( )
book = auto ( )
email = auto ( )
laws = auto ( )
manual = auto ( )
one = auto ( )
paper = auto ( )
picture = auto ( )
presentation = auto ( )
qa = auto ( )
table = auto ( )
tag = auto ( )
class GraphragMethodEnum ( StrEnum ) :
light = auto ( )
general = auto ( )
class Base ( BaseModel ) :
class Config :
extra = " forbid "
class RaptorConfig ( Base ) :
use_raptor : bool = Field ( default = False )
prompt : Annotated [
str ,
StringConstraints ( strip_whitespace = True , min_length = 1 ) ,
Field (
default = " Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following: \n {cluster_content} \n The above is the content you need to summarize. "
) ,
]
max_token : int = Field ( default = 256 , ge = 1 , le = 2048 )
threshold : float = Field ( default = 0.1 , ge = 0.0 , le = 1.0 )
max_cluster : int = Field ( default = 64 , ge = 1 , le = 1024 )
2025-04-30 14:50:23 +08:00
random_seed : int = Field ( default = 0 , ge = 0 )
2025-04-29 16:53:57 +08:00
class GraphragConfig ( Base ) :
use_graphrag : bool = Field ( default = False )
2025-05-06 17:38:06 +08:00
entity_types : list [ str ] = Field ( default_factory = lambda : [ " organization " , " person " , " geo " , " event " , " category " ] )
2025-04-29 16:53:57 +08:00
method : GraphragMethodEnum = Field ( default = GraphragMethodEnum . light )
community : bool = Field ( default = False )
resolution : bool = Field ( default = False )
class ParserConfig ( Base ) :
auto_keywords : int = Field ( default = 0 , ge = 0 , le = 32 )
auto_questions : int = Field ( default = 0 , ge = 0 , le = 10 )
chunk_token_num : int = Field ( default = 128 , ge = 1 , le = 2048 )
2025-04-30 17:43:42 +08:00
delimiter : str = Field ( default = r " \ n " , min_length = 1 )
2025-05-06 17:38:06 +08:00
graphrag : GraphragConfig | None = None
2025-04-29 16:53:57 +08:00
html4excel : bool = False
layout_recognize : str = " DeepDOC "
2025-05-06 17:38:06 +08:00
raptor : RaptorConfig | None = None
tag_kb_ids : list [ str ] = Field ( default_factory = list )
2025-04-29 16:53:57 +08:00
topn_tags : int = Field ( default = 1 , ge = 1 , le = 10 )
2025-06-23 13:08:11 +08:00
filename_embd_weight : float | None = Field ( default = 0.1 , ge = 0.0 , le = 1.0 )
2025-05-06 17:38:06 +08:00
task_page_size : int | None = Field ( default = None , ge = 1 )
pages : list [ list [ int ] ] | None = None
2025-04-29 16:53:57 +08:00
class CreateDatasetReq ( Base ) :
2025-05-06 17:38:06 +08:00
name : Annotated [ str , StringConstraints ( strip_whitespace = True , min_length = 1 , max_length = DATASET_NAME_LIMIT ) , Field ( . . . ) ]
avatar : str | None = Field ( default = None , max_length = 65535 )
description : str | None = Field ( default = None , max_length = 65535 )
2025-06-25 16:41:32 +08:00
embedding_model : str | None = Field ( default = None , max_length = 255 , serialization_alias = " embd_id " )
2025-05-20 09:58:26 +08:00
permission : PermissionEnum = Field ( default = PermissionEnum . me , min_length = 1 , max_length = 16 )
2025-06-18 09:41:09 +08:00
chunk_method : ChunkMethodEnum = Field ( default = ChunkMethodEnum . naive , min_length = 1 , max_length = 32 , serialization_alias = " parser_id " )
2025-05-12 19:39:18 +08:00
parser_config : ParserConfig | None = Field ( default = None )
2025-04-29 16:53:57 +08:00
@field_validator ( " avatar " )
@classmethod
2025-05-09 19:17:08 +08:00
def validate_avatar_base64 ( cls , v : str | None ) - > str | None :
"""
Validates Base64 - encoded avatar string format and MIME type compliance .
2025-05-06 17:38:06 +08:00
Implements a three - stage validation workflow :
1. MIME prefix existence check
2. MIME type format validation
3. Supported type verification
Args :
v ( str ) : Raw avatar field value
Returns :
str : Validated Base64 string
Raises :
2025-05-16 10:16:43 +08:00
PydanticCustomError : For structural errors in these cases :
2025-05-06 17:38:06 +08:00
- Missing MIME prefix header
- Invalid MIME prefix format
- Unsupported image MIME type
Example :
` ` ` python
# Valid case
CreateDatasetReq ( avatar = " data:image/png;base64,iVBORw0KGg... " )
# Invalid cases
CreateDatasetReq ( avatar = " image/jpeg;base64,... " ) # Missing 'data:' prefix
CreateDatasetReq ( avatar = " data:video/mp4;base64,... " ) # Unsupported MIME type
` ` `
"""
2025-04-29 16:53:57 +08:00
if v is None :
return v
if " , " in v :
prefix , _ = v . split ( " , " , 1 )
if not prefix . startswith ( " data: " ) :
2025-05-16 10:16:43 +08:00
raise PydanticCustomError ( " format_invalid " , " Invalid MIME prefix format. Must start with ' data: ' " )
2025-04-29 16:53:57 +08:00
mime_type = prefix [ 5 : ] . split ( " ; " ) [ 0 ]
supported_mime_types = [ " image/jpeg " , " image/png " ]
if mime_type not in supported_mime_types :
2025-05-16 10:16:43 +08:00
raise PydanticCustomError ( " format_invalid " , " Unsupported MIME type. Allowed: {supported_mime_types} " , { " supported_mime_types " : supported_mime_types } )
2025-04-29 16:53:57 +08:00
return v
else :
2025-05-16 10:16:43 +08:00
raise PydanticCustomError ( " format_invalid " , " Missing MIME prefix. Expected format: data:<mime>;base64,<data> " )
2025-04-29 16:53:57 +08:00
2025-06-25 16:41:32 +08:00
@field_validator ( " embedding_model " , mode = " before " )
@classmethod
def normalize_embedding_model ( cls , v : Any ) - > Any :
if isinstance ( v , str ) :
return v . strip ( )
return v
2025-04-29 16:53:57 +08:00
@field_validator ( " embedding_model " , mode = " after " )
@classmethod
2025-06-25 16:41:32 +08:00
def validate_embedding_model ( cls , v : str | None ) - > str | None :
2025-05-09 19:17:08 +08:00
"""
Validates embedding model identifier format compliance .
2025-05-06 17:38:06 +08:00
Validation pipeline :
1. Structural format verification
2. Component non - empty check
3. Value normalization
Args :
v ( str ) : Raw model identifier
Returns :
str : Validated < model_name > @ < provider > format
Raises :
2025-05-16 10:16:43 +08:00
PydanticCustomError : For these violations :
2025-05-06 17:38:06 +08:00
- Missing @ separator
- Empty model_name / provider
- Invalid component structure
Examples :
Valid : " text-embedding-3-large@openai "
Invalid : " invalid_model " ( no @ )
Invalid : " @openai " ( empty model_name )
Invalid : " text-embedding-3-large@ " ( empty provider )
"""
2025-06-25 16:41:32 +08:00
if isinstance ( v , str ) :
if " @ " not in v :
raise PydanticCustomError ( " format_invalid " , " Embedding model identifier must follow <model_name>@<provider> format " )
2025-05-06 17:38:06 +08:00
2025-06-25 16:41:32 +08:00
components = v . split ( " @ " , 1 )
if len ( components ) != 2 or not all ( components ) :
raise PydanticCustomError ( " format_invalid " , " Both model_name and provider must be non-empty strings " )
2025-05-06 17:38:06 +08:00
2025-06-25 16:41:32 +08:00
model_name , provider = components
if not model_name . strip ( ) or not provider . strip ( ) :
raise PydanticCustomError ( " format_invalid " , " Model name and provider cannot be whitespace-only strings " )
2025-04-29 16:53:57 +08:00
return v
@field_validator ( " permission " , mode = " before " )
@classmethod
2025-05-20 09:58:26 +08:00
def normalize_permission ( cls , v : Any ) - > Any :
return normalize_str ( v )
2025-04-29 16:53:57 +08:00
2025-05-12 19:39:18 +08:00
@field_validator ( " parser_config " , mode = " before " )
@classmethod
def normalize_empty_parser_config ( cls , v : Any ) - > Any :
"""
Normalizes empty parser configuration by converting empty dictionaries to None .
This validator ensures consistent handling of empty parser configurations across
the application by converting empty dicts to None values .
Args :
v ( Any ) : Raw input value for the parser config field
Returns :
Any : Returns None if input is an empty dict , otherwise returns the original value
Example :
>> > normalize_empty_parser_config ( { } )
None
>> > normalize_empty_parser_config ( { " key " : " value " } )
{ " key " : " value " }
"""
if v == { } :
return None
return v
2025-04-29 16:53:57 +08:00
@field_validator ( " parser_config " , mode = " after " )
@classmethod
2025-05-12 19:39:18 +08:00
def validate_parser_config_json_length ( cls , v : ParserConfig | None ) - > ParserConfig | None :
2025-05-09 19:17:08 +08:00
"""
Validates serialized JSON length constraints for parser configuration .
2025-05-06 17:38:06 +08:00
2025-05-09 19:17:08 +08:00
Implements a two - stage validation workflow :
2025-05-12 19:39:18 +08:00
1. Null check - bypass validation for empty configurations
2. Model serialization - convert Pydantic model to JSON string
3. Size verification - enforce maximum allowed payload size
2025-05-06 17:38:06 +08:00
Args :
v ( ParserConfig | None ) : Raw parser configuration object
Returns :
ParserConfig | None : Validated configuration object
Raises :
2025-05-16 10:16:43 +08:00
PydanticCustomError : When serialized JSON exceeds 65 , 535 characters
2025-05-06 17:38:06 +08:00
"""
2025-05-12 19:39:18 +08:00
if v is None :
return None
2025-05-06 17:38:06 +08:00
if ( json_str := v . model_dump_json ( ) ) and len ( json_str ) > 65535 :
2025-05-16 10:16:43 +08:00
raise PydanticCustomError ( " string_too_long " , " Parser config exceeds size limit (max 65,535 characters). Current size: {actual} " , { " actual " : len ( json_str ) } )
2025-04-29 16:53:57 +08:00
return v
2025-05-09 19:17:08 +08:00
class UpdateDatasetReq ( CreateDatasetReq ) :
2025-05-20 09:58:26 +08:00
dataset_id : str = Field ( . . . )
2025-05-09 19:17:08 +08:00
name : Annotated [ str , StringConstraints ( strip_whitespace = True , min_length = 1 , max_length = DATASET_NAME_LIMIT ) , Field ( default = " " ) ]
2025-06-12 15:47:49 +08:00
pagerank : int = Field ( default = 0 , ge = 0 , le = 100 )
2025-05-09 19:17:08 +08:00
2025-05-20 09:58:26 +08:00
@field_validator ( " dataset_id " , mode = " before " )
@classmethod
def validate_dataset_id ( cls , v : Any ) - > str :
return validate_uuid1_hex ( v )
2025-05-16 10:16:43 +08:00
class DeleteReq ( Base ) :
2025-05-20 09:58:26 +08:00
ids : list [ str ] | None = Field ( . . . )
2025-05-16 10:16:43 +08:00
@field_validator ( " ids " , mode = " after " )
2025-05-20 09:58:26 +08:00
@classmethod
def validate_ids ( cls , v_list : list [ str ] | None ) - > list [ str ] | None :
2025-05-16 10:16:43 +08:00
"""
2025-05-20 09:58:26 +08:00
Validates and normalizes a list of UUID strings with None handling .
2025-05-16 10:16:43 +08:00
2025-05-20 09:58:26 +08:00
This post - processing validator performs :
1. None input handling ( pass - through )
2. UUID version 1 validation for each list item
3. Duplicate value detection
4. Returns normalized UUID hex strings or None
2025-05-16 10:16:43 +08:00
Args :
2025-05-20 09:58:26 +08:00
v_list ( list [ str ] | None ) : Input list that has passed initial validation .
Either a list of UUID strings or None .
2025-05-16 10:16:43 +08:00
Returns :
list [ str ] | None :
2025-05-20 09:58:26 +08:00
- None if input was None
- List of normalized UUID hex strings otherwise :
* 32 - character lowercase
* Valid UUID version 1
* Unique within list
2025-05-16 10:16:43 +08:00
Raises :
2025-05-20 09:58:26 +08:00
PydanticCustomError : With structured error details when :
- " invalid_UUID1_format " : Any string fails UUIDv1 validation
- " duplicate_uuids " : If duplicate IDs are detected
2025-05-16 10:16:43 +08:00
2025-05-20 09:58:26 +08:00
Validation Rules :
- None input returns None
- Empty list returns empty list
- All non - None items must be valid UUIDv1
- No duplicates permitted
- Original order preserved
2025-05-16 10:16:43 +08:00
2025-05-20 09:58:26 +08:00
Examples :
Valid cases :
>> > validate_ids ( None )
None
>> > validate_ids ( [ ] )
[ ]
>> > validate_ids ( [ " 550e8400-e29b-41d4-a716-446655440000 " ] )
[ " 550e8400e29b41d4a716446655440000 " ]
Invalid cases :
>> > validate_ids ( [ " invalid " ] )
# raises PydanticCustomError(invalid_UUID1_format)
>> > validate_ids ( [ " 550e... " , " 550e... " ] )
# raises PydanticCustomError(duplicate_uuids)
Security Notes :
- Validates UUID version to prevent version spoofing
- Duplicate check prevents data injection
- None handling maintains pipeline integrity
2025-05-16 10:16:43 +08:00
"""
2025-05-20 09:58:26 +08:00
if v_list is None :
return None
2025-05-16 10:16:43 +08:00
2025-05-20 09:58:26 +08:00
ids_list = [ ]
for v in v_list :
try :
ids_list . append ( validate_uuid1_hex ( v ) )
except PydanticCustomError as e :
raise e
2025-05-16 10:16:43 +08:00
2025-05-20 09:58:26 +08:00
duplicates = [ item for item , count in Counter ( ids_list ) . items ( ) if count > 1 ]
2025-05-16 10:16:43 +08:00
if duplicates :
duplicates_str = " , " . join ( duplicates )
raise PydanticCustomError ( " duplicate_uuids " , " Duplicate ids: ' {duplicate_ids} ' " , { " duplicate_ids " : duplicates_str } )
2025-05-20 09:58:26 +08:00
return ids_list
2025-05-16 10:16:43 +08:00
class DeleteDatasetReq ( DeleteReq ) : . . .
2025-05-20 09:58:26 +08:00
class OrderByEnum ( StrEnum ) :
create_time = auto ( )
update_time = auto ( )
class BaseListReq ( Base ) :
id : str | None = None
name : str | None = None
page : int = Field ( default = 1 , ge = 1 )
page_size : int = Field ( default = 30 , ge = 1 )
orderby : OrderByEnum = Field ( default = OrderByEnum . create_time )
desc : bool = Field ( default = True )
@field_validator ( " id " , mode = " before " )
@classmethod
def validate_id ( cls , v : Any ) - > str :
return validate_uuid1_hex ( v )
@field_validator ( " orderby " , mode = " before " )
@classmethod
def normalize_orderby ( cls , v : Any ) - > Any :
return normalize_str ( v )
class ListDatasetReq ( BaseListReq ) : . . .