Add analyzer_params config for milvus vectordb (#18180)

This commit is contained in:
Rain Wang 2025-04-17 10:38:56 +08:00 committed by GitHub
parent a1d20085e6
commit e8d98e3d89
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 24 additions and 8 deletions

View File

@ -165,6 +165,7 @@ MILVUS_URI=http://127.0.0.1:19530
MILVUS_TOKEN=
MILVUS_USER=root
MILVUS_PASSWORD=Milvus
MILVUS_ANALYZER_PARAMS=
# MyScale configuration
MYSCALE_HOST=127.0.0.1

View File

@ -39,3 +39,8 @@ class MilvusConfig(BaseSettings):
"older versions",
default=True,
)
MILVUS_ANALYZER_PARAMS: Optional[str] = Field(
description='Milvus text analyzer parameters, e.g., {"type": "chinese"} for Chinese segmentation support.',
default=None,
)

View File

@ -32,6 +32,7 @@ class MilvusConfig(BaseModel):
batch_size: int = 100 # Batch size for operations
database: str = "default" # Database name
enable_hybrid_search: bool = False # Flag to enable hybrid search
analyzer_params: Optional[str] = None # Analyzer params
@model_validator(mode="before")
@classmethod
@ -58,6 +59,7 @@ class MilvusConfig(BaseModel):
"user": self.user,
"password": self.password,
"db_name": self.database,
"analyzer_params": self.analyzer_params,
}
@ -300,14 +302,19 @@ class MilvusVector(BaseVector):
# Create the text field; enable_analyzer is set to True when hybrid search is enabled so that Milvus can
# automatically convert text to sparse_vector. Reference: https://milvus.io/docs/full-text-search.md
fields.append(
FieldSchema(
Field.CONTENT_KEY.value,
DataType.VARCHAR,
max_length=65_535,
enable_analyzer=self._hybrid_search_enabled,
)
)
content_field_kwargs: dict[str, Any] = {
"max_length": 65_535,
"enable_analyzer": self._hybrid_search_enabled,
}
if (
self._hybrid_search_enabled
and self._client_config.analyzer_params is not None
and self._client_config.analyzer_params.strip()
):
content_field_kwargs["analyzer_params"] = self._client_config.analyzer_params
fields.append(FieldSchema(Field.CONTENT_KEY.value, DataType.VARCHAR, **content_field_kwargs))
# Create the primary key field
fields.append(FieldSchema(Field.PRIMARY_KEY.value, DataType.INT64, is_primary=True, auto_id=True))
# Create the vector field, supports binary or float vectors
@ -383,5 +390,6 @@ class MilvusVectorFactory(AbstractVectorFactory):
password=dify_config.MILVUS_PASSWORD or "",
database=dify_config.MILVUS_DATABASE or "",
enable_hybrid_search=dify_config.MILVUS_ENABLE_HYBRID_SEARCH or False,
analyzer_params=dify_config.MILVUS_ANALYZER_PARAMS or "",
),
)

View File

@ -410,6 +410,7 @@ MILVUS_TOKEN=
MILVUS_USER=
MILVUS_PASSWORD=
MILVUS_ENABLE_HYBRID_SEARCH=False
MILVUS_ANALYZER_PARAMS=
# MyScale configuration, only available when VECTOR_STORE is `myscale`
# For multi-language support, please set MYSCALE_FTS_PARAMS by referring to:

View File

@ -142,6 +142,7 @@ x-shared-env: &shared-api-worker-env
MILVUS_USER: ${MILVUS_USER:-}
MILVUS_PASSWORD: ${MILVUS_PASSWORD:-}
MILVUS_ENABLE_HYBRID_SEARCH: ${MILVUS_ENABLE_HYBRID_SEARCH:-False}
MILVUS_ANALYZER_PARAMS: ${MILVUS_ANALYZER_PARAMS:-}
MYSCALE_HOST: ${MYSCALE_HOST:-myscale}
MYSCALE_PORT: ${MYSCALE_PORT:-8123}
MYSCALE_USER: ${MYSCALE_USER:-default}