mirror of
https://github.com/langgenius/dify.git
synced 2025-11-30 12:53:33 +00:00
feat:support selecting different ftparser for OceanBase. (#25970)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
parent
92cddbcc02
commit
208fe3d7de
@ -37,3 +37,11 @@ class OceanBaseVectorConfig(BaseSettings):
|
|||||||
"with older versions",
|
"with older versions",
|
||||||
default=False,
|
default=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
OCEANBASE_FULLTEXT_PARSER: str | None = Field(
|
||||||
|
description=(
|
||||||
|
"Fulltext parser to use for text indexing. Options: 'japanese_ftparser' (Japanese), "
|
||||||
|
"'thai_ftparser' (Thai), 'ik' (Chinese). Default is 'ik'"
|
||||||
|
),
|
||||||
|
default="ik",
|
||||||
|
)
|
||||||
|
|||||||
@ -4,7 +4,7 @@ import math
|
|||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from pydantic import BaseModel, model_validator
|
from pydantic import BaseModel, model_validator
|
||||||
from pyobvector import VECTOR, FtsIndexParam, FtsParser, ObVecClient, l2_distance # type: ignore
|
from pyobvector import VECTOR, ObVecClient, l2_distance # type: ignore
|
||||||
from sqlalchemy import JSON, Column, String
|
from sqlalchemy import JSON, Column, String
|
||||||
from sqlalchemy.dialects.mysql import LONGTEXT
|
from sqlalchemy.dialects.mysql import LONGTEXT
|
||||||
|
|
||||||
@ -117,22 +117,39 @@ class OceanBaseVector(BaseVector):
|
|||||||
columns=cols,
|
columns=cols,
|
||||||
vidxs=vidx_params,
|
vidxs=vidx_params,
|
||||||
)
|
)
|
||||||
try:
|
logger.debug("DEBUG: Table '%s' created successfully", self._collection_name)
|
||||||
|
|
||||||
if self._hybrid_search_enabled:
|
if self._hybrid_search_enabled:
|
||||||
self._client.create_fts_idx_with_fts_index_param(
|
# Get parser from config or use default ik parser
|
||||||
table_name=self._collection_name,
|
parser_name = dify_config.OCEANBASE_FULLTEXT_PARSER or "ik"
|
||||||
fts_idx_param=FtsIndexParam(
|
|
||||||
index_name="fulltext_index_for_col_text",
|
allowed_parsers = ["ik", "japanese_ftparser", "thai_ftparser"]
|
||||||
field_names=["text"],
|
if parser_name not in allowed_parsers:
|
||||||
parser_type=FtsParser.IK,
|
raise ValueError(
|
||||||
),
|
f"Invalid OceanBase full-text parser: {parser_name}. "
|
||||||
|
f"Allowed values are: {', '.join(allowed_parsers)}"
|
||||||
)
|
)
|
||||||
|
logger.debug("Hybrid search is enabled, parser_name='%s'", parser_name)
|
||||||
|
logger.debug(
|
||||||
|
"About to create fulltext index for collection '%s' using parser '%s'",
|
||||||
|
self._collection_name,
|
||||||
|
parser_name,
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
sql_command = f"""ALTER TABLE {self._collection_name}
|
||||||
|
ADD FULLTEXT INDEX fulltext_index_for_col_text (text) WITH PARSER {parser_name}"""
|
||||||
|
logger.debug("DEBUG: Executing SQL: %s", sql_command)
|
||||||
|
self._client.perform_raw_text_sql(sql_command)
|
||||||
|
logger.debug("DEBUG: Fulltext index created successfully for '%s'", self._collection_name)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
logger.exception("Exception occurred while creating fulltext index")
|
||||||
raise Exception(
|
raise Exception(
|
||||||
"Failed to add fulltext index to the target table, your OceanBase version must be 4.3.5.1 or above "
|
"Failed to add fulltext index to the target table, your OceanBase version must be "
|
||||||
+ "to support fulltext index and vector index in the same table",
|
"4.3.5.1 or above to support fulltext index and vector index in the same table"
|
||||||
e,
|
) from e
|
||||||
)
|
else:
|
||||||
|
logger.debug("DEBUG: Hybrid search is NOT enabled for '%s'", self._collection_name)
|
||||||
|
|
||||||
self._client.refresh_metadata([self._collection_name])
|
self._client.refresh_metadata([self._collection_name])
|
||||||
redis_client.set(collection_exist_cache_key, 1, ex=3600)
|
redis_client.set(collection_exist_cache_key, 1, ex=3600)
|
||||||
|
|
||||||
|
|||||||
@ -661,6 +661,7 @@ OCEANBASE_VECTOR_DATABASE=test
|
|||||||
OCEANBASE_CLUSTER_NAME=difyai
|
OCEANBASE_CLUSTER_NAME=difyai
|
||||||
OCEANBASE_MEMORY_LIMIT=6G
|
OCEANBASE_MEMORY_LIMIT=6G
|
||||||
OCEANBASE_ENABLE_HYBRID_SEARCH=false
|
OCEANBASE_ENABLE_HYBRID_SEARCH=false
|
||||||
|
OCEANBASE_FULLTEXT_PARSER=ik
|
||||||
|
|
||||||
# opengauss configurations, only available when VECTOR_STORE is `opengauss`
|
# opengauss configurations, only available when VECTOR_STORE is `opengauss`
|
||||||
OPENGAUSS_HOST=opengauss
|
OPENGAUSS_HOST=opengauss
|
||||||
|
|||||||
@ -504,6 +504,7 @@ services:
|
|||||||
OB_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai}
|
OB_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai}
|
||||||
OB_SERVER_IP: 127.0.0.1
|
OB_SERVER_IP: 127.0.0.1
|
||||||
MODE: mini
|
MODE: mini
|
||||||
|
LANG: en_US.UTF-8
|
||||||
ports:
|
ports:
|
||||||
- "${OCEANBASE_VECTOR_PORT:-2881}:2881"
|
- "${OCEANBASE_VECTOR_PORT:-2881}:2881"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
|
|||||||
@ -306,6 +306,7 @@ x-shared-env: &shared-api-worker-env
|
|||||||
OCEANBASE_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai}
|
OCEANBASE_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai}
|
||||||
OCEANBASE_MEMORY_LIMIT: ${OCEANBASE_MEMORY_LIMIT:-6G}
|
OCEANBASE_MEMORY_LIMIT: ${OCEANBASE_MEMORY_LIMIT:-6G}
|
||||||
OCEANBASE_ENABLE_HYBRID_SEARCH: ${OCEANBASE_ENABLE_HYBRID_SEARCH:-false}
|
OCEANBASE_ENABLE_HYBRID_SEARCH: ${OCEANBASE_ENABLE_HYBRID_SEARCH:-false}
|
||||||
|
OCEANBASE_FULLTEXT_PARSER: ${OCEANBASE_FULLTEXT_PARSER:-ik}
|
||||||
OPENGAUSS_HOST: ${OPENGAUSS_HOST:-opengauss}
|
OPENGAUSS_HOST: ${OPENGAUSS_HOST:-opengauss}
|
||||||
OPENGAUSS_PORT: ${OPENGAUSS_PORT:-6600}
|
OPENGAUSS_PORT: ${OPENGAUSS_PORT:-6600}
|
||||||
OPENGAUSS_USER: ${OPENGAUSS_USER:-postgres}
|
OPENGAUSS_USER: ${OPENGAUSS_USER:-postgres}
|
||||||
@ -1092,6 +1093,7 @@ services:
|
|||||||
OB_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai}
|
OB_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai}
|
||||||
OB_SERVER_IP: 127.0.0.1
|
OB_SERVER_IP: 127.0.0.1
|
||||||
MODE: mini
|
MODE: mini
|
||||||
|
LANG: en_US.UTF-8
|
||||||
ports:
|
ports:
|
||||||
- "${OCEANBASE_VECTOR_PORT:-2881}:2881"
|
- "${OCEANBASE_VECTOR_PORT:-2881}:2881"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user