Add vector store id reference to embeddings config. (#1662)

This commit is contained in:
Derek Worthen 2025-01-28 10:46:41 -08:00 committed by GitHub
parent 1bbce33f42
commit eeee84e9d9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 43 additions and 18 deletions

View File

@ -0,0 +1,4 @@
{
"type": "patch",
"description": "Add vector store id reference to embeddings config."
}

View File

@ -106,7 +106,7 @@ VECTOR_STORE_TYPE = VectorStoreType.LanceDB.value
VECTOR_STORE_DB_URI = str(Path(OUTPUT_BASE_DIR) / "lancedb")
VECTOR_STORE_CONTAINER_NAME = "default"
VECTOR_STORE_OVERWRITE = True
VECTOR_STORE_INDEX_NAME = "output"
VECTOR_STORE_DEFAULT_ID = "default_vector_store"
# Local Search
LOCAL_SEARCH_TEXT_UNIT_PROP = 0.5

View File

@ -57,18 +57,10 @@ def get_embedding_settings(
embeddings_llm_settings = settings.get_language_model_config(
settings.embeddings.model_id
)
num_entries = len(settings.vector_store)
if num_entries == 1:
store = next(iter(settings.vector_store.values()))
vector_store_settings = store.model_dump()
else:
# The vector_store dict should only have more than one entry for multi-index query
vector_store_settings = None
vector_store_settings = settings.get_vector_store_config(
settings.embeddings.vector_store_id
).model_dump()
if vector_store_settings is None:
return {
"strategy": settings.embeddings.resolved_strategy(embeddings_llm_settings)
}
#
# If we get to this point, settings.vector_store is defined, and there's a specific setting for this embedding.
# settings.vector_store.base contains connection information, or may be undefined

View File

@ -40,7 +40,7 @@ models:
# deployment_name: <azure_model_deployment_name>
vector_store:
{defs.VECTOR_STORE_INDEX_NAME}:
{defs.VECTOR_STORE_DEFAULT_ID}:
type: {defs.VECTOR_STORE_TYPE}
db_uri: {defs.VECTOR_STORE_DB_URI}
container_name: {defs.VECTOR_STORE_CONTAINER_NAME}
@ -48,6 +48,7 @@ vector_store:
embeddings:
model_id: {defs.DEFAULT_EMBEDDING_MODEL_ID}
vector_store_id: {defs.VECTOR_STORE_DEFAULT_ID}
### Input settings ###

View File

@ -226,7 +226,7 @@ class GraphRagConfig(BaseModel):
vector_store: dict[str, VectorStoreConfig] = Field(
description="The vector store configuration.",
default={"output": VectorStoreConfig()},
default={defs.VECTOR_STORE_DEFAULT_ID: VectorStoreConfig()},
)
"""The vector store configuration."""
@ -263,6 +263,30 @@ class GraphRagConfig(BaseModel):
return self.models[model_id]
def get_vector_store_config(self, vector_store_id: str) -> VectorStoreConfig:
    """Look up a vector store configuration by its ID.

    Parameters
    ----------
    vector_store_id : str
        The ID of the vector store to retrieve. Must be a key of the
        ``vector_store`` mapping on this configuration.

    Returns
    -------
    VectorStoreConfig
        The configuration registered under ``vector_store_id``.

    Raises
    ------
    ValueError
        If ``vector_store_id`` is not a key of ``vector_store``.
    """
    # Happy path first: a known ID resolves directly to its stored config.
    if vector_store_id in self.vector_store:
        return self.vector_store[vector_store_id]

    # Unknown ID: fail with guidance on how to regenerate the settings.
    message = f"Vector Store ID {vector_store_id} not found in configuration. Please rerun `graphrag init` and set the vector store configuration."
    raise ValueError(message)
@model_validator(mode="after")
def _validate_model(self):
"""Validate the model configuration."""

View File

@ -34,6 +34,10 @@ class TextEmbeddingConfig(BaseModel):
description="The model ID to use for text embeddings.",
default=defs.EMBEDDING_MODEL_ID,
)
vector_store_id: str = Field(
description="The vector store ID to use for text embeddings.",
default=defs.VECTOR_STORE_DEFAULT_ID,
)
def resolved_strategy(self, model_config: LanguageModelConfig) -> dict:
"""Get the resolved text embedding strategy."""

View File

@ -3,7 +3,7 @@ claim_extraction:
embeddings:
vector_store:
output:
default_vector_store:
type: "azure_ai_search"
url: ${AZURE_AI_SEARCH_URL_ENDPOINT}
api_key: ${AZURE_AI_SEARCH_API_KEY}

View File

@ -26,7 +26,7 @@ models:
async_mode: threaded
vector_store:
output:
default_vector_store:
type: "lancedb"
db_uri: "./tests/fixtures/min-csv/lancedb"
container_name: "lancedb_ci"

View File

@ -26,7 +26,7 @@ models:
async_mode: threaded
vector_store:
output:
default_vector_store:
type: "azure_ai_search"
url: ${AZURE_AI_SEARCH_URL_ENDPOINT}
api_key: ${AZURE_AI_SEARCH_API_KEY}

View File

@ -50,7 +50,7 @@ DEFAULT_MODEL_CONFIG = {
DEFAULT_GRAPHRAG_CONFIG_SETTINGS = {
"models": DEFAULT_MODEL_CONFIG,
"vector_store": {
"output": {
defs.VECTOR_STORE_DEFAULT_ID: {
"type": defs.VECTOR_STORE_TYPE,
"db_uri": defs.VECTOR_STORE_DB_URI,
"container_name": defs.VECTOR_STORE_CONTAINER_NAME,