mirror of
https://github.com/microsoft/graphrag.git
synced 2025-12-25 22:18:56 +00:00
Add vector store id reference to embeddings config. (#1662)
This commit is contained in:
parent
1bbce33f42
commit
eeee84e9d9
@ -0,0 +1,4 @@
|
||||
{
|
||||
"type": "patch",
|
||||
"description": "Add vector store id reference to embeddings config."
|
||||
}
|
||||
@ -106,7 +106,7 @@ VECTOR_STORE_TYPE = VectorStoreType.LanceDB.value
|
||||
VECTOR_STORE_DB_URI = str(Path(OUTPUT_BASE_DIR) / "lancedb")
|
||||
VECTOR_STORE_CONTAINER_NAME = "default"
|
||||
VECTOR_STORE_OVERWRITE = True
|
||||
VECTOR_STORE_INDEX_NAME = "output"
|
||||
VECTOR_STORE_DEFAULT_ID = "default_vector_store"
|
||||
|
||||
# Local Search
|
||||
LOCAL_SEARCH_TEXT_UNIT_PROP = 0.5
|
||||
|
||||
@ -57,18 +57,10 @@ def get_embedding_settings(
|
||||
embeddings_llm_settings = settings.get_language_model_config(
|
||||
settings.embeddings.model_id
|
||||
)
|
||||
num_entries = len(settings.vector_store)
|
||||
if num_entries == 1:
|
||||
store = next(iter(settings.vector_store.values()))
|
||||
vector_store_settings = store.model_dump()
|
||||
else:
|
||||
# The vector_store dict should only have more than one entry for multi-index query
|
||||
vector_store_settings = None
|
||||
vector_store_settings = settings.get_vector_store_config(
|
||||
settings.embeddings.vector_store_id
|
||||
).model_dump()
|
||||
|
||||
if vector_store_settings is None:
|
||||
return {
|
||||
"strategy": settings.embeddings.resolved_strategy(embeddings_llm_settings)
|
||||
}
|
||||
#
|
||||
# If we get to this point, settings.vector_store is defined, and there's a specific setting for this embedding.
|
||||
# settings.vector_store.base contains connection information, or may be undefined
|
||||
|
||||
@ -40,7 +40,7 @@ models:
|
||||
# deployment_name: <azure_model_deployment_name>
|
||||
|
||||
vector_store:
|
||||
{defs.VECTOR_STORE_INDEX_NAME}:
|
||||
{defs.VECTOR_STORE_DEFAULT_ID}:
|
||||
type: {defs.VECTOR_STORE_TYPE}
|
||||
db_uri: {defs.VECTOR_STORE_DB_URI}
|
||||
container_name: {defs.VECTOR_STORE_CONTAINER_NAME}
|
||||
@ -48,6 +48,7 @@ vector_store:
|
||||
|
||||
embeddings:
|
||||
model_id: {defs.DEFAULT_EMBEDDING_MODEL_ID}
|
||||
vector_store_id: {defs.VECTOR_STORE_DEFAULT_ID}
|
||||
|
||||
### Input settings ###
|
||||
|
||||
|
||||
@ -226,7 +226,7 @@ class GraphRagConfig(BaseModel):
|
||||
|
||||
vector_store: dict[str, VectorStoreConfig] = Field(
|
||||
description="The vector store configuration.",
|
||||
default={"output": VectorStoreConfig()},
|
||||
default={defs.VECTOR_STORE_DEFAULT_ID: VectorStoreConfig()},
|
||||
)
|
||||
"""The vector store configuration."""
|
||||
|
||||
@ -263,6 +263,30 @@ class GraphRagConfig(BaseModel):
|
||||
|
||||
return self.models[model_id]
|
||||
|
||||
def get_vector_store_config(self, vector_store_id: str) -> VectorStoreConfig:
|
||||
"""Get a vector store configuration by ID.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
vector_store_id : str
|
||||
The ID of the vector store to get. Should match an ID in the vector_store list.
|
||||
|
||||
Returns
|
||||
-------
|
||||
VectorStoreConfig
|
||||
The vector store configuration if found.
|
||||
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
If the vector store ID is not found in the configuration.
|
||||
"""
|
||||
if vector_store_id not in self.vector_store:
|
||||
err_msg = f"Vector Store ID {vector_store_id} not found in configuration. Please rerun `graphrag init` and set the vector store configuration."
|
||||
raise ValueError(err_msg)
|
||||
|
||||
return self.vector_store[vector_store_id]
|
||||
|
||||
@model_validator(mode="after")
|
||||
def _validate_model(self):
|
||||
"""Validate the model configuration."""
|
||||
|
||||
@ -34,6 +34,10 @@ class TextEmbeddingConfig(BaseModel):
|
||||
description="The model ID to use for text embeddings.",
|
||||
default=defs.EMBEDDING_MODEL_ID,
|
||||
)
|
||||
vector_store_id: str = Field(
|
||||
description="The vector store ID to use for text embeddings.",
|
||||
default=defs.VECTOR_STORE_DEFAULT_ID,
|
||||
)
|
||||
|
||||
def resolved_strategy(self, model_config: LanguageModelConfig) -> dict:
|
||||
"""Get the resolved text embedding strategy."""
|
||||
|
||||
2
tests/fixtures/azure/settings.yml
vendored
2
tests/fixtures/azure/settings.yml
vendored
@ -3,7 +3,7 @@ claim_extraction:
|
||||
|
||||
embeddings:
|
||||
vector_store:
|
||||
output:
|
||||
default_vector_store:
|
||||
type: "azure_ai_search"
|
||||
url: ${AZURE_AI_SEARCH_URL_ENDPOINT}
|
||||
api_key: ${AZURE_AI_SEARCH_API_KEY}
|
||||
|
||||
2
tests/fixtures/min-csv/settings.yml
vendored
2
tests/fixtures/min-csv/settings.yml
vendored
@ -26,7 +26,7 @@ models:
|
||||
async_mode: threaded
|
||||
|
||||
vector_store:
|
||||
output:
|
||||
default_vector_store:
|
||||
type: "lancedb"
|
||||
db_uri: "./tests/fixtures/min-csv/lancedb"
|
||||
container_name: "lancedb_ci"
|
||||
|
||||
2
tests/fixtures/text/settings.yml
vendored
2
tests/fixtures/text/settings.yml
vendored
@ -26,7 +26,7 @@ models:
|
||||
async_mode: threaded
|
||||
|
||||
vector_store:
|
||||
output:
|
||||
default_vector_store:
|
||||
type: "azure_ai_search"
|
||||
url: ${AZURE_AI_SEARCH_URL_ENDPOINT}
|
||||
api_key: ${AZURE_AI_SEARCH_API_KEY}
|
||||
|
||||
@ -50,7 +50,7 @@ DEFAULT_MODEL_CONFIG = {
|
||||
DEFAULT_GRAPHRAG_CONFIG_SETTINGS = {
|
||||
"models": DEFAULT_MODEL_CONFIG,
|
||||
"vector_store": {
|
||||
"output": {
|
||||
defs.VECTOR_STORE_DEFAULT_ID: {
|
||||
"type": defs.VECTOR_STORE_TYPE,
|
||||
"db_uri": defs.VECTOR_STORE_DB_URI,
|
||||
"container_name": defs.VECTOR_STORE_CONTAINER_NAME,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user