mirror of
https://github.com/microsoft/graphrag.git
synced 2025-12-04 19:07:52 +00:00
Add backwards compatibility patch for vector store (#1334)
This commit is contained in:
parent
83026bdb26
commit
0cc79b9cf7
@ -0,0 +1,4 @@
|
|||||||
|
{
|
||||||
|
"type": "patch",
|
||||||
|
"description": "add backwards compatibility patch to vector store."
|
||||||
|
}
|
||||||
3
.vscode/extensions.json
vendored
3
.vscode/extensions.json
vendored
@ -6,6 +6,7 @@
|
|||||||
"ms-python.vscode-pylance",
|
"ms-python.vscode-pylance",
|
||||||
"bierner.markdown-mermaid",
|
"bierner.markdown-mermaid",
|
||||||
"streetsidesoftware.code-spell-checker",
|
"streetsidesoftware.code-spell-checker",
|
||||||
"ronnidc.nunjucks"
|
"ronnidc.nunjucks",
|
||||||
|
"lucien-martijn.parquet-visualizer",
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
@ -182,18 +182,56 @@ async def local_search(
|
|||||||
------
|
------
|
||||||
TODO: Document any exceptions to expect.
|
TODO: Document any exceptions to expect.
|
||||||
"""
|
"""
|
||||||
# TODO: must update filepath of lancedb (if used) until the new config engine has been implemented
|
#################################### BEGIN PATCH ####################################
|
||||||
|
# TODO: remove the following patch that checks for a vector_store prior to v1 release
|
||||||
|
# TODO: this is a backwards compatibility patch that injects the default vector_store settings into the config if it is not present
|
||||||
|
# Only applicable in situations involving a local vector_store (lancedb). The general idea:
|
||||||
|
# if vector_store not in config:
|
||||||
|
# 1. assume user is running local if vector_store is not in config
|
||||||
|
# 2. insert default vector_store in config
|
||||||
|
# 3. create lancedb vector_store instance
|
||||||
|
# 4. upload vector embeddings from the input dataframes to the vector_store
|
||||||
|
backwards_compatible = False
|
||||||
|
if not config.embeddings.vector_store:
|
||||||
|
backwards_compatible = True
|
||||||
|
from graphrag.query.input.loaders.dfs import store_entity_semantic_embeddings
|
||||||
|
from graphrag.vector_stores.lancedb import LanceDBVectorStore
|
||||||
|
|
||||||
|
config.embeddings.vector_store = {
|
||||||
|
"type": "lancedb",
|
||||||
|
"db_uri": f"{Path(config.storage.base_dir)}/lancedb",
|
||||||
|
"collection_name": "entity_description_embeddings",
|
||||||
|
"overwrite": True,
|
||||||
|
}
|
||||||
|
_entities = read_indexer_entities(nodes, entities, community_level)
|
||||||
|
description_embedding_store = LanceDBVectorStore(
|
||||||
|
db_uri=config.embeddings.vector_store["db_uri"],
|
||||||
|
collection_name=config.embeddings.vector_store["collection_name"],
|
||||||
|
overwrite=config.embeddings.vector_store["overwrite"],
|
||||||
|
)
|
||||||
|
description_embedding_store.connect(
|
||||||
|
db_uri=config.embeddings.vector_store["db_uri"]
|
||||||
|
)
|
||||||
|
# dump embeddings from the entities list to the description_embedding_store
|
||||||
|
store_entity_semantic_embeddings(
|
||||||
|
entities=_entities, vectorstore=description_embedding_store
|
||||||
|
)
|
||||||
|
#################################### END PATCH ####################################
|
||||||
|
|
||||||
|
# TODO: update filepath of lancedb (if used) until the new config engine has been implemented
|
||||||
# TODO: remove the type ignore annotations below once the new config engine has been refactored
|
# TODO: remove the type ignore annotations below once the new config engine has been refactored
|
||||||
vector_store_type = config.embeddings.vector_store.get("type") # type: ignore
|
vector_store_type = config.embeddings.vector_store.get("type") # type: ignore
|
||||||
vector_store_args = config.embeddings.vector_store
|
vector_store_args = config.embeddings.vector_store
|
||||||
if vector_store_type == "lancedb":
|
if vector_store_type == VectorStoreType.LanceDB and not backwards_compatible:
|
||||||
db_uri = config.embeddings.vector_store["db_uri"] # type: ignore
|
db_uri = config.embeddings.vector_store["db_uri"] # type: ignore
|
||||||
lancedb_dir = Path(config.root_dir).resolve() / db_uri
|
lancedb_dir = Path(config.root_dir).resolve() / db_uri
|
||||||
vector_store_args["db_uri"] = str(lancedb_dir) # type: ignore
|
vector_store_args["db_uri"] = str(lancedb_dir) # type: ignore
|
||||||
|
|
||||||
reporter.info(f"Vector Store Args: {redact(vector_store_args)}") # type: ignore
|
reporter.info(f"Vector Store Args: {redact(vector_store_args)}") # type: ignore
|
||||||
description_embedding_store = _get_embedding_description_store(
|
if not backwards_compatible: # can remove this check and always set the description_embedding_store before v1 release
|
||||||
config_args=vector_store_args, # type: ignore
|
description_embedding_store = _get_embedding_description_store(
|
||||||
)
|
config_args=vector_store_args, # type: ignore
|
||||||
|
)
|
||||||
|
|
||||||
_entities = read_indexer_entities(nodes, entities, community_level)
|
_entities = read_indexer_entities(nodes, entities, community_level)
|
||||||
_covariates = read_indexer_covariates(covariates) if covariates is not None else []
|
_covariates = read_indexer_covariates(covariates) if covariates is not None else []
|
||||||
@ -205,7 +243,7 @@ async def local_search(
|
|||||||
entities=_entities,
|
entities=_entities,
|
||||||
relationships=read_indexer_relationships(relationships),
|
relationships=read_indexer_relationships(relationships),
|
||||||
covariates={"claims": _covariates},
|
covariates={"claims": _covariates},
|
||||||
description_embedding_store=description_embedding_store,
|
description_embedding_store=description_embedding_store, # type: ignore
|
||||||
response_type=response_type,
|
response_type=response_type,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -251,18 +289,56 @@ async def local_search_streaming(
|
|||||||
------
|
------
|
||||||
TODO: Document any exceptions to expect.
|
TODO: Document any exceptions to expect.
|
||||||
"""
|
"""
|
||||||
|
#################################### BEGIN PATCH ####################################
|
||||||
|
# TODO: remove the following patch that checks for a vector_store prior to v1 release
|
||||||
|
# TODO: this is a backwards compatibility patch that injects the default vector_store settings into the config if it is not present
|
||||||
|
# Only applicable in situations involving a local vector_store (lancedb). The general idea:
|
||||||
|
# if vector_store not in config:
|
||||||
|
# 1. assume user is running local if vector_store is not in config
|
||||||
|
# 2. insert default vector_store in config
|
||||||
|
# 3. create lancedb vector_store instance
|
||||||
|
# 4. upload vector embeddings from the input dataframes to the vector_store
|
||||||
|
backwards_compatible = False
|
||||||
|
if not config.embeddings.vector_store:
|
||||||
|
backwards_compatible = True
|
||||||
|
from graphrag.query.input.loaders.dfs import store_entity_semantic_embeddings
|
||||||
|
from graphrag.vector_stores.lancedb import LanceDBVectorStore
|
||||||
|
|
||||||
|
config.embeddings.vector_store = {
|
||||||
|
"type": "lancedb",
|
||||||
|
"db_uri": f"{Path(config.storage.base_dir)}/lancedb",
|
||||||
|
"collection_name": "entity_description_embeddings",
|
||||||
|
"overwrite": True,
|
||||||
|
}
|
||||||
|
_entities = read_indexer_entities(nodes, entities, community_level)
|
||||||
|
description_embedding_store = LanceDBVectorStore(
|
||||||
|
db_uri=config.embeddings.vector_store["db_uri"],
|
||||||
|
collection_name=config.embeddings.vector_store["collection_name"],
|
||||||
|
overwrite=config.embeddings.vector_store["overwrite"],
|
||||||
|
)
|
||||||
|
description_embedding_store.connect(
|
||||||
|
db_uri=config.embeddings.vector_store["db_uri"]
|
||||||
|
)
|
||||||
|
# dump embeddings from the entities list to the description_embedding_store
|
||||||
|
store_entity_semantic_embeddings(
|
||||||
|
entities=_entities, vectorstore=description_embedding_store
|
||||||
|
)
|
||||||
|
#################################### END PATCH ####################################
|
||||||
|
|
||||||
# TODO: must update filepath of lancedb (if used) until the new config engine has been implemented
|
# TODO: must update filepath of lancedb (if used) until the new config engine has been implemented
|
||||||
# TODO: remove the type ignore annotations below once the new config engine has been refactored
|
# TODO: remove the type ignore annotations below once the new config engine has been refactored
|
||||||
vector_store_type = config.embeddings.vector_store["type"] # type: ignore
|
vector_store_type = config.embeddings.vector_store.get("type") # type: ignore
|
||||||
vector_store_args = config.embeddings.vector_store
|
vector_store_args = config.embeddings.vector_store
|
||||||
if vector_store_type == VectorStoreType.LanceDB:
|
if vector_store_type == VectorStoreType.LanceDB and not backwards_compatible:
|
||||||
db_uri = config.embeddings.vector_store["db_uri"] # type: ignore
|
db_uri = config.embeddings.vector_store["db_uri"] # type: ignore
|
||||||
lancedb_dir = Path(config.root_dir).resolve() / db_uri
|
lancedb_dir = Path(config.root_dir).resolve() / db_uri
|
||||||
vector_store_args["db_uri"] = str(lancedb_dir) # type: ignore
|
vector_store_args["db_uri"] = str(lancedb_dir) # type: ignore
|
||||||
|
|
||||||
reporter.info(f"Vector Store Args: {redact(vector_store_args)}") # type: ignore
|
reporter.info(f"Vector Store Args: {redact(vector_store_args)}") # type: ignore
|
||||||
description_embedding_store = _get_embedding_description_store(
|
if not backwards_compatible: # can remove this check and always set the description_embedding_store before v1 release
|
||||||
config_args=vector_store_args, # type: ignore
|
description_embedding_store = _get_embedding_description_store(
|
||||||
)
|
config_args=vector_store_args, # type: ignore
|
||||||
|
)
|
||||||
|
|
||||||
_entities = read_indexer_entities(nodes, entities, community_level)
|
_entities = read_indexer_entities(nodes, entities, community_level)
|
||||||
_covariates = read_indexer_covariates(covariates) if covariates is not None else []
|
_covariates = read_indexer_covariates(covariates) if covariates is not None else []
|
||||||
@ -274,7 +350,7 @@ async def local_search_streaming(
|
|||||||
entities=_entities,
|
entities=_entities,
|
||||||
relationships=read_indexer_relationships(relationships),
|
relationships=read_indexer_relationships(relationships),
|
||||||
covariates={"claims": _covariates},
|
covariates={"claims": _covariates},
|
||||||
description_embedding_store=description_embedding_store,
|
description_embedding_store=description_embedding_store, # type: ignore
|
||||||
response_type=response_type,
|
response_type=response_type,
|
||||||
)
|
)
|
||||||
search_result = search_engine.astream_search(query=query)
|
search_result = search_engine.astream_search(query=query)
|
||||||
|
|||||||
4
tests/fixtures/min-csv/settings.yml
vendored
4
tests/fixtures/min-csv/settings.yml
vendored
@ -6,12 +6,11 @@ embeddings:
|
|||||||
type: "lancedb"
|
type: "lancedb"
|
||||||
db_uri: "./tests/fixtures/min-csv/lancedb"
|
db_uri: "./tests/fixtures/min-csv/lancedb"
|
||||||
collection_name: "lancedb_ci"
|
collection_name: "lancedb_ci"
|
||||||
|
overwrite: True
|
||||||
store_in_table: True
|
store_in_table: True
|
||||||
|
|
||||||
entity_name_description:
|
entity_name_description:
|
||||||
title_column: "name"
|
title_column: "name"
|
||||||
# id_column: "id"
|
# id_column: "id"
|
||||||
# overwrite: true
|
|
||||||
# entity_name: ...
|
# entity_name: ...
|
||||||
# relationship_description: ...
|
# relationship_description: ...
|
||||||
# community_report_full_content: ...
|
# community_report_full_content: ...
|
||||||
@ -20,7 +19,6 @@ embeddings:
|
|||||||
# document_raw_content: ...
|
# document_raw_content: ...
|
||||||
# text_unit_text: ...
|
# text_unit_text: ...
|
||||||
|
|
||||||
|
|
||||||
storage:
|
storage:
|
||||||
type: file # or blob
|
type: file # or blob
|
||||||
base_dir: "output/${timestamp}/artifacts"
|
base_dir: "output/${timestamp}/artifacts"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user