graphrag/tests/integration/storage/test_factory.py
Derek Worthen c644338bae
Refactor config (#1593)
* Refactor config

- Add new ModelConfig to represent LLM settings
    - Combines LLMParameters, ParallelizationParameters, encoding_model, and async_mode
- Add top level models config that is a list of available LLM ModelConfigs
- Remove LLMConfig inheritance and delete LLMConfig
    - Replace the inheritance with a model_id reference to the ModelConfig listed in the top level models config
- Remove all fallbacks and hydration logic from create_graphrag_config
    - This removes the automatic env variable overrides
- Support env variables within config files using Templating
    - This requires "$" to be escaped with extra "$" so ".*\\.txt$" becomes ".*\\.txt$$"
- Update init content to initialize new config file with the ModelConfig structure

* Use dict of ModelConfig instead of list

* Add model validations and unit tests

* Fix ruff checks

* Add semversioner change

* Fix unit tests

* validate root_dir in pydantic model

* Rename ModelConfig to LanguageModelConfig

* Rename ModelConfigMissingError to LanguageModelConfigMissingError

* Add validationg for unexpected API keys

* Allow skipping pydantic validation for testing/mocking purposes.

* Add default lm configs to verb tests

* smoke test

* remove config from flows to fix llm arg mapping

* Fix embedding llm arg mapping

* Remove timestamp from smoke test outputs

* Remove unused "subworkflows" smoke test properties

* Add models to smoke test configs

* Update smoke test output path

* Send logs to logs folder

* Fix output path

* Fix csv test file pattern

* Update placeholder

* Format

* Instantiate default model configs

* Fix unit tests for config defaults

* Fix migration notebook

* Remove create_pipeline_config

* Remove several unused config models

* Remove indexing embedding and input configs

* Move embeddings function to config

* Remove skip_workflows

* Remove skip embeddings in favor of explicit naming

* fix unit test spelling mistake

* self.models[model_id] is already a language model. Remove redundant casting.

* update validation errors to instruct users to rerun graphrag init

* instantiate LanguageModelConfigs with validation

* skip validation in unit tests

* update verb tests to use default model settings instead of skipping validation

* test using llm settings

* cleanup verb tests

* remove unsafe default model config

* remove the ability to skip pydantic validation

* remove None union types when default values are set

* move vector_store from embeddings to top level of config and delete resolve_paths

* update vector store settings

* fix vector store and smoke tests

* fix serializing vector_store settings

* fix vector_store usage

* fix vector_store type

* support cli overrides for loading graphrag config

* rename storage to output

* Add --force flag to init

* Remove run_id and resume, fix Drift config assignment

* Ruff

---------

Co-authored-by: Nathan Evans <github@talkswithnumbers.com>
Co-authored-by: Alonso Guevara <alonsog@microsoft.com>
2025-01-21 17:52:06 -06:00

76 lines
2.8 KiB
Python

# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
"""StorageFactory Tests.
These tests will test the StorageFactory class and the creation of each storage type that is natively supported.
"""
import sys
import pytest
from graphrag.config.enums import OutputType
from graphrag.storage.blob_pipeline_storage import BlobPipelineStorage
from graphrag.storage.cosmosdb_pipeline_storage import CosmosDBPipelineStorage
from graphrag.storage.factory import StorageFactory
from graphrag.storage.file_pipeline_storage import FilePipelineStorage
from graphrag.storage.memory_pipeline_storage import MemoryPipelineStorage
# cspell:disable-next-line well-known-key
WELL_KNOWN_BLOB_STORAGE_KEY = "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;"
# cspell:disable-next-line well-known-key
WELL_KNOWN_COSMOS_CONNECTION_STRING = "AccountEndpoint=https://127.0.0.1:8081/;AccountKey=C2y6yDjf5/R+ob0N8A7Cgv30VRDJIWEHLM+4QDU5DE2nQ9nDuVTqobD4b8mGGyPMbIZnqyMsEcaGQy67XIw/Jw=="
def test_create_blob_storage():
kwargs = {
"type": "blob",
"connection_string": WELL_KNOWN_BLOB_STORAGE_KEY,
"base_dir": "testbasedir",
"container_name": "testcontainer",
}
storage = StorageFactory.create_storage(OutputType.blob, kwargs)
assert isinstance(storage, BlobPipelineStorage)
@pytest.mark.skipif(
not sys.platform.startswith("win"),
reason="cosmosdb emulator is only available on windows runners at this time",
)
def test_create_cosmosdb_storage():
kwargs = {
"type": "cosmosdb",
"connection_string": WELL_KNOWN_COSMOS_CONNECTION_STRING,
"base_dir": "testdatabase",
"container_name": "testcontainer",
}
storage = StorageFactory.create_storage(OutputType.cosmosdb, kwargs)
assert isinstance(storage, CosmosDBPipelineStorage)
def test_create_file_storage():
kwargs = {"type": "file", "base_dir": "/tmp/teststorage"}
storage = StorageFactory.create_storage(OutputType.file, kwargs)
assert isinstance(storage, FilePipelineStorage)
def test_create_memory_storage():
kwargs = {"type": "memory"}
storage = StorageFactory.create_storage(OutputType.memory, kwargs)
assert isinstance(storage, MemoryPipelineStorage)
def test_register_and_create_custom_storage():
class CustomStorage:
def __init__(self, **kwargs):
pass
StorageFactory.register("custom", CustomStorage)
storage = StorageFactory.create_storage("custom", {})
assert isinstance(storage, CustomStorage)
def test_create_unknown_storage():
with pytest.raises(ValueError, match="Unknown storage type: unknown"):
StorageFactory.create_storage("unknown", {})