mirror of
https://github.com/microsoft/graphrag.git
synced 2025-07-26 18:31:46 +00:00

* added cosmosdb constructor and database methods * added rest of abstract method headers * added cosmos db container methods * implemented has and delete methods * finished implementing abstract class methods * integrated class into storage factory * integrated cosmosdb class into cache factory * added support for new config file fields * replaced primary key cosmosdb initialization with connection strings * modified cosmosdb setter to require json * Fix non-default emitters * Format * Ruff * ruff * first successful run of cosmosdb indexing * removed extraneous container_name setting * require base_dir to be typed as str * reverted merged changed from closed branch * removed nested try statement * readded initial non-parquet emitter fix * added basic support for parquet emitter using internal conversions * merged with main and resolved conflicts * fixed more merge conflicts * added cosmosdb functionality to query pipeline * tested query for cosmosdb * collapsed cosmosdb schema to use minimal containers and databases * simplified create_database and create_container functions * ruff fixes and semversioner * spellcheck and ci fixes * updated pyproject toml and lock file * apply fixes after merge from main * add temporary comments * refactor cache factory * refactored storage factory * minor formatting * update dictionary * fix spellcheck typo * fix default value * fix pydantic model defaults * update pydantic models * fix init_content * cleanup how factory passes parameters to file storage * remove unnecessary output file type * update pydantic model * cleanup code * implemented clear method * fix merge from main * add test stub for cosmosdb * regenerate lock file * modified set method to collapse parquet rows * modified get method to collapse parquet rows * updated has and delete methods and docstrings to adhere to new schema * added prefix helper function * replaced delimiter for prefixed id * verified empty tests are passing * fix merges from main * add find test 
* update cicd step name * tested querying for new schema * resolved errors from merge conflicts * refactored set method to handle cache in new schema * refactored get method to handle cache in new schema * force unique ids to be written to cosmos for nodes * found bug with has and delete methods * modified has and delete to work with cache in new schema * fix the merge from main * minor typo fixes * update lock file * spellcheck fix * fix init function signature * minor formatting updates * remove https protocol * change localhost to 127.0.0.1 address * update pytest to use bacj engine * verified cache tests * improved speed of has function * resolved pytest error with find function * added test for child method * make container_name variable private as _container_name * minor variable name fix * cleanup cosmos pytest and make the cosmosdb storage class operations more efficient * update cicd to use different cosmosdb emulator * test with http protocol * added pytest for clear() * add longer timeout for cosmosdb emulator startup * revert http connection back to https * add comments to cicd code for future dev usage * set to container and database clients to none upon deletion * ruff changes * add comments to cicd code * removed unneeded None statements and ruff fixes * more ruff fixes * Update test_run.py * remove unnecessary call to delete container * ruff format updates * Reverted test_run.py * fix ruff formatter errors * cleanup variable names to be more consistent * remove extra semversioner file * revert pydantic model changes * revert pydantic model change * revert pydantic model change * re-enable inline formatting rule * update documentation in dev guide --------- Co-authored-by: Alonso Guevara <alonsog@microsoft.com> Co-authored-by: Josh Bradley <joshbradley@microsoft.com>
75 lines
2.3 KiB
Python
75 lines
2.3 KiB
Python
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
|
|
import asyncio
|
|
import os
|
|
import unittest
|
|
|
|
from graphrag.cache.json_pipeline_cache import JsonPipelineCache
|
|
from graphrag.storage.file_pipeline_storage import (
|
|
FilePipelineStorage,
|
|
)
|
|
|
|
# Scratch directory (relative to the current working directory) whose
# contents the tests below inspect on disk.
TEMP_DIR = "./.tmp"
|
|
|
|
|
|
def create_cache():
    """Return a JsonPipelineCache backed by file storage rooted at TEMP_DIR.

    The storage root is derived from TEMP_DIR (resolved against the current
    working directory) rather than a second hard-coded ".tmp" literal, so the
    directory the cache writes to and the directory the tests inspect cannot
    drift apart.
    """
    storage = FilePipelineStorage(root_dir=os.path.abspath(TEMP_DIR))
    return JsonPipelineCache(storage)
|
|
|
|
|
|
class TestFilePipelineCache(unittest.IsolatedAsyncioTestCase):
    """Tests for the cache returned by create_cache(), checked against TEMP_DIR on disk."""

    def setUp(self):
        """Create a fresh cache instance for every test."""
        self.cache = create_cache()

    async def asyncTearDown(self):
        """Clear the cache after every test.

        Implemented as ``asyncTearDown`` so the cleanup is awaited on the
        test case's own event loop instead of spinning up a second loop
        with ``asyncio.run`` from a synchronous ``tearDown``.
        """
        await self.cache.clear()

    async def test_cache_clear(self):
        """clear() must leave the cache directory empty."""
        # Seed the cache directory with two plain files, bypassing the cache API.
        # makedirs(exist_ok=True) avoids the exists()/mkdir() race.
        os.makedirs(TEMP_DIR, exist_ok=True)
        with open(f"{TEMP_DIR}/test1", "w", encoding="utf-8") as f:
            f.write("This is test1 file.")
        with open(f"{TEMP_DIR}/test2", "w", encoding="utf-8") as f:
            f.write("This is test2 file.")

        await self.cache.clear()

        # Nothing may be left behind in the cache directory.
        assert not os.listdir(TEMP_DIR)

    async def test_child_cache(self):
        """A child cache writes beneath a sub-directory of the parent."""
        await self.cache.set("test1", "test1")
        assert os.path.exists(f"{TEMP_DIR}/test1")

        # Creating the child is expected to materialize its directory.
        child = self.cache.child("test")
        assert os.path.exists(f"{TEMP_DIR}/test")

        await child.set("test2", "test2")
        assert os.path.exists(f"{TEMP_DIR}/test/test2")

        # Deleting a key from the parent removes its backing file.
        await self.cache.set("test1", "test1")
        await self.cache.delete("test1")
        assert not os.path.exists(f"{TEMP_DIR}/test1")

    async def test_cache_has(self):
        """has() reports stored keys only; get() of a missing key is None."""
        test1 = "this is a test file"
        await self.cache.set("test1", test1)

        assert await self.cache.has("test1")
        assert not await self.cache.has("NON_EXISTENT")
        assert await self.cache.get("NON_EXISTENT") is None

    async def test_get_set(self):
        """Values round-trip unchanged, including backslash-heavy strings."""
        test1 = "this is a test file"
        test2 = "\\n test"
        test3 = "\\\\\\"
        await self.cache.set("test1", test1)
        await self.cache.set("test2", test2)
        await self.cache.set("test3", test3)
        assert await self.cache.get("test1") == test1
        assert await self.cache.get("test2") == test2
        assert await self.cache.get("test3") == test3
|