mirror of
https://github.com/microsoft/graphrag.git
synced 2025-08-17 21:22:00 +00:00
Add stricter filtering and tests for cli data directory discovery (#910)
* Add stricter filtering and tests for cli data directory discovery * Semver * Ignore ruff on error type * Format * Fix for windows paths * Fix for windows paths * Uncomment blob tests * Sort by timestamp name instead of modified date * Format * Add additional folder name test
This commit is contained in:
parent
d68e323193
commit
ac504e31a0
@ -0,0 +1,4 @@
|
|||||||
|
{
|
||||||
|
"type": "patch",
|
||||||
|
"description": "Improves filtering for data dir inferring"
|
||||||
|
}
|
@ -4,7 +4,7 @@
|
|||||||
"""Command line interface for the query module."""
|
"""Command line interface for the query module."""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import os
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import cast
|
from typing import cast
|
||||||
|
|
||||||
@ -129,7 +129,9 @@ def _infer_data_dir(root: str) -> str:
|
|||||||
output = Path(root) / "output"
|
output = Path(root) / "output"
|
||||||
# use the latest data-run folder
|
# use the latest data-run folder
|
||||||
if output.exists():
|
if output.exists():
|
||||||
folders = sorted(output.iterdir(), key=os.path.getmtime, reverse=True)
|
expr = re.compile(r"\d{8}-\d{6}")
|
||||||
|
filtered = [f for f in output.iterdir() if f.is_dir() and expr.match(f.name)]
|
||||||
|
folders = sorted(filtered, key=lambda f: f.name, reverse=True)
|
||||||
if len(folders) > 0:
|
if len(folders) > 0:
|
||||||
folder = folders[0]
|
folder = folders[0]
|
||||||
return str((folder / "artifacts").absolute())
|
return str((folder / "artifacts").absolute())
|
||||||
|
2
tests/unit/query/__init__.py
Normal file
2
tests/unit/query/__init__.py
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
# Copyright (c) 2024 Microsoft Corporation.
|
||||||
|
# Licensed under the MIT License
|
0
tests/unit/query/data/hidden/output/.hidden
Normal file
0
tests/unit/query/data/hidden/output/.hidden
Normal file
32
tests/unit/query/test_infer_data_dir.py
Normal file
32
tests/unit/query/test_infer_data_dir.py
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
# Copyright (c) 2024 Microsoft Corporation.
|
||||||
|
# Licensed under the MIT License
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from graphrag.query.cli import _infer_data_dir
|
||||||
|
|
||||||
|
|
||||||
|
def test_infer_data_dir():
    """Infer the data directory for the ``defaults`` fixture root."""
    inferred = _infer_data_dir("./tests/unit/query/data/defaults")
    # The second-to-last path component is expected to be the selected
    # timestamp-named run folder.
    run_folder = Path(inferred).parts[-2]
    assert run_folder == "20240812-121000"
|
||||||
|
|
||||||
|
|
||||||
|
def test_infer_data_dir_ignores_hidden_files():
    """Dot-prefixed entries in the output folder must not be chosen as the data directory.

    The ``hidden`` fixture's output folder contains an entry named ``.hidden``;
    the timestamp-named run folder is still expected to be selected.
    """
    inferred = _infer_data_dir("./tests/unit/query/data/hidden")
    run_folder = Path(inferred).parts[-2]
    assert run_folder == "20240812-121000"
|
||||||
|
|
||||||
|
|
||||||
|
def test_infer_data_dir_ignores_non_numeric():
    """Expect the timestamp-named run folder for the ``non-numeric`` fixture root.

    NOTE(review): presumably the fixture holds entries whose names are not
    timestamps; confirm against the fixture directory.
    """
    inferred = _infer_data_dir("./tests/unit/query/data/non-numeric")
    run_folder = Path(inferred).parts[-2]
    assert run_folder == "20240812-121000"
|
||||||
|
|
||||||
|
|
||||||
|
def test_infer_data_dir_throws_on_no_match():
    """Expect a ``ValueError`` when inferring from the ``empty`` fixture root."""
    root = "./tests/unit/query/data/empty"
    # The suppression names the rule explicitly (`noqa: CODE`, with the colon)
    # so that only PT011 is silenced on this line.
    with pytest.raises(ValueError):  # noqa: PT011 (this is what is actually thrown...)
        _infer_data_dir(root)
|
Loading…
x
Reference in New Issue
Block a user