diff --git a/.semversioner/next-release/patch-20240812232353903544.json b/.semversioner/next-release/patch-20240812232353903544.json new file mode 100644 index 00000000..733b4062 --- /dev/null +++ b/.semversioner/next-release/patch-20240812232353903544.json @@ -0,0 +1,4 @@ +{ + "type": "patch", + "description": "Improves filtering for data dir inferring" +} diff --git a/graphrag/query/cli.py b/graphrag/query/cli.py index 16cfe0c9..915807a3 100644 --- a/graphrag/query/cli.py +++ b/graphrag/query/cli.py @@ -4,7 +4,7 @@ """Command line interface for the query module.""" import asyncio -import os +import re from pathlib import Path from typing import cast @@ -129,7 +129,9 @@ def _infer_data_dir(root: str) -> str: output = Path(root) / "output" # use the latest data-run folder if output.exists(): - folders = sorted(output.iterdir(), key=os.path.getmtime, reverse=True) + expr = re.compile(r"\d{8}-\d{6}") + filtered = [f for f in output.iterdir() if f.is_dir() and expr.match(f.name)] + folders = sorted(filtered, key=lambda f: f.name, reverse=True) if len(folders) > 0: folder = folders[0] return str((folder / "artifacts").absolute()) diff --git a/tests/unit/query/__init__.py b/tests/unit/query/__init__.py new file mode 100644 index 00000000..0a3e38ad --- /dev/null +++ b/tests/unit/query/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) 2024 Microsoft Corporation. 
+# Licensed under the MIT License diff --git a/tests/unit/query/data/defaults/output/20240812-120000/empty.txt b/tests/unit/query/data/defaults/output/20240812-120000/empty.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/query/data/defaults/output/20240812-121000/empty.txt b/tests/unit/query/data/defaults/output/20240812-121000/empty.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/query/data/empty/something-else/empty.txt b/tests/unit/query/data/empty/something-else/empty.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/query/data/hidden/output/.another/empty.txt b/tests/unit/query/data/hidden/output/.another/empty.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/query/data/hidden/output/.hidden b/tests/unit/query/data/hidden/output/.hidden new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/query/data/hidden/output/20240812-120000/empty.txt b/tests/unit/query/data/hidden/output/20240812-120000/empty.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/query/data/hidden/output/20240812-121000/empty.txt b/tests/unit/query/data/hidden/output/20240812-121000/empty.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/query/data/non-numeric/output/20240812-120000/empty.txt b/tests/unit/query/data/non-numeric/output/20240812-120000/empty.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/query/data/non-numeric/output/20240812-121000/empty.txt b/tests/unit/query/data/non-numeric/output/20240812-121000/empty.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/query/data/non-numeric/output/something-else/empty.txt b/tests/unit/query/data/non-numeric/output/something-else/empty.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/query/test_infer_data_dir.py b/tests/unit/query/test_infer_data_dir.py new file mode 100644 index 00000000..c950eb35 --- 
/dev/null
+++ b/tests/unit/query/test_infer_data_dir.py
@@ -0,0 +1,41 @@
+# Copyright (c) 2024 Microsoft Corporation.
+# Licensed under the MIT License
+"""Unit tests for inferring the latest data-run directory in the query CLI."""
+
+from pathlib import Path
+
+import pytest
+
+from graphrag.query.cli import _infer_data_dir
+
+# Fixtures are resolved relative to this file, not the working directory.
+DATA_DIR = Path(__file__).parent / "data"
+
+
+def _inferred_run_name(fixture: str) -> str:
+    """Return the run-folder name _infer_data_dir selects for a fixture root."""
+    result = Path(_infer_data_dir(str(DATA_DIR / fixture)))
+    # The inferred path ends in "<run folder>/artifacts".
+    return result.parts[-2]
+
+
+def test_infer_data_dir():
+    """The run folder with the highest timestamp name is selected."""
+    assert _inferred_run_name("defaults") == "20240812-121000"
+
+
+def test_infer_data_dir_ignores_hidden_files():
+    """Entries whose names start with '.' are never chosen."""
+    assert _inferred_run_name("hidden") == "20240812-121000"
+
+
+def test_infer_data_dir_ignores_non_numeric():
+    """Folders without a timestamp-style name are skipped."""
+    assert _inferred_run_name("non-numeric") == "20240812-121000"
+
+
+def test_infer_data_dir_throws_on_no_match():
+    """A root with no usable run folder raises ValueError."""
+    # ValueError is what is actually thrown, hence the broad raises check.
+    with pytest.raises(ValueError):  # noqa: PT011
+        _infer_data_dir(str(DATA_DIR / "empty"))