mirror of
https://github.com/microsoft/graphrag.git
synced 2025-06-26 23:19:58 +00:00
Feat/update cli (#1376)
* Add update cli option with default storage * Semver * Semver * Pyright * Format
This commit is contained in:
parent
baa261c8e9
commit
20c120288b
@ -0,0 +1,4 @@
|
||||
{
|
||||
"type": "patch",
|
||||
"description": "Add update cli entrypoint for incremental indexing"
|
||||
}
|
1
.vscode/settings.json
vendored
1
.vscode/settings.json
vendored
@ -3,6 +3,7 @@
|
||||
"**/.yarn": true,
|
||||
"**/.pnp.*": true
|
||||
},
|
||||
"editor.formatOnSave": false,
|
||||
"eslint.nodePath": ".yarn/sdks",
|
||||
"typescript.tsdk": ".yarn/sdks/typescript/lib",
|
||||
"typescript.enablePromptUseWorkspaceTsdk": true,
|
||||
|
@ -79,11 +79,76 @@ def index_cli(
|
||||
output_dir: Path | None,
|
||||
):
|
||||
"""Run the pipeline with the given config."""
|
||||
config = load_config(root_dir, config_filepath)
|
||||
|
||||
_run_index(
|
||||
config=config,
|
||||
verbose=verbose,
|
||||
resume=resume,
|
||||
memprofile=memprofile,
|
||||
cache=cache,
|
||||
reporter=reporter,
|
||||
emit=emit,
|
||||
dry_run=dry_run,
|
||||
skip_validation=skip_validation,
|
||||
output_dir=output_dir,
|
||||
)
|
||||
|
||||
|
||||
def update_cli(
    root_dir: Path,
    verbose: bool,
    memprofile: bool,
    cache: bool,
    reporter: ReporterType,
    config_filepath: Path | None,
    emit: list[TableEmitterType],
    skip_validation: bool,
    output_dir: Path | None,
):
    """Run the indexing pipeline as an update of an existing index.

    Loads the configuration via ``load_config(root_dir, config_filepath)``,
    makes sure ``update_index_storage`` is populated, and hands everything to
    ``_run_index``. Unlike a regular index run, ``resume`` and ``dry_run`` are
    not exposed to the caller and are always passed as ``False``.
    """
    settings = load_config(root_dir, config_filepath)

    if not settings.update_index_storage:
        # The loaded config has no (or a falsy) update storage section, so fall
        # back to the package defaults. The imports live inside this branch and
        # are only executed when the fallback is actually needed.
        from graphrag.config.defaults import STORAGE_TYPE, UPDATE_STORAGE_BASE_DIR
        from graphrag.config.models.storage_config import StorageConfig

        fallback_storage = StorageConfig(
            type=STORAGE_TYPE,
            base_dir=UPDATE_STORAGE_BASE_DIR,
        )
        settings.update_index_storage = fallback_storage

    _run_index(
        config=settings,
        output_dir=output_dir,
        reporter=reporter,
        emit=emit,
        verbose=verbose,
        memprofile=memprofile,
        cache=cache,
        skip_validation=skip_validation,
        # Fixed for update runs: never resume a previous run, never dry-run.
        resume=False,
        dry_run=False,
    )
|
||||
|
||||
|
||||
def _run_index(
|
||||
config,
|
||||
verbose,
|
||||
resume,
|
||||
memprofile,
|
||||
cache,
|
||||
reporter,
|
||||
emit,
|
||||
dry_run,
|
||||
skip_validation,
|
||||
output_dir,
|
||||
):
|
||||
progress_reporter = create_progress_reporter(reporter)
|
||||
info, error, success = _logger(progress_reporter)
|
||||
run_id = resume or time.strftime("%Y%m%d-%H%M%S")
|
||||
|
||||
config = load_config(root_dir, config_filepath)
|
||||
config.storage.base_dir = str(output_dir) if output_dir else config.storage.base_dir
|
||||
config.reporting.base_dir = (
|
||||
str(output_dir) if output_dir else config.reporting.base_dir
|
||||
|
@ -16,7 +16,7 @@ from graphrag.logging import ReporterType
|
||||
from graphrag.prompt_tune.generator import MAX_TOKEN_COUNT
|
||||
from graphrag.prompt_tune.loader import MIN_CHUNK_SIZE
|
||||
|
||||
from .index import index_cli
|
||||
from .index import index_cli, update_cli
|
||||
from .initialize import initialize_project_at
|
||||
from .prompt_tune import prompt_tune
|
||||
from .query import run_drift_search, run_global_search, run_local_search
|
||||
@ -129,6 +129,71 @@ def _index_cli(
|
||||
)
|
||||
|
||||
|
||||
@app.command("update")
def _update_cli(
    config: Annotated[
        Path | None,
        typer.Option(
            help="The configuration to use.",
            exists=True,
            file_okay=True,
            readable=True,
        ),
    ] = None,
    root: Annotated[
        Path,
        typer.Option(
            help="The project root directory.",
            exists=True,
            dir_okay=True,
            writable=True,
            resolve_path=True,
        ),
    ] = Path(),  # an empty Path() means the current directory
    verbose: Annotated[
        bool,
        typer.Option(help="Run the indexing pipeline with verbose logging"),
    ] = False,
    memprofile: Annotated[
        bool,
        typer.Option(help="Run the indexing pipeline with memory profiling"),
    ] = False,
    reporter: Annotated[
        ReporterType,
        typer.Option(help="The progress reporter to use."),
    ] = ReporterType.RICH,
    emit: Annotated[
        str,
        typer.Option(help="The data formats to emit, comma-separated."),
    ] = TableEmitterType.Parquet.value,
    cache: Annotated[
        bool,
        typer.Option(help="Use LLM cache."),
    ] = True,
    skip_validation: Annotated[
        bool,
        typer.Option(
            help="Skip any preflight validation. Useful when running no LLM steps."
        ),
    ] = False,
    output: Annotated[
        Path | None,
        typer.Option(
            help="Indexing pipeline output directory. Overrides storage.base_dir in the configuration file.",
            dir_okay=True,
            writable=True,
            resolve_path=True,
        ),
    ] = None,
):
    """
    Update an existing knowledge graph index.

    Applies a default storage configuration (if not provided by config), saving the new index to the local file system in the `update_output` folder.
    """
    # Convert the raw CLI values into the types update_cli is annotated with.
    # `emit` arrives as one comma-separated string; each piece is stripped of
    # surrounding whitespace before being turned into a TableEmitterType.
    reporter_type = ReporterType(reporter)
    emit_types = [TableEmitterType(piece.strip()) for piece in emit.split(",")]

    update_cli(
        root_dir=root,
        config_filepath=config,
        output_dir=output,
        reporter=reporter_type,
        emit=emit_types,
        verbose=verbose,
        memprofile=memprofile,
        cache=cache,
        skip_validation=skip_validation,
    )
|
||||
|
||||
|
||||
@app.command("prompt-tune")
|
||||
def _prompt_tune_cli(
|
||||
root: Annotated[
|
||||
|
@ -139,6 +139,7 @@ test_smoke = "pytest ./tests/smoke"
|
||||
test_notebook = "pytest ./tests/notebook"
|
||||
test_verbs = "pytest ./tests/verbs"
|
||||
index = "python -m graphrag index"
|
||||
update = "python -m graphrag update"
|
||||
init = "python -m graphrag init"
|
||||
query = "python -m graphrag query"
|
||||
prompt_tune = "python -m graphrag prompt-tune"
|
||||
|
Loading…
x
Reference in New Issue
Block a user