mirror of
https://github.com/microsoft/graphrag.git
synced 2025-06-26 23:19:58 +00:00
Feat/update cli (#1376)
* Add update cli option with default storage * Semver * Semver * Pyright * Format
This commit is contained in:
parent
baa261c8e9
commit
20c120288b
@ -0,0 +1,4 @@
|
|||||||
|
{
|
||||||
|
"type": "patch",
|
||||||
|
"description": "Add update cli entrypoint for incremental indexing"
|
||||||
|
}
|
1
.vscode/settings.json
vendored
1
.vscode/settings.json
vendored
@ -3,6 +3,7 @@
|
|||||||
"**/.yarn": true,
|
"**/.yarn": true,
|
||||||
"**/.pnp.*": true
|
"**/.pnp.*": true
|
||||||
},
|
},
|
||||||
|
"editor.formatOnSave": false,
|
||||||
"eslint.nodePath": ".yarn/sdks",
|
"eslint.nodePath": ".yarn/sdks",
|
||||||
"typescript.tsdk": ".yarn/sdks/typescript/lib",
|
"typescript.tsdk": ".yarn/sdks/typescript/lib",
|
||||||
"typescript.enablePromptUseWorkspaceTsdk": true,
|
"typescript.enablePromptUseWorkspaceTsdk": true,
|
||||||
|
@ -79,11 +79,76 @@ def index_cli(
|
|||||||
output_dir: Path | None,
|
output_dir: Path | None,
|
||||||
):
|
):
|
||||||
"""Run the pipeline with the given config."""
|
"""Run the pipeline with the given config."""
|
||||||
|
config = load_config(root_dir, config_filepath)
|
||||||
|
|
||||||
|
_run_index(
|
||||||
|
config=config,
|
||||||
|
verbose=verbose,
|
||||||
|
resume=resume,
|
||||||
|
memprofile=memprofile,
|
||||||
|
cache=cache,
|
||||||
|
reporter=reporter,
|
||||||
|
emit=emit,
|
||||||
|
dry_run=dry_run,
|
||||||
|
skip_validation=skip_validation,
|
||||||
|
output_dir=output_dir,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def update_cli(
    root_dir: Path,
    verbose: bool,
    memprofile: bool,
    cache: bool,
    reporter: ReporterType,
    config_filepath: Path | None,
    emit: list[TableEmitterType],
    skip_validation: bool,
    output_dir: Path | None,
):
    """Load the project configuration and run the index pipeline as an update.

    If the loaded configuration has no ``update_index_storage`` set, a
    ``StorageConfig`` built from the package defaults (``STORAGE_TYPE`` and
    ``UPDATE_STORAGE_BASE_DIR``) is assigned to it before the run starts.
    The run is always started with ``resume=False`` and ``dry_run=False``;
    every other option is forwarded unchanged to ``_run_index``.
    """
    config = load_config(root_dir, config_filepath)

    # Fall back to the default update storage when the config does not define one.
    if not config.update_index_storage:
        # Imported lazily: these names are only needed on this fallback path.
        from graphrag.config.defaults import STORAGE_TYPE, UPDATE_STORAGE_BASE_DIR
        from graphrag.config.models.storage_config import StorageConfig

        default_update_storage = StorageConfig(
            type=STORAGE_TYPE,
            base_dir=UPDATE_STORAGE_BASE_DIR,
        )
        config.update_index_storage = default_update_storage

    # resume and dry_run are fixed for this entry point; they are not caller options.
    run_options = {
        "verbose": verbose,
        "resume": False,
        "memprofile": memprofile,
        "cache": cache,
        "reporter": reporter,
        "emit": emit,
        "dry_run": False,
        "skip_validation": skip_validation,
        "output_dir": output_dir,
    }
    _run_index(config=config, **run_options)
|
||||||
|
|
||||||
|
|
||||||
|
def _run_index(
|
||||||
|
config,
|
||||||
|
verbose,
|
||||||
|
resume,
|
||||||
|
memprofile,
|
||||||
|
cache,
|
||||||
|
reporter,
|
||||||
|
emit,
|
||||||
|
dry_run,
|
||||||
|
skip_validation,
|
||||||
|
output_dir,
|
||||||
|
):
|
||||||
progress_reporter = create_progress_reporter(reporter)
|
progress_reporter = create_progress_reporter(reporter)
|
||||||
info, error, success = _logger(progress_reporter)
|
info, error, success = _logger(progress_reporter)
|
||||||
run_id = resume or time.strftime("%Y%m%d-%H%M%S")
|
run_id = resume or time.strftime("%Y%m%d-%H%M%S")
|
||||||
|
|
||||||
config = load_config(root_dir, config_filepath)
|
|
||||||
config.storage.base_dir = str(output_dir) if output_dir else config.storage.base_dir
|
config.storage.base_dir = str(output_dir) if output_dir else config.storage.base_dir
|
||||||
config.reporting.base_dir = (
|
config.reporting.base_dir = (
|
||||||
str(output_dir) if output_dir else config.reporting.base_dir
|
str(output_dir) if output_dir else config.reporting.base_dir
|
||||||
|
@ -16,7 +16,7 @@ from graphrag.logging import ReporterType
|
|||||||
from graphrag.prompt_tune.generator import MAX_TOKEN_COUNT
|
from graphrag.prompt_tune.generator import MAX_TOKEN_COUNT
|
||||||
from graphrag.prompt_tune.loader import MIN_CHUNK_SIZE
|
from graphrag.prompt_tune.loader import MIN_CHUNK_SIZE
|
||||||
|
|
||||||
from .index import index_cli
|
from .index import index_cli, update_cli
|
||||||
from .initialize import initialize_project_at
|
from .initialize import initialize_project_at
|
||||||
from .prompt_tune import prompt_tune
|
from .prompt_tune import prompt_tune
|
||||||
from .query import run_drift_search, run_global_search, run_local_search
|
from .query import run_drift_search, run_global_search, run_local_search
|
||||||
@ -129,6 +129,71 @@ def _index_cli(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@app.command("update")
def _update_cli(
    config: Annotated[
        Path | None,
        typer.Option(
            help="The configuration to use.", exists=True, file_okay=True, readable=True
        ),
    ] = None,
    root: Annotated[
        Path,
        typer.Option(
            help="The project root directory.",
            exists=True,
            dir_okay=True,
            writable=True,
            resolve_path=True,
        ),
    ] = Path(),  # set default to current directory
    verbose: Annotated[
        bool, typer.Option(help="Run the indexing pipeline with verbose logging")
    ] = False,
    memprofile: Annotated[
        bool, typer.Option(help="Run the indexing pipeline with memory profiling")
    ] = False,
    reporter: Annotated[
        ReporterType, typer.Option(help="The progress reporter to use.")
    ] = ReporterType.RICH,
    emit: Annotated[
        str, typer.Option(help="The data formats to emit, comma-separated.")
    ] = TableEmitterType.Parquet.value,
    cache: Annotated[bool, typer.Option(help="Use LLM cache.")] = True,
    skip_validation: Annotated[
        bool,
        typer.Option(
            help="Skip any preflight validation. Useful when running no LLM steps."
        ),
    ] = False,
    output: Annotated[
        Path | None,
        typer.Option(
            help="Indexing pipeline output directory. Overrides storage.base_dir in the configuration file.",
            dir_okay=True,
            writable=True,
            resolve_path=True,
        ),
    ] = None,
):
    """
    Update an existing knowledge graph index.

    Applies a default storage configuration (if not provided by config), saving the new index to the local file system in the `update_output` folder.
    """
    # Normalise the raw option values before handing them to the implementation.
    reporter_type = ReporterType(reporter)
    # --emit arrives as one comma-separated string; each entry becomes an emitter type.
    emitter_types = [TableEmitterType(value.strip()) for value in emit.split(",")]

    update_cli(
        root_dir=root,
        verbose=verbose,
        memprofile=memprofile,
        cache=cache,
        reporter=reporter_type,
        config_filepath=config,
        emit=emitter_types,
        skip_validation=skip_validation,
        output_dir=output,
    )
|
||||||
|
|
||||||
|
|
||||||
@app.command("prompt-tune")
|
@app.command("prompt-tune")
|
||||||
def _prompt_tune_cli(
|
def _prompt_tune_cli(
|
||||||
root: Annotated[
|
root: Annotated[
|
||||||
|
@ -139,6 +139,7 @@ test_smoke = "pytest ./tests/smoke"
|
|||||||
test_notebook = "pytest ./tests/notebook"
|
test_notebook = "pytest ./tests/notebook"
|
||||||
test_verbs = "pytest ./tests/verbs"
|
test_verbs = "pytest ./tests/verbs"
|
||||||
index = "python -m graphrag index"
|
index = "python -m graphrag index"
|
||||||
|
update = "python -m graphrag update"
|
||||||
init = "python -m graphrag init"
|
init = "python -m graphrag init"
|
||||||
query = "python -m graphrag query"
|
query = "python -m graphrag query"
|
||||||
prompt_tune = "python -m graphrag prompt-tune"
|
prompt_tune = "python -m graphrag prompt-tune"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user