feat(cli): add restore-indices CLI command (#13820)

This commit is contained in:
Harshal Sheth 2025-06-19 04:03:47 -07:00 committed by GitHub
parent 85b29c9361
commit 4aa3a928c0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 68 additions and 0 deletions

View File

@ -1,5 +1,7 @@
# Restore Indices Endpoint
This is reference material for the REST.li `restoreIndices` endpoint. For general information on reindexing and restoring indices, see [Restore Indices](../../how/restore-indices.md).
You can send an HTTP POST request to the `/gms/operations?action=restoreIndices` endpoint with the `urn` as part of the JSON payload to restore indices for that particular URN, or with a `urnLike` pattern (using `%` as the wildcard) to restore `batchSize` URNs matching the pattern, starting from `start`.
```

View File

@ -177,6 +177,10 @@ All Aspects:
For Rest.li, see [Restore Indices API](../api/restli/restore-indices.md).
### CLI
The [datahub CLI](../cli.md) also provides a utility command for restoring indices. Run `datahub check restore-indices --help` for more details.
## Best Practices
In general, this process is not required to run unless there has been a disruption of storage services or infrastructure,

View File

@ -478,3 +478,30 @@ def server_config() -> None:
server_config = graph.get_server_config()
click.echo(pprint.pformat(server_config))
@check.command()
@click.option(
    "--urn",
    required=True,
    help="The urn or urn pattern (supports % for wildcard)",
)
@click.option(
    "--aspect",
    default=None,
    help="Filter to a specific aspect name.",
)
@click.option(
    "--start",
    type=int,
    default=None,
    help="Row number of sql store to restore from.",
)
@click.option(
    "--batch-size",
    type=int,
    default=None,
    help="How many rows to restore.",
)
def restore_indices(
    urn: str,
    aspect: Optional[str],
    start: Optional[int],
    batch_size: Optional[int],
) -> None:
    """Resync metadata changes into the search and graph indices."""
    # NOTE: click shows the docstring above as this command's --help text, so
    # it is user-facing output; implementation notes live in comments instead.
    #
    # --aspect, --start and --batch-size all default to None; whatever the user
    # supplied (or None) is forwarded as-is to the graph client, and the
    # returned result is printed to stdout.
    client = get_default_graph(ClientMode.CLI)
    click.echo(
        client.restore_indices(
            urn_pattern=urn,
            aspect=aspect,
            start=start,
            batch_size=batch_size,
        )
    )

View File

@ -1429,6 +1429,41 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
related_aspects = response.get("relatedAspects", [])
return reference_count, related_aspects
def restore_indices(
    self,
    urn_pattern: str,
    aspect: Optional[str] = None,
    start: Optional[int] = None,
    batch_size: Optional[int] = None,
) -> str:
    """Ask the server to restore indices for one URN or for a URN pattern.

    The request is POSTed to the ``restoreIndices`` action of the
    ``/operations`` endpoint on the configured GMS server.

    Args:
        urn_pattern: Either an exact URN or a URN-like pattern that uses
            ``%`` as the wildcard. Any value containing ``%`` is sent as
            ``urnLike``; everything else is sent as ``urn``.
        aspect: If given, restrict the restore to this aspect name.
        start: If given, the row number of the sql store to start restoring
            from. When omitted the field is not sent (documented default: 0).
        batch_size: If given, how many rows to restore. When omitted the
            field is not sent (documented default: 10).

    Returns:
        The ``value`` entry of the server response, describing the outcome
        of the operation. Its format is subject to change.
    """
    # Presence of the wildcard character decides which request field
    # carries the URN.
    selector_key = "urnLike" if "%" in urn_pattern else "urn"
    request_body: dict = {selector_key: urn_pattern}

    # Only forward the optional settings the caller actually provided.
    optional_fields = (
        ("aspect", aspect),
        ("start", start),
        ("batchSize", batch_size),
    )
    for field_name, field_value in optional_fields:
        if field_value is not None:
            request_body[field_name] = field_value

    endpoint = f"{self._gms_server}/operations?action=restoreIndices"
    response = self._post_generic(endpoint, request_body)

    outcome = response["value"]
    logger.debug(f"Restore indices result: {outcome}")
    return outcome
@functools.lru_cache
def _make_schema_resolver(
self,