diff --git a/docs/api/restli/restore-indices.md b/docs/api/restli/restore-indices.md
index 8c03b9dbc1..30061996b4 100644
--- a/docs/api/restli/restore-indices.md
+++ b/docs/api/restli/restore-indices.md
@@ -1,5 +1,7 @@
 # Restore Indices Endpoint
 
+This is reference material for the REST.li `restoreIndices` endpoint. For general information on reindexing and restoring indices, see [Restore Indices](../../how/restore-indices.md).
+
 You can do a HTTP POST request to `/gms/operations?action=restoreIndices` endpoint with the `urn` as part of JSON Payload to restore indices for the particular URN, or with the `urnLike` regex to restore for `batchSize` URNs matching the pattern starting from `start`.
 
 ```
diff --git a/docs/how/restore-indices.md b/docs/how/restore-indices.md
index 955c7c9841..50bcdee8ba 100644
--- a/docs/how/restore-indices.md
+++ b/docs/how/restore-indices.md
@@ -177,6 +177,10 @@ All Aspects:
 
 For Rest.li, see [Restore Indices API](../api/restli/restore-indices.md).
 
+### CLI
+
+The [datahub CLI](../cli.md) also supports a utility command for restoring indices. Check the `datahub check restore-indices --help` help text for more details.
+
 ## Best Practices
 
 In general, this process is not required to run unless there has been a disruption of storage services or infrastructure,
diff --git a/metadata-ingestion/src/datahub/cli/check_cli.py b/metadata-ingestion/src/datahub/cli/check_cli.py
index abe107ee46..fa063524d5 100644
--- a/metadata-ingestion/src/datahub/cli/check_cli.py
+++ b/metadata-ingestion/src/datahub/cli/check_cli.py
@@ -478,3 +478,30 @@ def server_config() -> None:
     server_config = graph.get_server_config()
 
     click.echo(pprint.pformat(server_config))
+
+
+@check.command()
+@click.option(
+    "--urn", required=True, help="The urn or urn pattern (supports % for wildcard)"
+)
+@click.option("--aspect", default=None, help="Filter to a specific aspect name.")
+@click.option(
+    "--start", type=int, default=None, help="Row number of sql store to restore from."
+)
+@click.option("--batch-size", type=int, default=None, help="How many rows to restore.")
+def restore_indices(
+    urn: str,
+    aspect: Optional[str],
+    start: Optional[int],
+    batch_size: Optional[int],
+) -> None:
+    """Resync metadata changes into the search and graph indices."""
+    graph = get_default_graph(ClientMode.CLI)
+
+    result = graph.restore_indices(
+        urn_pattern=urn,  # exact urn or % pattern, per the --urn help text
+        aspect=aspect,  # None (the option default) is passed through as-is
+        start=start,
+        batch_size=batch_size,
+    )
+    click.echo(result)  # print whatever graph.restore_indices returned
diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py
index 902e418677..aabe25cf0a 100644
--- a/metadata-ingestion/src/datahub/ingestion/graph/client.py
+++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py
@@ -1429,6 +1429,41 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
         related_aspects = response.get("relatedAspects", [])
         return reference_count, related_aspects
 
+    def restore_indices(
+        self,
+        urn_pattern: str,
+        aspect: Optional[str] = None,
+        start: Optional[int] = None,
+        batch_size: Optional[int] = None,
+    ) -> str:
+        """Restore the indices for a given urn or urn-like pattern.
+
+        Args:
+            urn_pattern: The exact URN or a pattern (with % for wildcard) to match URNs. Sent as `urnLike` when it contains %, otherwise as `urn`.
+            aspect: Optional aspect string to restore indices for a specific aspect. Omitted from the request when None.
+            start: Optional integer to decide which row number of sql store to restore from. Default: 0. Omitted from the request when None.
+            batch_size: Optional integer to decide how many rows to restore. Default: 10. Sent as `batchSize`; omitted from the request when None.
+
+        Returns:
+            A string containing the result of the restore indices operation, read from the `value` field of the response. This format is subject to change.
+        """
+        if "%" in urn_pattern:  # % marks a pattern -> sent as urnLike
+            payload_obj: dict = {"urnLike": urn_pattern}
+        else:
+            payload_obj = {"urn": urn_pattern}  # no wildcard: exact URN
+        if aspect is not None:  # optional fields are only sent when provided
+            payload_obj["aspect"] = aspect
+        if start is not None:
+            payload_obj["start"] = start
+        if batch_size is not None:
+            payload_obj["batchSize"] = batch_size  # payload key is camelCase
+        raw_result = self._post_generic(
+            f"{self._gms_server}/operations?action=restoreIndices", payload_obj
+        )
+        result = raw_result["value"]  # result string lives under "value"
+        logger.debug(f"Restore indices result: {result}")
+        return result
+
     @functools.lru_cache
     def _make_schema_resolver(
         self,