mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-08-22 16:08:13 +00:00
Data insights with metadata command-line (#11045)
* Data insights with metadata command-line * Python Checkstyle * Python Checkstyle
This commit is contained in:
parent
de70c585bc
commit
dda5963e34
49
ingestion/src/metadata/cli/insight.py
Normal file
49
ingestion/src/metadata/cli/insight.py
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
# Copyright 2021 Collate
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
"""
|
||||||
|
Data Insight utility for the metadata CLI
|
||||||
|
"""
|
||||||
|
import pathlib
|
||||||
|
import sys
|
||||||
|
import traceback
|
||||||
|
|
||||||
|
from metadata.config.common import load_config_file
|
||||||
|
from metadata.data_insight.api.workflow import DataInsightWorkflow
|
||||||
|
from metadata.utils.logger import cli_logger
|
||||||
|
from metadata.utils.workflow_output_handler import WorkflowType, print_init_error
|
||||||
|
|
||||||
|
logger = cli_logger()
|
||||||
|
|
||||||
|
|
||||||
|
def run_insight(config_path: str) -> None:
    """
    Run the Data Insight workflow described by a JSON or YAML
    config file and exit the process with the workflow's result status.

    If the config cannot be loaded or the workflow cannot be created,
    the init error is printed and the process exits with status 1.

    :param config_path: Path to the JSON or YAML config file
    """

    workflow_config_path = pathlib.Path(config_path)
    # Kept as None until the file is loaded, so the error reporter
    # still receives a value when loading itself is what failed.
    workflow_config = None
    try:
        workflow_config = load_config_file(workflow_config_path)
        workflow = DataInsightWorkflow.create(workflow_config)
        logger.debug(f"Using config: {workflow.config}")
    except Exception as err:
        # Full traceback only at debug level; the user-facing message
        # is delegated to print_init_error.
        logger.debug(traceback.format_exc())
        print_init_error(err, workflow_config, WorkflowType.INSIGHT)
        sys.exit(1)

    workflow.execute()
    workflow.stop()
    workflow.print_status()
    # The workflow's result status becomes the process exit code.
    sys.exit(workflow.result_status())
|
@ -22,6 +22,7 @@ from metadata.cli.backup import UploadDestinationType, run_backup
|
|||||||
from metadata.cli.dataquality import run_test
|
from metadata.cli.dataquality import run_test
|
||||||
from metadata.cli.docker import BACKEND_DATABASES, DockerActions, run_docker
|
from metadata.cli.docker import BACKEND_DATABASES, DockerActions, run_docker
|
||||||
from metadata.cli.ingest import run_ingest
|
from metadata.cli.ingest import run_ingest
|
||||||
|
from metadata.cli.insight import run_insight
|
||||||
from metadata.cli.openmetadata_dag_config_migration import (
|
from metadata.cli.openmetadata_dag_config_migration import (
|
||||||
run_openmetadata_dag_config_migration,
|
run_openmetadata_dag_config_migration,
|
||||||
)
|
)
|
||||||
@ -44,6 +45,7 @@ class MetadataCommands(Enum):
|
|||||||
BACKUP = "backup"
|
BACKUP = "backup"
|
||||||
RESTORE = "restore"
|
RESTORE = "restore"
|
||||||
WEBHOOK = "webhook"
|
WEBHOOK = "webhook"
|
||||||
|
INSIGHT = "insight"
|
||||||
OPENMETADATA_IMPORTS_MIGRATION = "openmetadata_imports_migration"
|
OPENMETADATA_IMPORTS_MIGRATION = "openmetadata_imports_migration"
|
||||||
OPENMETADATA_DAG_CONFIG_MIGRATION = "openmetadata_dag_config_migration"
|
OPENMETADATA_DAG_CONFIG_MIGRATION = "openmetadata_dag_config_migration"
|
||||||
|
|
||||||
@ -378,13 +380,18 @@ def get_parser(args=None):
|
|||||||
help="Simple Webserver to test webhook metadata events",
|
help="Simple Webserver to test webhook metadata events",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
create_common_config_parser_args(
|
||||||
|
sub_parser.add_parser(
|
||||||
|
MetadataCommands.INSIGHT.value, help="Data Insigt Workflow"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
add_metadata_args(parser)
|
add_metadata_args(parser)
|
||||||
parser.add_argument("--debug", help="Debug Mode", action="store_true")
|
parser.add_argument("--debug", help="Debug Mode", action="store_true")
|
||||||
return parser.parse_args(args)
|
return parser.parse_args(args)
|
||||||
|
|
||||||
|
|
||||||
def metadata(args=None):
|
def metadata(args=None): # pylint: disable=too-many-branches
|
||||||
"""
|
"""
|
||||||
This method implements parsing of the arguments passed from CLI
|
This method implements parsing of the arguments passed from CLI
|
||||||
"""
|
"""
|
||||||
@ -400,6 +407,8 @@ def metadata(args=None):
|
|||||||
|
|
||||||
if metadata_workflow == MetadataCommands.INGEST.value:
|
if metadata_workflow == MetadataCommands.INGEST.value:
|
||||||
run_ingest(config_path=config_file)
|
run_ingest(config_path=config_file)
|
||||||
|
if metadata_workflow == MetadataCommands.INSIGHT.value:
|
||||||
|
run_insight(config_path=config_file)
|
||||||
if metadata_workflow == MetadataCommands.PROFILE.value:
|
if metadata_workflow == MetadataCommands.PROFILE.value:
|
||||||
run_profiler(config_path=config_file)
|
run_profiler(config_path=config_file)
|
||||||
if metadata_workflow == MetadataCommands.TEST.value:
|
if metadata_workflow == MetadataCommands.TEST.value:
|
||||||
|
20
ingestion/src/metadata/examples/workflows/data_insight.yaml
Normal file
20
ingestion/src/metadata/examples/workflows/data_insight.yaml
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
source:
|
||||||
|
type: dataInsight
|
||||||
|
serviceName: OpenMetadata
|
||||||
|
sourceConfig:
|
||||||
|
config:
|
||||||
|
type: MetadataToElasticSearch
|
||||||
|
processor:
|
||||||
|
type: data-insight-processor
|
||||||
|
config: {}
|
||||||
|
sink:
|
||||||
|
type: elasticsearch
|
||||||
|
config:
|
||||||
|
es_host: localhost
|
||||||
|
es_port: 9200
|
||||||
|
recreate_indexes: false
|
||||||
|
workflowConfig:
|
||||||
|
loggerLevel: DEBUG
|
||||||
|
openMetadataServerConfig:
|
||||||
|
hostPort: http://localhost:8585/api
|
||||||
|
authProvider: no-auth
|
@ -74,6 +74,7 @@ class WorkflowType(Enum):
|
|||||||
TEST = "test"
|
TEST = "test"
|
||||||
LINEAGE = "lineage"
|
LINEAGE = "lineage"
|
||||||
USAGE = "usage"
|
USAGE = "usage"
|
||||||
|
INSIGHT = "insight"
|
||||||
|
|
||||||
|
|
||||||
EXAMPLES_WORKFLOW_PATH: Path = Path(__file__).parent / "../examples" / "workflows"
|
EXAMPLES_WORKFLOW_PATH: Path = Path(__file__).parent / "../examples" / "workflows"
|
||||||
|
@ -276,6 +276,72 @@ with DAG(
|
|||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
# Run Data Insights using the metadata CLI
|
||||||
|
|
||||||
|
### 1. Define the YAML Config
|
||||||
|
|
||||||
|
This is a sample config for Data Insights:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
source:
|
||||||
|
type: dataInsight
|
||||||
|
serviceName: OpenMetadata
|
||||||
|
sourceConfig:
|
||||||
|
config:
|
||||||
|
type: MetadataToElasticSearch
|
||||||
|
processor:
|
||||||
|
type: data-insight-processor
|
||||||
|
config: {}
|
||||||
|
sink:
|
||||||
|
type: elasticsearch
|
||||||
|
config:
|
||||||
|
es_host: localhost
|
||||||
|
es_port: 9200
|
||||||
|
recreate_indexes: false
|
||||||
|
workflowConfig:
|
||||||
|
loggerLevel: DEBUG
|
||||||
|
openMetadataServerConfig:
|
||||||
|
hostPort: "<OpenMetadata host and port>"
|
||||||
|
authProvider: openmetadata
|
||||||
|
securityConfig:
|
||||||
|
jwtToken: '{bot_jwt_token}'
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Source Configuration - Source Config
|
||||||
|
|
||||||
|
- To run the Data Insight workflow, the source config needs to be specified as `type: MetadataToElasticSearch`.
|
||||||
|
|
||||||
|
|
||||||
|
#### Processor Configuration
|
||||||
|
|
||||||
|
- To process the Data Insight data, the processor needs to be specified as `type: data-insight-processor`.
|
||||||
|
|
||||||
|
#### Workflow Configuration
|
||||||
|
|
||||||
|
The main property here is the `openMetadataServerConfig`, where you can define the host and security provider of your OpenMetadata installation.
|
||||||
|
|
||||||
|
For a simple, local installation using our docker containers, this looks like:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
workflowConfig:
|
||||||
|
openMetadataServerConfig:
|
||||||
|
hostPort: 'http://localhost:8585/api'
|
||||||
|
authProvider: openmetadata
|
||||||
|
securityConfig:
|
||||||
|
jwtToken: '{bot_jwt_token}'
|
||||||
|
```
|
||||||
|
|
||||||
|
We support different security providers. You can find their definitions [here](https://github.com/open-metadata/OpenMetadata/tree/main/openmetadata-spec/src/main/resources/json/schema/security/client).
|
||||||
|
You can find the different implementations of the ingestion below.
|
||||||
|
|
||||||
|
### 2. Run with the CLI
|
||||||
|
|
||||||
|
First, we will need to save the YAML file. Afterward, and with all requirements installed, we can run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
metadata insight -c <path-to-yaml>
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
# Run Elasticsearch Reindex using the Airflow SDK
|
# Run Elasticsearch Reindex using the Airflow SDK
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user