* SAP Hana skeleton
* Add SAP Hana Connector
* Fix ingestion and docs
* Prep SAP Hana Profiler
* Linting
* Update index.md
* Revert: Update index.md

Co-authored-by: Ayush Shah <ayush@getcollate.io>
This commit is contained in:
parent aa490cb716
commit 11c07ee8ab
@@ -218,6 +218,7 @@ plugins: Dict[str, Set[str]] = {
|
||||
},
|
||||
"sagemaker": {VERSIONS["boto3"]},
|
||||
"salesforce": {"simple_salesforce==1.11.4"},
|
||||
"sap-hana": {"hdbcli", "sqlalchemy-hana"},
|
||||
"singlestore": {VERSIONS["pymysql"]},
|
||||
"sklearn": {VERSIONS["scikit-learn"]},
|
||||
"snowflake": {"snowflake-sqlalchemy~=1.4"},
|
||||
|
@@ -117,7 +117,7 @@ def init_empty_connection_options() -> ConnectionOptions:
|
||||
return ConnectionOptions(__root__={})
|
||||
|
||||
|
||||
def get_connection_url_common(connection):
|
||||
def get_connection_url_common(connection) -> str:
|
||||
"""
|
||||
Common method for building the source connection urls
|
||||
"""
|
||||
|
@@ -347,13 +347,13 @@ def test_connection_db_schema_sources(
|
||||
"""
|
||||
queries = queries or {}
|
||||
|
||||
def custom_executor(engine, inspector_fn_str: str):
|
||||
def custom_executor(engine_: Engine, inspector_fn_str: str):
|
||||
"""
|
||||
Check if we can list tables or views from a given schema
|
||||
or a random one
|
||||
"""
|
||||
|
||||
inspector = inspect(engine)
|
||||
inspector = inspect(engine_)
|
||||
inspector_fn = getattr(inspector, inspector_fn_str)
|
||||
|
||||
if service_connection.databaseSchema:
|
||||
|
@@ -0,0 +1,166 @@
|
||||
# Copyright 2021 Collate
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
Source connection handler
|
||||
"""
|
||||
from functools import partial
|
||||
from typing import Callable, Dict, Optional
|
||||
from urllib.parse import quote_plus
|
||||
|
||||
from sqlalchemy import inspect
|
||||
from sqlalchemy.engine import Engine
|
||||
|
||||
from metadata.generated.schema.entity.automations.workflow import (
|
||||
Workflow as AutomationWorkflow,
|
||||
)
|
||||
from metadata.generated.schema.entity.services.connections.database.sapHanaConnection import (
|
||||
HdbUserStoreConnection,
|
||||
SapHanaConnection,
|
||||
SqlConnection,
|
||||
)
|
||||
from metadata.ingestion.connections.builders import (
|
||||
create_generic_db_connection,
|
||||
get_connection_args_common,
|
||||
get_connection_options_dict,
|
||||
)
|
||||
from metadata.ingestion.connections.test_connections import (
|
||||
execute_inspector_func,
|
||||
test_connection_engine_step,
|
||||
test_connection_steps,
|
||||
)
|
||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||
|
||||
|
||||
def get_database_connection_url(connection: SapHanaConnection) -> str:
|
||||
"""
|
||||
Build the SQLConnection URL for the database connection
|
||||
"""
|
||||
|
||||
conn = connection.connection
|
||||
|
||||
if not isinstance(conn, SqlConnection):
|
||||
raise ValueError("Database Connection requires the SQL connection details")
|
||||
|
||||
url = (
|
||||
f"{connection.scheme.value}://"
|
||||
f"{quote_plus(conn.username)}:"
|
||||
f"{quote_plus(conn.password.get_secret_value())}@"
|
||||
f"{conn.hostPort}"
|
||||
)
|
||||
|
||||
if hasattr(connection, "database"):
|
||||
url += f"/{connection.database}" if connection.database else ""
|
||||
|
||||
options = get_connection_options_dict(connection)
|
||||
if options:
|
||||
if hasattr(conn, "database") and not conn.database:
|
||||
url += "/"
|
||||
params = "&".join(
|
||||
f"{key}={quote_plus(value)}" for (key, value) in options.items() if value
|
||||
)
|
||||
url = f"{url}?{params}"
|
||||
return url
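# Example of the resulting URL (placeholder values): for a SqlConnection with
# username "om_user", password "secret", hostPort "localhost:39041" and database
# "HXE", the builder above yields "<scheme>://om_user:secret@localhost:39041/HXE",
# where <scheme> is connection.scheme.value (e.g. "hana" for the sqlalchemy-hana
# dialect backed by the hdbcli driver).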
|
||||
|
||||
|
||||
def get_hdb_connection_url(connection: SapHanaConnection) -> str:
|
||||
"""
|
||||
Build the connection URL from the HDB User Store details
|
||||
"""
|
||||
|
||||
if not isinstance(connection.connection, HdbUserStoreConnection):
|
||||
raise ValueError("Database Connection requires the SQL connection details")
|
||||
|
||||
return f"{connection.scheme.value}://userkey={connection.connection.userKey}"
|
||||
|
||||
|
||||
def get_connection(connection: SapHanaConnection) -> Engine:
|
||||
"""
|
||||
Create connection
|
||||
"""
|
||||
|
||||
if isinstance(connection.connection, SqlConnection):
|
||||
return create_generic_db_connection(
|
||||
connection=connection,
|
||||
get_connection_url_fn=get_database_connection_url,
|
||||
get_connection_args_fn=get_connection_args_common,
|
||||
)
|
||||
|
||||
if isinstance(connection.connection, HdbUserStoreConnection):
|
||||
return create_generic_db_connection(
|
||||
connection=connection,
|
||||
get_connection_url_fn=get_hdb_connection_url,
|
||||
get_connection_args_fn=get_connection_args_common,
|
||||
)
|
||||
|
||||
raise ValueError("Unrecognized SAP Hana connection type!")
|
||||
|
||||
|
||||
def _build_test_fn_dict(
|
||||
engine: Engine, service_connection: SapHanaConnection
|
||||
) -> Dict[str, Callable]:
|
||||
"""
|
||||
Build the test connection steps dict
|
||||
"""
|
||||
|
||||
def custom_executor(engine_: Engine, inspector_fn_str: str):
|
||||
"""
|
||||
Check if we can list tables or views from a given schema
|
||||
or a random one
|
||||
"""
|
||||
|
||||
inspector = inspect(engine_)
|
||||
inspector_fn = getattr(inspector, inspector_fn_str)
|
||||
|
||||
# HDB connection won't have a databaseSchema
|
||||
if getattr(service_connection.connection, "databaseSchema", None):
|
||||
inspector_fn(service_connection.connection.databaseSchema)
|
||||
else:
|
||||
schema_name = inspector.get_schema_names() or []
|
||||
for schema in schema_name:
|
||||
inspector_fn(schema)
|
||||
break
|
||||
|
||||
if isinstance(service_connection.connection, SqlConnection):
|
||||
return {
|
||||
"CheckAccess": partial(test_connection_engine_step, engine),
|
||||
"GetSchemas": partial(execute_inspector_func, engine, "get_schema_names"),
|
||||
"GetTables": partial(custom_executor, engine, "get_table_names"),
|
||||
"GetViews": partial(custom_executor, engine, "get_view_names"),
|
||||
}
|
||||
|
||||
if isinstance(service_connection.connection, HdbUserStoreConnection):
|
||||
return {
|
||||
"CheckAccess": partial(test_connection_engine_step, engine),
|
||||
"GetSchemas": partial(execute_inspector_func, engine, "get_schema_names"),
|
||||
"GetTables": partial(custom_executor, engine, "get_table_names"),
|
||||
"GetViews": partial(custom_executor, engine, "get_view_names"),
|
||||
}
|
||||
|
||||
raise ValueError(f"Unknown connection type for {service_connection.connection}")
|
||||
|
||||
|
||||
def test_connection(
|
||||
metadata: OpenMetadata,
|
||||
engine: Engine,
|
||||
service_connection: SapHanaConnection,
|
||||
automation_workflow: Optional[AutomationWorkflow] = None,
|
||||
) -> None:
|
||||
"""
|
||||
Test connection. This can be executed either as part
|
||||
of a metadata workflow or during an Automation Workflow
|
||||
"""
|
||||
|
||||
test_connection_steps(
|
||||
metadata=metadata,
|
||||
test_fn=_build_test_fn_dict(engine, service_connection),
|
||||
service_fqn=service_connection.type.value,
|
||||
automation_workflow=automation_workflow,
|
||||
)
|
@@ -0,0 +1,75 @@
|
||||
# Copyright 2021 Collate
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
SAP Hana source module
|
||||
"""
|
||||
from typing import Iterable
|
||||
|
||||
from sqlalchemy import inspect
|
||||
|
||||
from metadata.generated.schema.entity.services.connections.database.sapHanaConnection import (
|
||||
SapHanaConnection,
|
||||
)
|
||||
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
|
||||
OpenMetadataConnection,
|
||||
)
|
||||
from metadata.generated.schema.metadataIngestion.workflow import (
|
||||
Source as WorkflowSource,
|
||||
)
|
||||
from metadata.ingestion.api.source import InvalidSourceException
|
||||
from metadata.ingestion.source.database.common_db_source import CommonDbSourceService
|
||||
from metadata.utils.logger import ingestion_logger
|
||||
|
||||
logger = ingestion_logger()
|
||||
|
||||
|
||||
class SaphanaSource(CommonDbSourceService):
|
||||
"""
|
||||
Implements the necessary methods to extract
|
||||
Database metadata from SAP Hana Source
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
def create(cls, config_dict, metadata_config: OpenMetadataConnection):
|
||||
config: WorkflowSource = WorkflowSource.parse_obj(config_dict)
|
||||
connection: SapHanaConnection = config.serviceConnection.__root__.config
|
||||
if not isinstance(connection, SapHanaConnection):
|
||||
raise InvalidSourceException(
|
||||
f"Expected SapHanaConnection, but got {connection}"
|
||||
)
|
||||
return cls(config, metadata_config)
|
||||
|
||||
def get_database_names(self) -> Iterable[str]:
|
||||
"""
|
||||
Check if the db is configured, or query the name
|
||||
"""
|
||||
self.inspector = inspect(self.engine)
|
||||
|
||||
if getattr(self.service_connection.connection, "database", None):
|
||||
yield self.service_connection.connection.database
|
||||
|
||||
else:
|
||||
try:
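# M_DATABASE is a SAP HANA system view; its DATABASE_NAME column holds the name of the current database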
|
||||
yield self.connection.execute(
|
||||
"SELECT DATABASE_NAME FROM M_DATABASE"
|
||||
).fetchone()[0]
|
||||
except Exception as err:
|
||||
raise RuntimeError(
|
||||
f"Error retrieving database name from the source - [{err}]."
|
||||
" A way through this error is by specifying the `database` in the service connection."
|
||||
)
|
||||
|
||||
def get_raw_database_schema_names(self) -> Iterable[str]:
|
||||
if self.service_connection.connection.__dict__.get("databaseSchema"):
|
||||
yield self.service_connection.connection.databaseSchema
|
||||
else:
|
||||
for schema_name in self.inspector.get_schema_names():
|
||||
yield schema_name
|
@@ -50,3 +50,8 @@ def _(*_, **__):
|
||||
@compiles(ConnTestFn, Dialects.IbmDbSa)
|
||||
def _(*_, **__):
|
||||
return "SELECT 42 FROM SYSIBM.SYSDUMMY1;"
|
||||
|
||||
|
||||
@compiles(ConnTestFn, Dialects.Hana)
|
||||
def _(*_, **__):
|
||||
return "SELECT 42 FROM DUMMY"
|
||||
|
@@ -47,6 +47,7 @@ def _(element, compiler, **kw):
|
||||
@compiles(LenFn, Dialects.Oracle)
|
||||
@compiles(LenFn, Dialects.IbmDbSa)
|
||||
@compiles(LenFn, Dialects.Db2)
|
||||
@compiles(LenFn, Dialects.Hana)
|
||||
def _(element, compiler, **kw):
|
||||
return "LENGTH(%s)" % compiler.process(element.clauses, **kw)
|
||||
|
||||
|
@@ -59,6 +59,7 @@ def _(element, compiler, **kw):
|
||||
@compiles(ModuloFn, Dialects.IbmDbSa)
|
||||
@compiles(ModuloFn, Dialects.Db2)
|
||||
@compiles(ModuloFn, Dialects.Vertica)
|
||||
@compiles(ModuloFn, Dialects.Hana)
|
||||
def _(element, compiler, **kw):
|
||||
"""Modulo function for specific dialect"""
|
||||
value, base = validate_and_compile(element, compiler, **kw)
|
||||
|
@@ -46,6 +46,7 @@ def _(*_, **__):
|
||||
@compiles(RandomNumFn, Dialects.MySQL)
|
||||
@compiles(RandomNumFn, Dialects.IbmDbSa)
|
||||
@compiles(RandomNumFn, Dialects.Db2)
|
||||
@compiles(RandomNumFn, Dialects.Hana)
|
||||
def _(*_, **__):
|
||||
return "ABS(RAND()) * 100"
|
||||
|
||||
|
@@ -54,6 +54,7 @@ class Dialects(Enum):
|
||||
Druid = "druid"
|
||||
DynamoDB = "dynamoDB"
|
||||
Glue = "glue"
|
||||
Hana = "hana"
|
||||
Hive = b"hive" # Hive requires bytes
|
||||
Impala = "impala"
|
||||
IbmDbSa = "ibm_db_sa"
|
||||
|
@@ -124,17 +124,27 @@ auth_backends = airflow.api.auth.backend.basic_auth
|
||||
After installing the Airflow APIs, you will need to update your OpenMetadata Server.
|
||||
|
||||
The OpenMetadata server takes all its configurations from a YAML file. You can find them in our [repo](https://github.com/open-metadata/OpenMetadata/tree/main/conf). In
|
||||
`openmetadata.yaml`, update the `airflowConfiguration` section accordingly.
|
||||
`openmetadata.yaml`, update the `pipelineServiceClientConfiguration` section accordingly.
|
||||
|
||||
```yaml
|
||||
# For Bare Metal Installations
|
||||
[...]
|
||||
|
||||
airflowConfiguration:
|
||||
apiEndpoint: ${AIRFLOW_HOST:-http://localhost:8080}
|
||||
pipelineServiceClientConfiguration:
|
||||
className: ${PIPELINE_SERVICE_CLIENT_CLASS_NAME:-"org.openmetadata.service.clients.pipeline.airflow.AirflowRESTClient"}
|
||||
apiEndpoint: ${PIPELINE_SERVICE_CLIENT_ENDPOINT:-http://localhost:8080}
|
||||
metadataApiEndpoint: ${SERVER_HOST_API_URL:-http://localhost:8585/api}
|
||||
hostIp: ${PIPELINE_SERVICE_CLIENT_HOST_IP:-""}
|
||||
verifySSL: ${PIPELINE_SERVICE_CLIENT_VERIFY_SSL:-"no-ssl"} # Possible values are "no-ssl", "ignore", "validate"
|
||||
sslConfig:
|
||||
validate:
|
||||
certificatePath: ${PIPELINE_SERVICE_CLIENT_SSL_CERT_PATH:-""} # Local path for the Pipeline Service Client
|
||||
|
||||
# Default required parameters for Airflow as Pipeline Service Client
|
||||
parameters:
|
||||
username: ${AIRFLOW_USERNAME:-admin}
|
||||
password: ${AIRFLOW_PASSWORD:-admin}
|
||||
metadataApiEndpoint: ${SERVER_HOST_API_URL:-http://localhost:8585/api}
|
||||
timeout: ${AIRFLOW_TIMEOUT:-10}
|
||||
|
||||
[...]
|
||||
```
|
||||
@@ -142,7 +152,7 @@ airflowConfiguration:
|
||||
If using Docker, make sure that you are passing the correct environment variables:
|
||||
|
||||
```env
|
||||
AIRFLOW_HOST: ${AIRFLOW_HOST:-http://ingestion:8080}
|
||||
PIPELINE_SERVICE_CLIENT_ENDPOINT: ${PIPELINE_SERVICE_CLIENT_ENDPOINT:-http://ingestion:8080}
|
||||
SERVER_HOST_API_URL: ${SERVER_HOST_API_URL:-http://openmetadata-server:8585/api}
|
||||
```
|
||||
|
||||
@@ -169,11 +179,11 @@ What we need to verify here is that the OpenMetadata server can reach the Airflo
|
||||
hosting your OpenMetadata server and running a query against the `/health` endpoint. For example:
|
||||
|
||||
```bash
|
||||
$ curl -XGET ${AIRFLOW_HOST}/api/v1/openmetadata/health
|
||||
$ curl -XGET ${PIPELINE_SERVICE_CLIENT_ENDPOINT}/api/v1/openmetadata/health
|
||||
{"status": "healthy", "version": "x.y.z"}
|
||||
```
|
||||
|
||||
It is important to do this validation passing the command as is (i.e., `curl -XGET ${AIRFLOW_HOST}/api/v1/openmetadata/health`)
|
||||
It is important to do this validation passing the command as is (i.e., `curl -XGET ${PIPELINE_SERVICE_CLIENT_ENDPOINT}/api/v1/openmetadata/health`)
|
||||
and allowing the environment to do the substitution for you. That's the only way we can be sure that the setup is
|
||||
correct.
|
||||
|
||||
@@ -193,7 +203,7 @@ Note that in this example we are assuming:
|
||||
A generic call would look like:
|
||||
|
||||
```bash
|
||||
curl -XPOST <AIRFLOW_HOST>/api/v1/openmetadata/enable --data-raw '{"dag_id": "<DAG name>"}' -u "<user>:<password>" --header 'Content-Type: application/json'
|
||||
curl -XPOST <PIPELINE_SERVICE_CLIENT_ENDPOINT>/api/v1/openmetadata/enable --data-raw '{"dag_id": "<DAG name>"}' -u "<user>:<password>" --header 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
Please update it accordingly.
|
||||
@@ -239,7 +249,7 @@ Validate the installation, making sure that from the OpenMetadata server you can
|
||||
call to `/health` gives us the proper response:
|
||||
|
||||
```bash
|
||||
$ curl -XGET ${AIRFLOW_HOST}/api/v1/openmetadata/health
|
||||
$ curl -XGET ${PIPELINE_SERVICE_CLIENT_ENDPOINT}/api/v1/openmetadata/health
|
||||
{"status": "healthy", "version": "x.y.z"}
|
||||
```
|
||||
|
||||
|
@@ -44,12 +44,12 @@ the following docs to connect using Airflow SDK or with the CLI.
|
||||
{% tile
|
||||
title="Ingest with Airflow"
|
||||
description="Configure the ingestion using Airflow SDK"
|
||||
link="/connectors/database/athena/airflow"
|
||||
link="/connectors/database/mysql/airflow"
|
||||
/ %}
|
||||
{% tile
|
||||
title="Ingest with the CLI"
|
||||
description="Run a one-time ingestion using the metadata CLI"
|
||||
link="/connectors/database/athena/cli"
|
||||
link="/connectors/database/mysql/cli"
|
||||
/ %}
|
||||
|
||||
{% /tilesContainer %}
|
||||
|
@@ -0,0 +1,774 @@
|
||||
---
|
||||
title: Run SAP Hana Connector using Airflow SDK
|
||||
slug: /connectors/database/sap-hana/airflow
|
||||
---
|
||||
|
||||
# Run SAP Hana using the Airflow SDK
|
||||
|
||||
{% multiTablesWrapper %}
|
||||
|
||||
| Feature | Status |
|
||||
| :----------------- |:-----------------------------|
|
||||
| Stage | BETA |
|
||||
| Metadata | {% icon iconName="check" /%} |
|
||||
| Query Usage | {% icon iconName="cross" /%} |
|
||||
| Data Profiler | {% icon iconName="check" /%} |
|
||||
| Data Quality | {% icon iconName="check" /%} |
|
||||
| Lineage | Partially via Views |
|
||||
| DBT | {% icon iconName="cross" /%} |
|
||||
|
||||
| Feature | Status |
|
||||
| :----------- | :--------------------------- |
|
||||
| Lineage | Partially via Views |
|
||||
| Table-level | {% icon iconName="check" /%} |
|
||||
| Column-level | {% icon iconName="check" /%} |
|
||||
|
||||
{% /multiTablesWrapper %}
|
||||
|
||||
In this section, we provide guides and references to use the SAP Hana connector.
|
||||
|
||||
Configure and schedule SAP Hana metadata and profiler workflows using the Airflow SDK:
|
||||
|
||||
- [Requirements](#requirements)
|
||||
- [Metadata Ingestion](#metadata-ingestion)
|
||||
- [Data Profiler](#data-profiler)
|
||||
- [dbt Integration](#dbt-integration)
|
||||
|
||||
## Requirements
|
||||
|
||||
{%inlineCallout icon="description" bold="OpenMetadata 1.1 or later" href="/deployment"%}
|
||||
To deploy OpenMetadata, check the Deployment guides.
|
||||
{%/inlineCallout%}
|
||||
|
||||
To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with
|
||||
custom Airflow plugins to handle the workflow deployment.
|
||||
|
||||
{% note %}
|
||||
The connector is compatible with HANA or HANA express versions since HANA SPS 2.
|
||||
{% /note %}
|
||||
|
||||
### Python Requirements
|
||||
|
||||
To run the SAP Hana ingestion, you will need to install:
|
||||
|
||||
```bash
|
||||
pip3 install "openmetadata-ingestion[sap-hana]"
|
||||
```
|
||||
|
||||
## Metadata Ingestion
|
||||
|
||||
All connectors are defined as JSON Schemas.
|
||||
[Here](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/sapHanaConnection.json)
|
||||
you can find the structure to create a connection to SAP Hana.
|
||||
|
||||
In order to create and run a Metadata Ingestion workflow, we will follow
|
||||
the steps to create a YAML configuration able to connect to the source,
|
||||
process the Entities if needed, and reach the OpenMetadata server.
|
||||
|
||||
The workflow is modeled around the following
|
||||
[JSON Schema](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/workflow.json)
|
||||
|
||||
### 1. Define the YAML Config
|
||||
|
||||
This is a sample config for SAP Hana:
|
||||
|
||||
{% codePreview %}
|
||||
|
||||
{% codeInfoContainer %}
|
||||
|
||||
#### Source Configuration - Service Connection
|
||||
|
||||
We support two possible connection types:
|
||||
1. **SQL Connection**, where you will need the username, password and host.
|
||||
2. **HDB User Store** [connection](https://help.sap.com/docs/SAP_HANA_PLATFORM/b3ee5778bc2e4a089d3299b82ec762a7/dd95ac9dbb571014a7d7f0234d762fdb.html?version=2.0.05&locale=en-US).
|
||||
Note that the HDB Store will need to be locally available to the instance running the ingestion process.
|
||||
If you are unsure about this setting, you can run the ingestion process passing the usual SQL connection details.
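As a quick sketch (placeholder values only; every field is broken down step by step below), the `connection` block takes one of these two shapes:

```yaml
# SQL Connection (placeholder values)
connection:
  hostPort: localhost:39041
  username: openmetadata_user
  password: openmetadata_password
  database: HXE             # optional
  databaseSchema: MY_SCHEMA # optional
---
# HDB User Store connection (the key must exist in the local hdbuserstore)
connection:
  userKey: OM_HANA_KEY
```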
|
||||
|
||||
##### SQL Connection
|
||||
|
||||
If using the SQL Connection, inform:
|
||||
|
||||
{% codeInfo srNumber=1 %}
|
||||
|
||||
**hostPort**: Host and port of the SAP Hana service. This should be specified as a string in the format `hostname:port`. E.g., `localhost:39041`, `host.docker.internal:39041`.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=2 %}
|
||||
|
||||
**username**: Specify the User to connect to SAP Hana. It should have enough privileges to read all the metadata.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=3 %}
|
||||
|
||||
**password**: Password to connect to SAP Hana.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=4 %}
|
||||
|
||||
**database**: Optional parameter to connect to a specific database.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=5 %}
|
||||
|
||||
**databaseSchema**: databaseSchema of the data source. This is an optional parameter; use it if you would like to restrict the metadata reading to a single schema. When left blank, OpenMetadata Ingestion attempts to scan all the schemas.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
##### HDB User Store
|
||||
|
||||
If you have a User Store configured, then:
|
||||
|
||||
{% codeInfo srNumber=6 %}
|
||||
|
||||
**userKey**: HDB Store User Key generated from the command `hdbuserstore SET <KEY> <host:port> <USERNAME> <PASSWORD>`.
|
||||
|
||||
{% /codeInfo %}
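For reference, the user key is registered on the machine that will run the ingestion; a minimal sketch (key name, host and credentials below are placeholders):

```bash
# Register the key in the local HDB user store
hdbuserstore SET OM_HANA_KEY "localhost:39041" openmetadata_user openmetadata_password

# Verify the entry (passwords are never displayed)
hdbuserstore LIST OM_HANA_KEY
```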
|
||||
|
||||
#### Source Configuration - Source Config
|
||||
|
||||
{% codeInfo srNumber=9 %}
|
||||
|
||||
The `sourceConfig` is defined [here](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/databaseServiceMetadataPipeline.json):
|
||||
|
||||
**markDeletedTables**: To flag tables as soft-deleted if they are not present anymore in the source system.
|
||||
|
||||
**includeTables**: true or false, to ingest table data. Default is true.
|
||||
|
||||
**includeViews**: true or false, to ingest view definitions.
|
||||
|
||||
**databaseFilterPattern**, **schemaFilterPattern**, **tableFilterPattern**: Note that the filter supports regex as include or exclude. You can find examples [here](/connectors/ingestion/workflows/metadata/filter-patterns/database)
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
|
||||
#### Sink Configuration
|
||||
|
||||
{% codeInfo srNumber=10 %}
|
||||
|
||||
To send the metadata to OpenMetadata, it needs to be specified as `type: metadata-rest`.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
#### Workflow Configuration
|
||||
|
||||
{% codeInfo srNumber=11 %}
|
||||
|
||||
The main property here is the `openMetadataServerConfig`, where you can define the host and security provider of your OpenMetadata installation.
|
||||
|
||||
For a simple, local installation using our docker containers, this looks like:
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
#### Advanced Configuration
|
||||
|
||||
{% codeInfo srNumber=7 %}
|
||||
|
||||
**Connection Options (Optional)**: Enter the details for any additional connection options that can be sent to SAP Hana during the connection. These details must be added as Key-Value pairs.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=8 %}
|
||||
|
||||
**Connection Arguments (Optional)**: Enter the details for any additional connection arguments such as security or protocol configs that can be sent to SAP Hana during the connection. These details must be added as Key-Value pairs.
|
||||
|
||||
- In case you are using Single-Sign-On (SSO) for authentication, add the `authenticator` details in the Connection Arguments as a Key-Value pair as follows: `"authenticator" : "sso_login_url"`
|
||||
- In case you authenticate with SSO using an external browser popup, then add the `authenticator` details in the Connection Arguments as a Key-Value pair as follows: `"authenticator" : "externalbrowser"`
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% /codeInfoContainer %}
|
||||
|
||||
{% codeBlock fileName="filename.yaml" %}
|
||||
|
||||
```yaml
|
||||
source:
|
||||
type: sapHana
|
||||
serviceName: <service name>
|
||||
serviceConnection:
|
||||
config:
|
||||
type: SapHana
|
||||
connection:
|
||||
```
|
||||
```yaml {% srNumber=1 %}
|
||||
## Parameters for the SQL Connection
|
||||
# hostPort: <hostPort>
|
||||
```
|
||||
```yaml {% srNumber=2 %}
|
||||
# username: <username>
|
||||
```
|
||||
```yaml {% srNumber=3 %}
|
||||
# password: <password>
|
||||
```
|
||||
```yaml {% srNumber=4 %}
|
||||
# database: <database>
|
||||
```
|
||||
```yaml {% srNumber=5 %}
|
||||
# databaseSchema: <schema>
|
||||
```
|
||||
```yaml {% srNumber=6 %}
|
||||
## Parameter for the HDB User Store
|
||||
# userKey: <key>
|
||||
```
|
||||
```yaml {% srNumber=7 %}
|
||||
# connectionOptions:
|
||||
# key: value
|
||||
```
|
||||
```yaml {% srNumber=8 %}
|
||||
# connectionArguments:
|
||||
# key: value
|
||||
```
|
||||
```yaml {% srNumber=9 %}
|
||||
sourceConfig:
|
||||
config:
|
||||
type: DatabaseMetadata
|
||||
markDeletedTables: true
|
||||
includeTables: true
|
||||
includeViews: true
|
||||
# includeTags: true
|
||||
# databaseFilterPattern:
|
||||
# includes:
|
||||
# - database1
|
||||
# - database2
|
||||
# excludes:
|
||||
# - database3
|
||||
# - database4
|
||||
# schemaFilterPattern:
|
||||
# includes:
|
||||
# - schema1
|
||||
# - schema2
|
||||
# excludes:
|
||||
# - schema3
|
||||
# - schema4
|
||||
# tableFilterPattern:
|
||||
# includes:
|
||||
# - table1
|
||||
# - table2
|
||||
# excludes:
|
||||
# - table3
|
||||
# - table4
|
||||
```
|
||||
|
||||
```yaml {% srNumber=10 %}
|
||||
sink:
|
||||
type: metadata-rest
|
||||
config: {}
|
||||
```
|
||||
|
||||
```yaml {% srNumber=11 %}
|
||||
workflowConfig:
|
||||
# loggerLevel: DEBUG # DEBUG, INFO, WARN or ERROR
|
||||
openMetadataServerConfig:
|
||||
hostPort: "<OpenMetadata host and port>"
|
||||
authProvider: "<OpenMetadata auth provider>"
|
||||
```
|
||||
|
||||
{% /codeBlock %}
|
||||
|
||||
{% /codePreview %}
|
||||
|
||||
|
||||
### Workflow Configs for Security Provider
|
||||
|
||||
We support different security providers. You can find their definitions [here](https://github.com/open-metadata/OpenMetadata/tree/main/openmetadata-spec/src/main/resources/json/schema/security/client).
|
||||
|
||||
## OpenMetadata JWT Auth
|
||||
|
||||
- JWT tokens allow your clients to authenticate against the OpenMetadata server. You can find more details on enabling JWT Tokens [here](/deployment/security/enable-jwt-tokens).
|
||||
|
||||
```yaml
|
||||
workflowConfig:
|
||||
openMetadataServerConfig:
|
||||
hostPort: "http://localhost:8585/api"
|
||||
authProvider: openmetadata
|
||||
securityConfig:
|
||||
jwtToken: "{bot_jwt_token}"
|
||||
```
|
||||
|
||||
- You can refer to the JWT Troubleshooting section [link](/deployment/security/jwt-troubleshooting) for any issues in your JWT configuration. If you need information on configuring the ingestion with other security providers in your bots, you can follow this doc [link](/deployment/security/workflow-config-auth).
|
||||
|
||||
|
||||
### 2. Prepare the Ingestion DAG
|
||||
|
||||
Create a Python file in your Airflow DAGs directory with the following contents:
|
||||
|
||||
{% codePreview %}
|
||||
|
||||
{% codeInfoContainer %}
|
||||
|
||||
|
||||
{% codeInfo srNumber=10 %}
|
||||
|
||||
#### Import necessary modules
|
||||
|
||||
The `Workflow` class that is being imported is a part of a metadata ingestion framework, which defines a process of getting data from different sources and ingesting it into a central metadata repository.
|
||||
|
||||
Here we are also importing all the basic requirements to parse YAMLs, handle dates and build our DAG.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=11 %}
|
||||
|
||||
**Default arguments for all tasks in the Airflow DAG.**
|
||||
|
||||
- Default arguments dictionary contains default arguments for tasks in the DAG, including the owner's name, email address, number of retries, retry delay, and execution timeout.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=12 %}
|
||||
|
||||
- **config**: Specifies the config for the metadata ingestion as prepared above.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=13 %}
|
||||
|
||||
- **metadata_ingestion_workflow()**: This code defines a function `metadata_ingestion_workflow()` that loads a YAML configuration, creates a `Workflow` object, executes the workflow, checks its status, prints the status to the console, and stops the workflow.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=14 %}
|
||||
|
||||
- **DAG**: creates a DAG using the Airflow framework; tune the DAG configuration to whatever fits your requirements
|
||||
- For more details on creating Airflow DAGs, visit [here](https://airflow.apache.org/docs/apache-airflow/stable/core-concepts/dags.html#declaring-a-dag).
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
Note that from connector to connector, this recipe will always be the same.
|
||||
By updating the `YAML configuration`, you will be able to extract metadata from different sources.
|
||||
|
||||
{% /codeInfoContainer %}
|
||||
|
||||
{% codeBlock fileName="filename.py" %}
|
||||
|
||||
```python {% srNumber=10 %}
|
||||
import pathlib
|
||||
import yaml
|
||||
from datetime import timedelta
|
||||
from airflow import DAG
|
||||
from metadata.config.common import load_config_file
|
||||
from metadata.ingestion.api.workflow import Workflow
|
||||
from airflow.utils.dates import days_ago
|
||||
|
||||
try:
|
||||
from airflow.operators.python import PythonOperator
|
||||
except ModuleNotFoundError:
|
||||
from airflow.operators.python_operator import PythonOperator
|
||||
|
||||
|
||||
```
|
||||
|
||||
```python {% srNumber=11 %}
|
||||
default_args = {
|
||||
"owner": "user_name",
|
||||
"email": ["username@org.com"],
|
||||
"email_on_failure": False,
|
||||
"retries": 3,
|
||||
"retry_delay": timedelta(minutes=5),
|
||||
"execution_timeout": timedelta(minutes=60)
|
||||
}
|
||||
|
||||
|
||||
```
|
||||
|
||||
```python {% srNumber=12 %}
|
||||
config = """
|
||||
<your YAML configuration>
|
||||
"""
|
||||
|
||||
|
||||
```
|
||||
|
||||
```python {% srNumber=13 %}
|
||||
def metadata_ingestion_workflow():
|
||||
workflow_config = yaml.safe_load(config)
|
||||
workflow = Workflow.create(workflow_config)
|
||||
workflow.execute()
|
||||
workflow.raise_from_status()
|
||||
workflow.print_status()
|
||||
workflow.stop()
|
||||
|
||||
|
||||
```
|
||||
|
||||
```python {% srNumber=14 %}
|
||||
with DAG(
|
||||
"sample_data",
|
||||
default_args=default_args,
|
||||
description="An example DAG which runs a OpenMetadata ingestion workflow",
|
||||
start_date=days_ago(1),
|
||||
is_paused_upon_creation=False,
|
||||
schedule_interval='*/5 * * * *',
|
||||
catchup=False,
|
||||
) as dag:
|
||||
ingest_task = PythonOperator(
|
||||
task_id="ingest_using_recipe",
|
||||
python_callable=metadata_ingestion_workflow,
|
||||
)
|
||||
|
||||
|
||||
```
|
||||
|
||||
{% /codeBlock %}
|
||||
|
||||
{% /codePreview %}
|
||||
|
||||
|
||||
## Data Profiler
|
||||
|
||||
The Data Profiler workflow will be using the `orm-profiler` processor.
|
||||
|
||||
After running a Metadata Ingestion workflow, we can run the Data Profiler workflow.
|
||||
The `serviceName` must be the same as the one used in the Metadata Ingestion, so that the ingestion bot can get the `serviceConnection` details from the server.
|
||||
|
||||
|
||||
### 1. Define the YAML Config
|
||||
|
||||
This is a sample config for the profiler:
|
||||
|
||||
{% codePreview %}
|
||||
|
||||
{% codeInfoContainer %}
|
||||
|
||||
{% codeInfo srNumber=15 %}
|
||||
#### Source Configuration - Source Config
|
||||
|
||||
You can find all the definitions and types for the `sourceConfig` [here](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/databaseServiceProfilerPipeline.json).
|
||||
|
||||
**generateSampleData**: Option to turn on/off generating sample data.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=16 %}
|
||||
|
||||
**profileSample**: Percentage of data or number of rows on which to execute the profiler and tests.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=17 %}
|
||||
|
||||
**threadCount**: Number of threads to use during metric computations.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=18 %}
|
||||
|
||||
**processPiiSensitive**: Optional configuration to automatically tag columns that might contain sensitive information.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=19 %}
|
||||
|
||||
**confidence**: Confidence level (0-100) above which a column will be tagged as PII sensitive.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
|
||||
{% codeInfo srNumber=20 %}
|
||||
|
||||
**timeoutSeconds**: Profiler Timeout in Seconds
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=21 %}
|
||||
|
||||
**databaseFilterPattern**: Regex to only fetch databases that match the pattern.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=22 %}
|
||||
|
||||
**schemaFilterPattern**: Regex to only fetch schemas that match the pattern.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=23 %}
|
||||
|
||||
**tableFilterPattern**: Regex to only fetch tables that match the pattern.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=24 %}
|
||||
|
||||
#### Processor Configuration
|
||||
|
||||
Choose the `orm-profiler`. Its config can also be updated to define tests from the YAML itself instead of the UI:
|
||||
|
||||
**tableConfig**: `tableConfig` allows you to set up some configuration at the table level.
|
||||
{% /codeInfo %}
|
||||
|
||||
|
||||
{% codeInfo srNumber=25 %}
|
||||
|
||||
#### Sink Configuration
|
||||
|
||||
To send the metadata to OpenMetadata, it needs to be specified as `type: metadata-rest`.
|
||||
{% /codeInfo %}
|
||||
|
||||
|
||||
{% codeInfo srNumber=26 %}
|
||||
|
||||
#### Workflow Configuration
|
||||
|
||||
The main property here is the `openMetadataServerConfig`, where you can define the host and security provider of your OpenMetadata installation.
|
||||
|
||||
For a simple, local installation using our docker containers, this looks like:
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% /codeInfoContainer %}
|
||||
|
||||
{% codeBlock fileName="filename.yaml" %}
|
||||
|
||||
|
||||
```yaml
|
||||
source:
|
||||
type: sapHana
|
||||
serviceName: <service name>
|
||||
sourceConfig:
|
||||
config:
|
||||
type: Profiler
|
||||
```
|
||||
|
||||
```yaml {% srNumber=15 %}
|
||||
generateSampleData: true
|
||||
```
|
||||
```yaml {% srNumber=16 %}
|
||||
# profileSample: 85
|
||||
```
|
||||
```yaml {% srNumber=17 %}
|
||||
# threadCount: 5
|
||||
```
|
||||
```yaml {% srNumber=18 %}
|
||||
processPiiSensitive: false
|
||||
```
|
||||
```yaml {% srNumber=19 %}
|
||||
# confidence: 80
|
||||
```
|
||||
```yaml {% srNumber=20 %}
|
||||
# timeoutSeconds: 43200
|
||||
```
|
||||
```yaml {% srNumber=21 %}
|
||||
# databaseFilterPattern:
|
||||
# includes:
|
||||
# - database1
|
||||
# - database2
|
||||
# excludes:
|
||||
# - database3
|
||||
# - database4
|
||||
```
|
||||
```yaml {% srNumber=22 %}
|
||||
# schemaFilterPattern:
|
||||
# includes:
|
||||
# - schema1
|
||||
# - schema2
|
||||
# excludes:
|
||||
# - schema3
|
||||
# - schema4
|
||||
```
|
||||
```yaml {% srNumber=23 %}
|
||||
# tableFilterPattern:
|
||||
# includes:
|
||||
# - table1
|
||||
# - table2
|
||||
# excludes:
|
||||
# - table3
|
||||
# - table4
|
||||
```
|
||||
|
||||
```yaml {% srNumber=24 %}
|
||||
processor:
|
||||
type: orm-profiler
|
||||
config: {} # Remove braces if adding properties
|
||||
# tableConfig:
|
||||
# - fullyQualifiedName: <table fqn>
|
||||
|
||||
|
||||
# profileSample: <number between 0 and 99> # default will be 100 if omitted
|
||||
# profileQuery: <query to use for sampling data for the profiler>
|
||||
# columnConfig:
|
||||
# excludeColumns:
|
||||
# - <column name>
|
||||
# includeColumns:
|
||||
# - columnName: <column name>
|
||||
# - metrics:
|
||||
# - MEAN
|
||||
# - MEDIAN
|
||||
# - ...
|
||||
# partitionConfig:
|
||||
# enablePartitioning: <set to true to use partitioning>
|
||||
# partitionColumnName: <partition column name. Must be a timestamp or datetime/date field type>
|
||||
# partitionInterval: <partition interval>
|
||||
# partitionIntervalUnit: <YEAR, MONTH, DAY, HOUR>
|
||||
|
||||
```
|
||||
|
||||
```yaml {% srNumber=25 %}
|
||||
sink:
|
||||
type: metadata-rest
|
||||
config: {}
|
||||
```
|
||||
|
||||
```yaml {% srNumber=26 %}
|
||||
workflowConfig:
|
||||
# loggerLevel: DEBUG # DEBUG, INFO, WARN or ERROR
|
||||
openMetadataServerConfig:
|
||||
hostPort: <OpenMetadata host and port>
|
||||
authProvider: <OpenMetadata auth provider>
|
||||
```
|
||||
|
||||
{% /codeBlock %}
|
||||
|
||||
{% /codePreview %}
|
||||
|
||||
- You can learn more about how to configure and run the Profiler Workflow to extract Profiler data and execute the Data Quality tests [here](/connectors/ingestion/workflows/profiler)
|
||||
|
||||
|
||||
|
||||
### 2. Prepare the Profiler DAG
|
||||
|
||||
Here, we follow a similar approach as with the metadata and usage pipelines, although we will use a different Workflow class:
|
||||
|
||||
|
||||
|
||||
|
||||
{% codePreview %}
|
||||
|
||||
{% codeInfoContainer %}
|
||||
|
||||
{% codeInfo srNumber=27 %}
|
||||
|
||||
#### Import necessary modules
|
||||
|
||||
The `ProfilerWorkflow` class that is being imported is a part of a metadata orm_profiler framework, which defines a process of extracting Profiler data.
|
||||
|
||||
Here we are also importing all the basic requirements to parse YAMLs, handle dates and build our DAG.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=28 %}
|
||||
|
||||
**Default arguments for all tasks in the Airflow DAG.**
|
||||
- Default arguments dictionary contains default arguments for tasks in the DAG, including the owner's name, email address, number of retries, retry delay, and execution timeout.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
|
||||
{% codeInfo srNumber=29 %}
|
||||
|
||||
- **config**: Specifies the config for the profiler as prepared above.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=30 %}
|
||||
|
||||
- **metadata_ingestion_workflow()**: This code defines a function `metadata_ingestion_workflow()` that loads a YAML configuration, creates a `ProfilerWorkflow` object, executes the workflow, checks its status, prints the status to the console, and stops the workflow.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=31 %}
|
||||
|
||||
- **DAG**: creates a DAG using the Airflow framework; tune the DAG configuration to whatever fits your requirements
|
||||
- For more details on creating Airflow DAGs, visit [here](https://airflow.apache.org/docs/apache-airflow/stable/core-concepts/dags.html#declaring-a-dag).
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% /codeInfoContainer %}
|
||||
|
||||
{% codeBlock fileName="filename.py" %}
|
||||
|
||||
```python {% srNumber=27 %}
|
||||
import yaml
|
||||
from datetime import timedelta
|
||||
from airflow import DAG
|
||||
from metadata.profiler.api.workflow import ProfilerWorkflow
|
||||
|
||||
try:
|
||||
from airflow.operators.python import PythonOperator
|
||||
except ModuleNotFoundError:
|
||||
from airflow.operators.python_operator import PythonOperator
|
||||
|
||||
from airflow.utils.dates import days_ago
|
||||
|
||||
|
||||
```
|
||||
```python {% srNumber=28 %}
|
||||
default_args = {
|
||||
"owner": "user_name",
|
||||
"email_on_failure": False,
|
||||
"retries": 3,
|
||||
"retry_delay": timedelta(seconds=10),
|
||||
"execution_timeout": timedelta(minutes=60),
|
||||
}
|
||||
|
||||
|
||||
```
|
||||
|
||||
```python {% srNumber=29 %}
|
||||
config = """
|
||||
<your YAML configuration>
|
||||
"""
|
||||
|
||||
|
||||
```
|
||||
|
||||
```python {% srNumber=30 %}
|
||||
def metadata_ingestion_workflow():
|
||||
workflow_config = yaml.safe_load(config)
|
||||
workflow = ProfilerWorkflow.create(workflow_config)
|
||||
workflow.execute()
|
||||
workflow.raise_from_status()
|
||||
workflow.print_status()
|
||||
workflow.stop()
|
||||
|
||||
|
||||
```
|
||||
|
||||
```python {% srNumber=31 %}
|
||||
with DAG(
|
||||
"profiler_example",
|
||||
default_args=default_args,
|
||||
description="An example DAG which runs a OpenMetadata ingestion workflow",
|
||||
start_date=days_ago(1),
|
||||
is_paused_upon_creation=False,
|
||||
catchup=False,
|
||||
) as dag:
|
||||
ingest_task = PythonOperator(
|
||||
task_id="profile_and_test_using_recipe",
|
||||
python_callable=metadata_ingestion_workflow,
|
||||
)
|
||||
|
||||
|
||||
```
|
||||
|
||||
{% /codeBlock %}
|
||||
|
||||
{% /codePreview %}
|
||||
|
||||
|
||||
## dbt Integration
|
||||
|
||||
{% tilesContainer %}
|
||||
|
||||
{% tile
|
||||
icon="mediation"
|
||||
title="dbt Integration"
|
||||
description="Learn more about how to ingest dbt models' definitions and their lineage."
|
||||
link="/connectors/ingestion/workflows/dbt" /%}
|
||||
|
||||
{% /tilesContainer %}
|
||||
|
||||
## Related
|
||||
|
||||
{% tilesContainer %}
|
||||
|
||||
{% tile
|
||||
title="Ingest with the CLI"
|
||||
description="Run a one-time ingestion using the metadata CLI"
|
||||
link="/connectors/database/sap-hana/cli"
|
||||
/ %}
|
||||
|
||||
{% /tilesContainer %}
|
@@ -0,0 +1,546 @@
|
||||
---
|
||||
title: Run SAP Hana Connector using the CLI
|
||||
slug: /connectors/database/sap-hana/cli
|
||||
---
|
||||
|
||||
# Run SAP Hana using the metadata CLI
|
||||
|
||||
{% multiTablesWrapper %}
|
||||
|
||||
| Feature | Status |
|
||||
| :----------------- |:-----------------------------|
|
||||
| Stage | BETA |
|
||||
| Metadata | {% icon iconName="check" /%} |
|
||||
| Query Usage | {% icon iconName="cross" /%} |
|
||||
| Data Profiler | {% icon iconName="check" /%} |
|
||||
| Data Quality | {% icon iconName="check" /%} |
|
||||
| Lineage | Partially via Views |
|
||||
| DBT | {% icon iconName="cross" /%} |
|
||||
|
||||
| Feature | Status |
|
||||
| :----------- | :--------------------------- |
|
||||
| Lineage | Partially via Views |
|
||||
| Table-level | {% icon iconName="check" /%} |
|
||||
| Column-level | {% icon iconName="check" /%} |
|
||||
|
||||
{% /multiTablesWrapper %}
|
||||
|
||||
In this section, we provide guides and references to use the SAP Hana connector.
|
||||
|
||||
Configure and schedule SAP Hana metadata and profiler workflows using the metadata CLI:
|
||||
|
||||
- [Requirements](#requirements)
|
||||
- [Metadata Ingestion](#metadata-ingestion)
|
||||
- [Data Profiler](#data-profiler)
|
||||
- [dbt Integration](#dbt-integration)
|
||||
|
||||
## Requirements
|
||||
|
||||
{%inlineCallout icon="description" bold="OpenMetadata 1.1 or later" href="/deployment"%}
|
||||
To deploy OpenMetadata, check the Deployment guides.
|
||||
{%/inlineCallout%}
|
||||
|
||||
To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with
|
||||
custom Airflow plugins to handle the workflow deployment.
|
||||
|
||||
{% note %}
|
||||
The connector is compatible with HANA or HANA express versions since HANA SPS 2.
|
||||
{% /note %}
|
||||
|
||||
### Python Requirements
|
||||
|
||||
To run the SAP Hana ingestion, you will need to install:
|
||||
|
||||
```bash
|
||||
pip3 install "openmetadata-ingestion[sap-hana]"
|
||||
```
|
||||
|
||||
## Metadata Ingestion
|
||||
|
||||
All connectors are defined as JSON Schemas.
|
||||
[Here](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/sapHanaConnection.json)
|
||||
you can find the structure to create a connection to SAP Hana.
|
||||
|
||||
In order to create and run a Metadata Ingestion workflow, we will follow
|
||||
the steps to create a YAML configuration able to connect to the source,
|
||||
process the Entities if needed, and reach the OpenMetadata server.
|
||||
|
||||
The workflow is modeled around the following
|
||||
[JSON Schema](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/workflow.json)
|
||||
|
||||
### 1. Define the YAML Config
|
||||
|
||||
This is a sample config for SAP Hana:
|
||||
|
||||
{% codePreview %}
|
||||
|
||||
{% codeInfoContainer %}
|
||||
|
||||
#### Source Configuration - Service Connection
|
||||
|
||||
We support two possible connection types:
|
||||
1. **SQL Connection**, where you will need the username, password and host.
|
||||
2. **HDB User Store** [connection](https://help.sap.com/docs/SAP_HANA_PLATFORM/b3ee5778bc2e4a089d3299b82ec762a7/dd95ac9dbb571014a7d7f0234d762fdb.html?version=2.0.05&locale=en-US).
|
||||
Note that the HDB Store will need to be locally available to the instance running the ingestion process.
|
||||
If you are unsure about this setting, you can run the ingestion process passing the usual SQL connection details.
|
||||
|
||||
##### SQL Connection
|
||||
|
||||
If using the SQL Connection, inform:
|
||||
|
||||
{% codeInfo srNumber=1 %}
|
||||
|
||||
**hostPort**: Host and port of the SAP Hana service. This should be specified as a string in the format `hostname:port`. E.g., `localhost:39041`, `host.docker.internal:39041`.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=2 %}
|
||||
|
||||
**username**: Specify the User to connect to SAP Hana. It should have enough privileges to read all the metadata.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=3 %}
|
||||
|
||||
**password**: Password to connect to SAP Hana.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=4 %}
|
||||
|
||||
**database**: Optional parameter to connect to a specific database.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=5 %}
|
||||
|
||||
**databaseSchema**: databaseSchema of the data source. This is an optional parameter; use it if you would like to restrict the metadata reading to a single schema. When left blank, OpenMetadata Ingestion attempts to scan all the schemas.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
##### HDB User Store
|
||||
|
||||
If you have a User Store configured, then:
|
||||
|
||||
{% codeInfo srNumber=6 %}
|
||||
|
||||
**userKey**: HDB Store User Key generated from the command `hdbuserstore SET <KEY> <host:port> <USERNAME> <PASSWORD>`.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
#### Source Configuration - Source Config
|
||||
|
||||
{% codeInfo srNumber=9 %}
|
||||
|
||||
The `sourceConfig` is defined [here](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/databaseServiceMetadataPipeline.json):
|
||||
|
||||
**markDeletedTables**: To flag tables as soft-deleted if they are not present anymore in the source system.
|
||||
|
||||
**includeTables**: true or false, to ingest table data. Default is true.
|
||||
|
||||
**includeViews**: true or false, to ingest view definitions.
|
||||
|
||||
**databaseFilterPattern**, **schemaFilterPattern**, **tableFilterPattern**: Note that the filter supports regex as include or exclude. You can find examples [here](/connectors/ingestion/workflows/metadata/filter-patterns/database)
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
|
||||
#### Sink Configuration
|
||||
|
||||
{% codeInfo srNumber=10 %}
|
||||
|
||||
To send the metadata to OpenMetadata, it needs to be specified as `type: metadata-rest`.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
#### Workflow Configuration
|
||||
|
||||
{% codeInfo srNumber=11 %}
|
||||
|
||||
The main property here is the `openMetadataServerConfig`, where you can define the host and security provider of your OpenMetadata installation.
|
||||
|
||||
For a simple, local installation using our docker containers, this looks like:
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
#### Advanced Configuration
|
||||
|
||||
{% codeInfo srNumber=7 %}
|
||||
|
||||
**Connection Options (Optional)**: Enter the details for any additional connection options that can be sent to SAP Hana during the connection. These details must be added as Key-Value pairs.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=8 %}
|
||||
|
||||
**Connection Arguments (Optional)**: Enter the details for any additional connection arguments such as security or protocol configs that can be sent to SAP Hana during the connection. These details must be added as Key-Value pairs.
|
||||
|
||||
- In case you are using Single-Sign-On (SSO) for authentication, add the `authenticator` details in the Connection Arguments as a Key-Value pair as follows: `"authenticator" : "sso_login_url"`
|
||||
- In case you authenticate with SSO using an external browser popup, then add the `authenticator` details in the Connection Arguments as a Key-Value pair as follows: `"authenticator" : "externalbrowser"`
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% /codeInfoContainer %}
|
||||
|
||||
{% codeBlock fileName="filename.yaml" %}
|
||||
|
||||
```yaml
|
||||
source:
|
||||
type: sapHana
|
||||
serviceName: <service name>
|
||||
serviceConnection:
|
||||
config:
|
||||
type: SapHana
|
||||
connection:
|
||||
```
|
||||
```yaml {% srNumber=1 %}
|
||||
## Parameters for the SQL Connection
|
||||
# hostPort: <hostPort>
|
||||
```
|
||||
```yaml {% srNumber=2 %}
|
||||
# username: <username>
|
||||
```
|
||||
```yaml {% srNumber=3 %}
|
||||
# password: <password>
|
||||
```
|
||||
```yaml {% srNumber=4 %}
|
||||
# database: <database>
|
||||
```
|
||||
```yaml {% srNumber=5 %}
|
||||
# databaseSchema: <schema>
|
||||
```
|
||||
```yaml {% srNumber=6 %}
|
||||
## Parameter for the HDB User Store
|
||||
# userKey: <key>
|
||||
```
|
||||
```yaml {% srNumber=7 %}
|
||||
# connectionOptions:
|
||||
# key: value
|
||||
```
|
||||
```yaml {% srNumber=8 %}
|
||||
# connectionArguments:
|
||||
# key: value
|
||||
```
|
||||
```yaml {% srNumber=9 %}
|
||||
sourceConfig:
|
||||
config:
|
||||
type: DatabaseMetadata
|
||||
markDeletedTables: true
|
||||
includeTables: true
|
||||
includeViews: true
|
||||
# includeTags: true
|
||||
# databaseFilterPattern:
|
||||
# includes:
|
||||
# - database1
|
||||
# - database2
|
||||
# excludes:
|
||||
# - database3
|
||||
# - database4
|
||||
# schemaFilterPattern:
|
||||
# includes:
|
||||
# - schema1
|
||||
# - schema2
|
||||
# excludes:
|
||||
# - schema3
|
||||
# - schema4
|
||||
# tableFilterPattern:
|
||||
# includes:
|
||||
# - table1
|
||||
# - table2
|
||||
# excludes:
|
||||
# - table3
|
||||
# - table4
|
||||
```
|
||||
|
||||
```yaml {% srNumber=10 %}
|
||||
sink:
|
||||
type: metadata-rest
|
||||
config: {}
|
||||
```
|
||||
|
||||
```yaml {% srNumber=11 %}
|
||||
workflowConfig:
|
||||
# loggerLevel: DEBUG # DEBUG, INFO, WARN or ERROR
|
||||
openMetadataServerConfig:
|
||||
hostPort: "<OpenMetadata host and port>"
|
||||
authProvider: "<OpenMetadata auth provider>"
|
||||
```
|
||||
|
||||
{% /codeBlock %}
|
||||
|
||||
{% /codePreview %}
|
||||
|
||||
|
||||
### Workflow Configs for Security Provider
|
||||
|
||||
We support different security providers. You can find their definitions [here](https://github.com/open-metadata/OpenMetadata/tree/main/openmetadata-spec/src/main/resources/json/schema/security/client).
|
||||
|
||||
## OpenMetadata JWT Auth
|
||||
|
||||
- JWT tokens allow your clients to authenticate against the OpenMetadata server. You can find more details on enabling JWT Tokens [here](/deployment/security/enable-jwt-tokens).
|
||||
|
||||
```yaml
|
||||
workflowConfig:
|
||||
openMetadataServerConfig:
|
||||
hostPort: "http://localhost:8585/api"
|
||||
authProvider: openmetadata
|
||||
securityConfig:
|
||||
jwtToken: "{bot_jwt_token}"
|
||||
```
|
||||
|
||||
- You can refer to the JWT Troubleshooting section [link](/deployment/security/jwt-troubleshooting) for any issues in your JWT configuration. If you need information on configuring the ingestion with other security providers in your bots, you can follow this doc [link](/deployment/security/workflow-config-auth).
|
||||
|
||||
|
||||
### 2. Run with the CLI
|
||||
|
||||
First, we will need to save the YAML file. Afterward, and with all requirements installed, we can run:
|
||||
|
||||
```bash
|
||||
metadata ingest -c <path-to-yaml>
|
||||
```
|
||||
|
||||
Note that from connector to connector, this recipe will always be the same. By updating the YAML configuration,
|
||||
you will be able to extract metadata from different sources.
|
||||
|
||||
## Data Profiler
|
||||
|
||||
The Data Profiler workflow will be using the `orm-profiler` processor.
|
||||
|
||||
After running a Metadata Ingestion workflow, we can run the Data Profiler workflow.
|
||||
The `serviceName` must be the same as the one used in the Metadata Ingestion, so that the ingestion bot can get the `serviceConnection` details from the server.
|
||||
|
||||
|
||||
### 1. Define the YAML Config
|
||||
|
||||
This is a sample config for the profiler:
|
||||
|
||||
{% codePreview %}
|
||||
|
||||
{% codeInfoContainer %}
|
||||
|
||||
{% codeInfo srNumber=15 %}
|
||||
#### Source Configuration - Source Config
|
||||
|
||||
You can find all the definitions and types for the `sourceConfig` [here](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/databaseServiceProfilerPipeline.json).
|
||||
|
||||
**generateSampleData**: Option to turn on/off generating sample data.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=16 %}
|
||||
|
||||
**profileSample**: Percentage of data or number of rows on which to execute the profiler and tests.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=17 %}
|
||||
|
||||
**threadCount**: Number of threads to use during metric computations.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=18 %}
|
||||
|
||||
**processPiiSensitive**: Optional configuration to automatically tag columns that might contain sensitive information.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=19 %}
|
||||
|
||||
**confidence**: Confidence level (0-100) above which a column will be tagged as PII sensitive.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
|
||||
{% codeInfo srNumber=20 %}
|
||||
|
||||
**timeoutSeconds**: Profiler Timeout in Seconds
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=21 %}
|
||||
|
||||
**databaseFilterPattern**: Regex to only fetch databases that match the pattern.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=22 %}
|
||||
|
||||
**schemaFilterPattern**: Regex to only fetch schemas that match the pattern.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=23 %}
|
||||
|
||||
**tableFilterPattern**: Regex to only fetch tables that match the pattern.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=24 %}
|
||||
|
||||
#### Processor Configuration
|
||||
|
||||
Choose the `orm-profiler`. Its config can also be updated to define tests from the YAML itself instead of the UI:
|
||||
|
||||
**tableConfig**: `tableConfig` allows you to set up some configuration at the table level.
|
||||
{% /codeInfo %}
|
||||
|
||||
|
||||
{% codeInfo srNumber=25 %}
|
||||
|
||||
#### Sink Configuration
|
||||
|
||||
To send the metadata to OpenMetadata, it needs to be specified as `type: metadata-rest`.
|
||||
{% /codeInfo %}
|
||||
|
||||
|
||||
{% codeInfo srNumber=26 %}
|
||||
|
||||
#### Workflow Configuration
|
||||
|
||||
The main property here is the `openMetadataServerConfig`, where you can define the host and security provider of your OpenMetadata installation.
|
||||
|
||||
For a simple, local installation using our docker containers, this looks like:
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% /codeInfoContainer %}
|
||||
|
||||
{% codeBlock fileName="filename.yaml" %}
|
||||
|
||||
|
||||
```yaml
|
||||
source:
|
||||
type: sapHana
|
||||
serviceName: <service name>
|
||||
sourceConfig:
|
||||
config:
|
||||
type: Profiler
|
||||
```
|
||||
|
||||
```yaml {% srNumber=15 %}
|
||||
generateSampleData: true
|
||||
```
|
||||
```yaml {% srNumber=16 %}
|
||||
# profileSample: 85
|
||||
```
|
||||
```yaml {% srNumber=17 %}
|
||||
# threadCount: 5
|
||||
```
|
||||
```yaml {% srNumber=18 %}
|
||||
processPiiSensitive: false
|
||||
```
|
||||
```yaml {% srNumber=19 %}
|
||||
# confidence: 80
|
||||
```
|
||||
```yaml {% srNumber=20 %}
|
||||
# timeoutSeconds: 43200
|
||||
```
|
||||
```yaml {% srNumber=21 %}
|
||||
# databaseFilterPattern:
|
||||
# includes:
|
||||
# - database1
|
||||
# - database2
|
||||
# excludes:
|
||||
# - database3
|
||||
# - database4
|
||||
```
|
||||
```yaml {% srNumber=22 %}
|
||||
# schemaFilterPattern:
|
||||
# includes:
|
||||
# - schema1
|
||||
# - schema2
|
||||
# excludes:
|
||||
# - schema3
|
||||
# - schema4
|
||||
```
|
||||
```yaml {% srNumber=23 %}
|
||||
# tableFilterPattern:
|
||||
# includes:
|
||||
# - table1
|
||||
# - table2
|
||||
# excludes:
|
||||
# - table3
|
||||
# - table4
|
||||
```
|
||||
|
||||
```yaml {% srNumber=24 %}
|
||||
processor:
|
||||
type: orm-profiler
|
||||
config: {} # Remove braces if adding properties
|
||||
# tableConfig:
|
||||
# - fullyQualifiedName: <table fqn>
|
||||
#       profileSample: <number between 0 and 99> # default will be 100 if omitted
|
||||
# profileQuery: <query to use for sampling data for the profiler>
|
||||
# columnConfig:
|
||||
# excludeColumns:
|
||||
# - <column name>
|
||||
# includeColumns:
|
||||
# - columnName: <column name>
|
||||
# - metrics:
|
||||
# - MEAN
|
||||
# - MEDIAN
|
||||
# - ...
|
||||
# partitionConfig:
|
||||
# enablePartitioning: <set to true to use partitioning>
|
||||
# partitionColumnName: <partition column name. Must be a timestamp or datetime/date field type>
|
||||
# partitionInterval: <partition interval>
|
||||
# partitionIntervalUnit: <YEAR, MONTH, DAY, HOUR>
|
||||
|
||||
```
|
||||
|
||||
```yaml {% srNumber=25 %}
|
||||
sink:
|
||||
type: metadata-rest
|
||||
config: {}
|
||||
```
|
||||
|
||||
```yaml {% srNumber=26 %}
|
||||
workflowConfig:
|
||||
# loggerLevel: DEBUG # DEBUG, INFO, WARN or ERROR
|
||||
openMetadataServerConfig:
|
||||
hostPort: <OpenMetadata host and port>
|
||||
authProvider: <OpenMetadata auth provider>
|
||||
```
|
||||
|
||||
{% /codeBlock %}
|
||||
|
||||
{% /codePreview %}
|
||||
|
||||
- You can learn more about how to configure and run the Profiler Workflow to extract Profiler data and execute Data Quality tests [here](/connectors/ingestion/workflows/profiler)
|
||||
|
||||
### 2. Run with the CLI
|
||||
|
||||
After saving the YAML config, we will run the command the same way we did for the metadata ingestion:
|
||||
|
||||
```bash
|
||||
metadata profile -c <path-to-yaml>
|
||||
```
|
||||
|
||||
Note how instead of running `ingest`, we are using the `profile` command to select the Profiler workflow.
|
||||
|
||||
## dbt Integration
|
||||
|
||||
{% tilesContainer %}
|
||||
|
||||
{% tile
|
||||
icon="mediation"
|
||||
title="dbt Integration"
|
||||
description="Learn more about how to ingest dbt models' definitions and their lineage."
|
||||
link="/connectors/ingestion/workflows/dbt" /%}
|
||||
|
||||
{% /tilesContainer %}
|
||||
|
||||
## Related
|
||||
|
||||
{% tilesContainer %}
|
||||
|
||||
{% tile
|
||||
title="Ingest with Airflow"
|
||||
description="Configure the ingestion using Airflow SDK"
|
||||
link="/connectors/database/sap-hana/airflow"
|
||||
/ %}
|
||||
|
||||
{% /tilesContainer %}
|
@ -0,0 +1,380 @@
|
||||
---
|
||||
title: SAP Hana
|
||||
slug: /connectors/database/sap-hana
|
||||
---
|
||||
|
||||
# SAP Hana
|
||||
|
||||
{% multiTablesWrapper %}
|
||||
|
||||
| Feature | Status |
|
||||
| :----------------- |:-----------------------------|
|
||||
| Stage | BETA |
|
||||
| Metadata | {% icon iconName="check" /%} |
|
||||
| Query Usage | {% icon iconName="cross" /%} |
|
||||
| Data Profiler | {% icon iconName="check" /%} |
|
||||
| Data Quality | {% icon iconName="check" /%} |
|
||||
| Lineage | Partially via Views |
|
||||
| DBT | {% icon iconName="cross" /%} |
|
||||
|
||||
| Feature | Status |
|
||||
| :----------- | :--------------------------- |
|
||||
| Lineage | Partially via Views |
|
||||
| Table-level | {% icon iconName="check" /%} |
|
||||
| Column-level | {% icon iconName="check" /%} |
|
||||
|
||||
{% /multiTablesWrapper %}
|
||||
|
||||
In this section, we provide guides and references to use the SAP Hana connector.
|
||||
|
||||
Configure and schedule SAP Hana metadata and profiler workflows from the OpenMetadata UI:
|
||||
|
||||
- [Requirements](#requirements)
|
||||
- [Metadata Ingestion](#metadata-ingestion)
|
||||
- [Data Profiler](/connectors/ingestion/workflows/profiler)
|
||||
- [Data Quality](/connectors/ingestion/workflows/data-quality)
|
||||
- [dbt Integration](/connectors/ingestion/workflows/dbt)
|
||||
|
||||
If you don't want to use the OpenMetadata Ingestion container to configure the workflows via the UI, then you can check
|
||||
the following docs to connect using Airflow SDK or with the CLI.
|
||||
|
||||
{% tilesContainer %}
|
||||
|
||||
{% tile
|
||||
title="Ingest with Airflow"
|
||||
description="Configure the ingestion using Airflow SDK"
|
||||
link="/connectors/database/sap-hana/airflow"
|
||||
/ %}
|
||||
{% tile
|
||||
title="Ingest with the CLI"
|
||||
description="Run a one-time ingestion using the metadata CLI"
|
||||
link="/connectors/database/sap-hana/cli"
|
||||
/ %}
|
||||
|
||||
{% /tilesContainer %}
|
||||
|
||||
## Requirements
|
||||
|
||||
{%inlineCallout icon="description" bold="OpenMetadata 1.1 or later" href="/deployment"%}
|
||||
To deploy OpenMetadata, check the Deployment guides.
|
||||
{%/inlineCallout%}
|
||||
|
||||
To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with
|
||||
custom Airflow plugins to handle the workflow deployment.
|
||||
|
||||
{% note %}
|
||||
The connector is compatible with HANA or HANA express versions since HANA SPS 2.
|
||||
{% /note %}
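If you plan to run the ingestion outside the UI container, for example with the Airflow SDK or the CLI, you will also need the SAP Hana plugin of the ingestion package, which installs the required SAP Hana driver dependencies:

```bash
# Install the OpenMetadata ingestion package together with the SAP Hana plugin
pip install "openmetadata-ingestion[sap-hana]"
```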
|
||||
|
||||
### Metadata
|
||||
|
||||
To extract metadata, the user used in the connection needs to have access to the `SYS` schema.
|
||||
|
||||
You can create a new user to run the ingestion with:
|
||||
|
||||
```SQL
|
||||
CREATE USER openmetadata PASSWORD Password123;
|
||||
```
|
||||
|
||||
And, if you have password policies forcing users to reset the password, you can disable that policy for this technical user with:
|
||||
|
||||
```SQL
|
||||
ALTER USER openmetadata DISABLE PASSWORD LIFETIME;
|
||||
```
|
||||
|
||||
### Profiler & Data Quality
|
||||
|
||||
Executing the profiler workflow or data quality tests requires the user to have `SELECT` permission on the tables and schemas where the profiler or tests will be executed. The user should also be allowed to view information in `tables` for all objects in the database. More information on the profiler workflow setup can be found [here](https://docs.open-metadata.org/connectors/ingestion/workflows/profiler) and on data quality tests [here](https://docs.open-metadata.org/connectors/ingestion/workflows/data-quality).
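For example, assuming the `openmetadata` technical user created above and a hypothetical schema named `SALES`, the required read access could be granted with:

```SQL
-- Illustrative only: SALES is a placeholder schema name.
-- Grants read access so the profiler and data quality tests can query its tables.
GRANT SELECT ON SCHEMA SALES TO openmetadata;
```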
|
||||
|
||||
## Metadata Ingestion
|
||||
|
||||
{% stepsContainer %}
|
||||
|
||||
{% step srNumber=1 %}
|
||||
|
||||
{% stepDescription title="1. Visit the Services Page" %}
|
||||
|
||||
The first step is to ingest the metadata from your sources. To do that, create a service connection first. Once a service is created, it can be used to configure
|
||||
metadata, usage, and profiler workflows.
|
||||
|
||||
To visit the Database Services page, click on `Settings` in the top navigation bar and select 'Databases' from the left panel.
|
||||
|
||||
{% /stepDescription %}
|
||||
|
||||
{% stepVisualInfo %}
|
||||
|
||||
{% image
|
||||
src="/images/v1.0.0/connectors/visit-database-service-page.png"
|
||||
alt="Visit Services Page"
|
||||
caption="Find Databases option on left panel of the settings page" /%}
|
||||
|
||||
{% /stepVisualInfo %}
|
||||
|
||||
{% /step %}
|
||||
|
||||
{% step srNumber=2 %}
|
||||
|
||||
{% stepDescription title="2. Create a New Service" %}
|
||||
|
||||
Click on the 'Add New Service' button to start the Service creation.
|
||||
|
||||
{% /stepDescription %}
|
||||
|
||||
{% stepVisualInfo %}
|
||||
|
||||
{% image
|
||||
src="/images/v1.0.0/connectors/create-database-service.png"
|
||||
alt="Create a new service"
|
||||
caption="Add a new Service from the Database Services page" /%}
|
||||
|
||||
{% /stepVisualInfo %}
|
||||
|
||||
{% /step %}
|
||||
|
||||
{% step srNumber=3 %}
|
||||
|
||||
{% stepDescription title="3. Select the Service Type" %}
|
||||
|
||||
Select SAP Hana as the service type and click Next.
|
||||
|
||||
{% /stepDescription %}
|
||||
|
||||
{% stepVisualInfo %}
|
||||
|
||||
{% image
|
||||
src="/images/v1.1.0/connectors/sap-hana/select-service.png"
|
||||
alt="Select Service"
|
||||
caption="Select your service from the list" /%}
|
||||
|
||||
{% /stepVisualInfo %}
|
||||
|
||||
{% /step %}
|
||||
|
||||
{% step srNumber=4 %}
|
||||
|
||||
{% stepDescription title="4. Name and Describe your Service" %}
|
||||
|
||||
Provide a name and description for your service as illustrated below.
|
||||
|
||||
#### Service Name
|
||||
|
||||
OpenMetadata uniquely identifies services by their Service Name. Provide
|
||||
a name that distinguishes your deployment from other services, including
|
||||
the other SAP Hana services that you might be ingesting metadata
|
||||
from.
|
||||
|
||||
{% /stepDescription %}
|
||||
|
||||
{% stepVisualInfo %}
|
||||
|
||||
{% image
|
||||
src="/images/v1.1.0/connectors/sap-hana/add-new-service.png"
|
||||
alt="Add New Service"
|
||||
caption="Provide a Name and description for your Service" /%}
|
||||
|
||||
{% /stepVisualInfo %}
|
||||
|
||||
{% /step %}
|
||||
|
||||
{% step srNumber=5 %}
|
||||
|
||||
{% stepDescription title="5. Configure the Service Connection" %}
|
||||
|
||||
In this step, we will configure the connection settings required for
|
||||
this connector. Please follow the instructions below to ensure that
|
||||
you've configured the connector to read from your SAP Hana service as
|
||||
desired.
|
||||
|
||||
{% /stepDescription %}
|
||||
|
||||
{% stepVisualInfo %}
|
||||
|
||||
{% image
|
||||
src="/images/v1.1.0/connectors/sap-hana/service-connection.png"
|
||||
alt="Configure service connection"
|
||||
caption="Configure the service connection by filling the form" /%}
|
||||
|
||||
{% /stepVisualInfo %}
|
||||
|
||||
{% /step %}
|
||||
|
||||
{% extraContent parentTagName="stepsContainer" %}
|
||||
|
||||
#### Connection Options
|
||||
|
||||
We support two possible connection types:
|
||||
1. **SQL Connection**, where you will pass the username, password and host.
|
||||
2. **HDB User Store** [connection](https://help.sap.com/docs/SAP_HANA_PLATFORM/b3ee5778bc2e4a089d3299b82ec762a7/dd95ac9dbb571014a7d7f0234d762fdb.html?version=2.0.05&locale=en-US).
|
||||
Note that the HDB Store will need to be locally available to the instance running the ingestion process.
|
||||
If you are unsure about this setting, you can run the ingestion process passing the usual SQL connection details.
|
||||
|
||||
##### SQL Connection
|
||||
|
||||
- **Host and Port**: Host and port of the SAP Hana service. This should be specified as a string in the format `hostname:port`. E.g., `localhost:39041`, `host.docker.internal:39041`.
|
||||
- **Username**: Specify the User to connect to SAP Hana. It should have enough privileges to read all the metadata.
|
||||
- **Password**: Password to connect to SAP Hana.
|
||||
- **database**: Optional parameter to connect to a specific database.
|
||||
- **databaseSchema**: Database Schema of the data source. This is an optional parameter; use it if you would like to restrict the metadata reading to a single schema. When left blank, OpenMetadata Ingestion attempts to scan all the schemas.
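If you later configure the same connection outside the UI (for example in an Airflow or CLI ingestion YAML), these SQL Connection fields map to the `serviceConnection` block. The following is only a minimal sketch with placeholder values:

```yaml
# Minimal sketch of a SQL Connection serviceConnection block; values are placeholders.
serviceConnection:
  config:
    type: SapHana
    connection:
      hostPort: localhost:39041
      username: openmetadata
      password: Password123
      database: HXE            # optional
      databaseSchema: SALES    # optional, restricts ingestion to a single schema
```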
|
||||
|
||||
##### HDB User Store
|
||||
|
||||
- **User Key**: HDB Store User Key generated from the command `hdbuserstore SET <KEY> <host:port> <USERNAME> <PASSWORD>`.
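For reference, such a key is created on the machine that runs the ingestion using the SAP HANA client tools; the key name, host, and credentials below are placeholders:

```bash
# Create a user store key on the ingestion host (placeholder values)
hdbuserstore SET OPENMETADATA_KEY "hana-host:39041" openmetadata Password123

# Verify the stored keys (passwords are never displayed)
hdbuserstore LIST
```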
|
||||
|
||||
- **Connection Options (Optional)**: Enter the details for any additional connection options that can be sent to SAP Hana during the connection. These details must be added as Key-Value pairs.
|
||||
- **Connection Arguments (Optional)**: Enter the details for any additional connection arguments such as security or protocol configs that can be sent to SAP Hana during the connection. These details must be added as Key-Value pairs.
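For example, TLS-related driver properties can be forwarded this way. The property names below are assumptions based on common `hdbcli` settings and should be checked against your SAP HANA client version:

```yaml
# Illustrative connection arguments forwarded to the hdbcli driver (assumed property names)
connectionArguments:
  encrypt: true
  sslValidateCertificate: false
```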
|
||||
|
||||
{% /extraContent %}
|
||||
|
||||
{% step srNumber=6 %}
|
||||
|
||||
{% stepDescription title="6. Test the Connection" %}
|
||||
|
||||
Once the credentials have been added, click on `Test Connection` and Save
|
||||
the changes.
|
||||
|
||||
{% /stepDescription %}
|
||||
|
||||
{% stepVisualInfo %}
|
||||
|
||||
{% image
|
||||
src="/images/v1.0.0/connectors/test-connection.png"
|
||||
alt="Test Connection"
|
||||
caption="Test the connection and save the Service" /%}
|
||||
|
||||
{% /stepVisualInfo %}
|
||||
|
||||
{% /step %}
|
||||
|
||||
{% step srNumber=7 %}
|
||||
|
||||
{% stepDescription title="7. Configure Metadata Ingestion" %}
|
||||
|
||||
In this step we will configure the metadata ingestion pipeline.
|
||||
Please follow the instructions below.
|
||||
|
||||
{% /stepDescription %}
|
||||
|
||||
{% stepVisualInfo %}
|
||||
|
||||
{% image
|
||||
src="/images/v1.0.0/connectors/configure-metadata-ingestion-database.png"
|
||||
alt="Configure Metadata Ingestion"
|
||||
caption="Configure Metadata Ingestion Page" /%}
|
||||
|
||||
{% /stepVisualInfo %}
|
||||
|
||||
{% /step %}
|
||||
|
||||
{% extraContent parentTagName="stepsContainer" %}
|
||||
|
||||
#### Metadata Ingestion Options
|
||||
|
||||
- **Name**: This field refers to the name of the ingestion pipeline. You can customize the name or use the generated one.
|
||||
- **Database Filter Pattern (Optional)**: Use database filter patterns to control whether or not to include databases as part of metadata ingestion.
|
||||
- **Include**: Explicitly include databases by adding a list of comma-separated regular expressions to the Include field. OpenMetadata will include all databases with names matching one or more of the supplied regular expressions. All other databases will be excluded.
|
||||
- **Exclude**: Explicitly exclude databases by adding a list of comma-separated regular expressions to the Exclude field. OpenMetadata will exclude all databases with names matching one or more of the supplied regular expressions. All other databases will be included.
|
||||
- **Schema Filter Pattern (Optional)**: Use schema filter patterns to control whether or not to include schemas as part of metadata ingestion.
|
||||
- **Include**: Explicitly include schemas by adding a list of comma-separated regular expressions to the Include field. OpenMetadata will include all schemas with names matching one or more of the supplied regular expressions. All other schemas will be excluded.
|
||||
- **Exclude**: Explicitly exclude schemas by adding a list of comma-separated regular expressions to the Exclude field. OpenMetadata will exclude all schemas with names matching one or more of the supplied regular expressions. All other schemas will be included.
|
||||
- **Table Filter Pattern (Optional)**: Use table filter patterns to control whether or not to include tables as part of metadata ingestion.
|
||||
- **Include**: Explicitly include tables by adding a list of comma-separated regular expressions to the Include field. OpenMetadata will include all tables with names matching one or more of the supplied regular expressions. All other tables will be excluded.
|
||||
- **Exclude**: Explicitly exclude tables by adding a list of comma-separated regular expressions to the Exclude field. OpenMetadata will exclude all tables with names matching one or more of the supplied regular expressions. All other tables will be included.
|
||||
- **Include views (toggle)**: Set the Include views toggle to control whether or not to include views as part of metadata ingestion.
|
||||
- **Include tags (toggle)**: Set the 'Include Tags' toggle to control whether to include tags as part of metadata ingestion.
|
||||
- **Enable Debug Log (toggle)**: Set the Enable Debug Log toggle to set the default log level to debug; these logs can be viewed later in Airflow.
|
||||
|
||||
- **Mark Deleted Tables (toggle)**: Set the Mark Deleted Tables toggle to flag tables as soft-deleted if they are not present anymore in the source system.
|
||||
- **Mark Deleted Tables from Filter Only (toggle)**: Set the Mark Deleted Tables from Filter Only toggle to flag tables as soft-deleted only if they are no longer present within the filtered schemas or databases. This flag is useful when you have more than one ingestion pipeline; for example, when each pipeline ingests a different schema, only the tables within that pipeline's filtered scope will be flagged.
|
||||
|
||||
{% /extraContent %}
|
||||
|
||||
{% step srNumber=8 %}
|
||||
|
||||
{% stepDescription title="8. Schedule the Ingestion and Deploy" %}
|
||||
|
||||
Scheduling can be set up at an hourly, daily, weekly, or manual cadence. The
|
||||
timezone is in UTC. Select a Start Date to schedule for ingestion. It is
|
||||
optional to add an End Date.
|
||||
|
||||
Review your configuration settings. If they match what you intended,
|
||||
click Deploy to create the service and schedule metadata ingestion.
|
||||
|
||||
If something doesn't look right, click the Back button to return to the
|
||||
appropriate step and change the settings as needed.
|
||||
|
||||
After configuring the workflow, you can click on Deploy to create the
|
||||
pipeline.
|
||||
|
||||
{% /stepDescription %}
|
||||
|
||||
{% stepVisualInfo %}
|
||||
|
||||
{% image
|
||||
src="/images/v1.0.0/connectors/schedule.png"
|
||||
alt="Schedule the Workflow"
|
||||
caption="Schedule the Ingestion Pipeline and Deploy" /%}
|
||||
|
||||
{% /stepVisualInfo %}
|
||||
|
||||
{% /step %}
|
||||
|
||||
{% step srNumber=9 %}
|
||||
|
||||
{% stepDescription title="9. View the Ingestion Pipeline" %}
|
||||
|
||||
Once the workflow has been successfully deployed, you can view the
|
||||
Ingestion Pipeline running from the Service Page.
|
||||
|
||||
{% /stepDescription %}
|
||||
|
||||
{% stepVisualInfo %}
|
||||
|
||||
{% image
|
||||
src="/images/v1.0.0/connectors/view-ingestion-pipeline.png"
|
||||
alt="View Ingestion Pipeline"
|
||||
caption="View the Ingestion Pipeline from the Service Page" /%}
|
||||
|
||||
{% /stepVisualInfo %}
|
||||
|
||||
{% /step %}
|
||||
|
||||
{% /stepsContainer %}
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Workflow Deployment Error
|
||||
|
||||
If there were any errors during the workflow deployment process, the
|
||||
Ingestion Pipeline Entity will still be created, but no workflow will be
|
||||
present in the Ingestion container.
|
||||
|
||||
- You can then edit the Ingestion Pipeline and Deploy it again.
|
||||
|
||||
- From the Connection tab, you can also Edit the Service if needed.
|
||||
|
||||
{% image
|
||||
src="/images/v1.0.0/connectors/workflow-deployment-error.png"
|
||||
alt="Workflow Deployment Error"
|
||||
caption="Edit and Deploy the Ingestion Pipeline" /%}
|
||||
|
||||
## Related
|
||||
|
||||
{% tilesContainer %}
|
||||
|
||||
{% tile
|
||||
title="Profiler Workflow"
|
||||
description="Learn more about how to configure the Data Profiler from the UI."
|
||||
link="/connectors/ingestion/workflows/profiler" /%}
|
||||
|
||||
{% tile
|
||||
title="Data Quality Workflow"
|
||||
description="Learn more about how to configure the Data Quality tests from the UI."
|
||||
link="/connectors/ingestion/workflows/data-quality" /%}
|
||||
|
||||
{% tile
|
||||
icon="mediation"
|
||||
title="dbt Integration"
|
||||
description="Learn more about how to ingest dbt models' definitions and their lineage."
|
||||
link="/connectors/ingestion/workflows/dbt" /%}
|
||||
|
||||
{% /tilesContainer %}
|
@ -124,17 +124,27 @@ auth_backends = airflow.api.auth.backend.basic_auth
|
||||
After installing the Airflow APIs, you will need to update your OpenMetadata Server.
|
||||
|
||||
The OpenMetadata server takes all its configurations from a YAML file. You can find them in our [repo](https://github.com/open-metadata/OpenMetadata/tree/main/conf). In
|
||||
`openmetadata.yaml`, update the `airflowConfiguration` section accordingly.
|
||||
`openmetadata.yaml`, update the `pipelineServiceClientConfiguration` section accordingly.
|
||||
|
||||
```yaml
|
||||
# For Bare Metal Installations
|
||||
[...]
|
||||
|
||||
airflowConfiguration:
|
||||
apiEndpoint: ${AIRFLOW_HOST:-http://localhost:8080}
|
||||
pipelineServiceClientConfiguration:
|
||||
className: ${PIPELINE_SERVICE_CLIENT_CLASS_NAME:-"org.openmetadata.service.clients.pipeline.airflow.AirflowRESTClient"}
|
||||
apiEndpoint: ${PIPELINE_SERVICE_CLIENT_ENDPOINT:-http://localhost:8080}
|
||||
metadataApiEndpoint: ${SERVER_HOST_API_URL:-http://localhost:8585/api}
|
||||
hostIp: ${PIPELINE_SERVICE_CLIENT_HOST_IP:-""}
|
||||
verifySSL: ${PIPELINE_SERVICE_CLIENT_VERIFY_SSL:-"no-ssl"} # Possible values are "no-ssl", "ignore", "validate"
|
||||
sslConfig:
|
||||
validate:
|
||||
certificatePath: ${PIPELINE_SERVICE_CLIENT_SSL_CERT_PATH:-""} # Local path for the Pipeline Service Client
|
||||
|
||||
# Default required parameters for Airflow as Pipeline Service Client
|
||||
parameters:
|
||||
username: ${AIRFLOW_USERNAME:-admin}
|
||||
password: ${AIRFLOW_PASSWORD:-admin}
|
||||
metadataApiEndpoint: ${SERVER_HOST_API_URL:-http://localhost:8585/api}
|
||||
timeout: ${AIRFLOW_TIMEOUT:-10}
|
||||
|
||||
[...]
|
||||
```
|
||||
@ -142,7 +152,7 @@ airflowConfiguration:
|
||||
If using Docker, make sure that you are passing the correct environment variables:
|
||||
|
||||
```env
|
||||
AIRFLOW_HOST: ${AIRFLOW_HOST:-http://ingestion:8080}
|
||||
PIPELINE_SERVICE_CLIENT_ENDPOINT: ${PIPELINE_SERVICE_CLIENT_ENDPOINT:-http://ingestion:8080}
|
||||
SERVER_HOST_API_URL: ${SERVER_HOST_API_URL:-http://openmetadata-server:8585/api}
|
||||
```
|
||||
|
||||
@ -169,11 +179,11 @@ What we need to verify here is that the OpenMetadata server can reach the Airflo
|
||||
hosting your OpenMetadata server and running a query against the `/health` endpoint. For example:
|
||||
|
||||
```bash
|
||||
$ curl -XGET ${AIRFLOW_HOST}/api/v1/openmetadata/health
|
||||
$ curl -XGET ${PIPELINE_SERVICE_CLIENT_ENDPOINT}/api/v1/openmetadata/health
|
||||
{"status": "healthy", "version": "x.y.z"}
|
||||
```
|
||||
|
||||
It is important to do this validation passing the command as is (i.e., `curl -XGET ${AIRFLOW_HOST}/api/v1/openmetadata/health`)
|
||||
It is important to do this validation passing the command as is (i.e., `curl -XGET ${PIPELINE_SERVICE_CLIENT_ENDPOINT}/api/v1/openmetadata/health`)
|
||||
and allowing the environment to do the substitution for you. That's the only way we can be sure that the setup is
|
||||
correct.
|
||||
|
||||
@ -193,7 +203,7 @@ Note that in this example we are assuming:
|
||||
A generic call would look like:
|
||||
|
||||
```bash
|
||||
curl -XPOST <AIRFLOW_HOST>/api/v1/openmetadata/enable --data-raw '{"dag_id": "<DAG name>"}' -u "<user>:<password>" --header 'Content-Type: application/json'
|
||||
curl -XPOST <PIPELINE_SERVICE_CLIENT_ENDPOINT>/api/v1/openmetadata/enable --data-raw '{"dag_id": "<DAG name>"}' -u "<user>:<password>" --header 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
Please update it accordingly.
|
||||
@ -239,7 +249,7 @@ Validate the installation, making sure that from the OpenMetadata server you can
|
||||
call to `/health` gives us the proper response:
|
||||
|
||||
```bash
|
||||
$ curl -XGET ${AIRFLOW_HOST}/api/v1/openmetadata/health
|
||||
$ curl -XGET ${PIPELINE_SERVICE_CLIENT_ENDPOINT}/api/v1/openmetadata/health
|
||||
{"status": "healthy", "version": "x.y.z"}
|
||||
```
|
||||
|
||||
|
@ -355,6 +355,12 @@ site_menu:
|
||||
url: /connectors/database/salesforce/airflow
|
||||
- category: Connectors / Database / Salesforce / CLI
|
||||
url: /connectors/database/salesforce/cli
|
||||
- category: Connectors / Database / SAP Hana
|
||||
url: /connectors/database/sap-hana
|
||||
- category: Connectors / Database / SAP Hana / Airflow
|
||||
url: /connectors/database/sap-hana/airflow
|
||||
- category: Connectors / Database / SAP Hana / CLI
|
||||
url: /connectors/database/sap-hana/cli
|
||||
- category: Connectors / Database / SingleStore
|
||||
url: /connectors/database/singlestore
|
||||
- category: Connectors / Database / SingleStore / Airflow
|
||||
|
Binary file not shown.
After Width: | Height: | Size: 40 KiB |
Binary file not shown.
After Width: | Height: | Size: 123 KiB |
Binary file not shown.
After Width: | Height: | Size: 117 KiB |
@ -0,0 +1,34 @@
|
||||
{
|
||||
"name": "SapHana",
|
||||
"fullyQualifiedName": "SapHana",
|
||||
"displayName": "SAP Hana Test Connection",
|
||||
"description": "This Test Connection validates the access against the database and basic metadata extraction of schemas and tables.",
|
||||
"steps": [
|
||||
{
|
||||
"name": "CheckAccess",
|
||||
"description": "Validate that we can properly reach the database and authenticate with the given credentials.",
|
||||
"errorMessage": "Failed to connect to SAP Hana, please validate the credentials",
|
||||
"shortCircuit": true,
|
||||
"mandatory": true
|
||||
},
|
||||
{
|
||||
"name": "GetSchemas",
|
||||
"description": "List all the schemas available to the user.",
|
||||
"errorMessage": "Failed to fetch schemas, please validate if the user has enough privilege to fetch schemas.",
|
||||
"mandatory": true
|
||||
},
|
||||
{
|
||||
"name": "GetTables",
|
||||
"description": "From a given schema, list the tables belonging to that schema. If no schema is specified, we'll list the tables of a random schema.",
|
||||
"errorMessage": "Failed to fetch tables, please validate if the user has enough privilege to fetch tables.",
|
||||
"mandatory": true
|
||||
},
|
||||
{
|
||||
"name": "GetViews",
|
||||
"description": "From a given schema, list the views belonging to that schema. If no schema is specified, we'll list the tables of a random schema.",
|
||||
"errorMessage": "Failed to fetch views, please validate if the user has enough privilege to fetch views.",
|
||||
"mandatory": false
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -0,0 +1,122 @@
|
||||
{
|
||||
"$id": "https://open-metadata.org/schema/entity/services/connections/database/sapHanaConnection.json",
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"title": "SapHanaConnection",
|
||||
"description": "Sap Hana Database Connection Config",
|
||||
"type": "object",
|
||||
"javaType": "org.openmetadata.schema.services.connections.database.SapHanaConnection",
|
||||
"definitions": {
|
||||
"sapHanaType": {
|
||||
"description": "Service type.",
|
||||
"type": "string",
|
||||
"enum": ["SapHana"],
|
||||
"default": "SapHana"
|
||||
},
|
||||
"sapHanaScheme": {
|
||||
"description": "SQLAlchemy driver scheme options.",
|
||||
"type": "string",
|
||||
"enum": ["hana"],
|
||||
"default": "hana"
|
||||
},
|
||||
"sqlConnection": {
|
||||
"title": "SQL Connection",
|
||||
"description": "Options to connect to SAP Hana by passing the database information",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"hostPort": {
|
||||
"title": "Host and Port",
|
||||
"description": "Host and port of the Hana service.",
|
||||
"type": "string"
|
||||
},
|
||||
"username": {
|
||||
"title": "Username",
|
||||
"description": "Username to connect to Hana. This user should have privileges to read all the metadata.",
|
||||
"type": "string"
|
||||
},
|
||||
"password": {
|
||||
"title": "Password",
|
||||
"description": "Password to connect to Hana.",
|
||||
"type": "string",
|
||||
"format": "password"
|
||||
},
|
||||
"databaseSchema": {
|
||||
"title": "Database Schema",
|
||||
"description": "Database Schema of the data source. This is an optional parameter, if you would like to restrict the metadata reading to a single schema. When left blank, OpenMetadata Ingestion attempts to scan all the schemas.",
|
||||
"type": "string"
|
||||
},
|
||||
"database": {
|
||||
"title": "Database",
|
||||
"description": "Database of the data source.",
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": ["username", "password", "hostPort"]
|
||||
},
|
||||
"hdbUserStoreConnection": {
|
||||
"title": "HDB User Store Connection",
|
||||
"description": "Use HDB User Store to avoid entering connection-related information manually. This store needs to be present on the client running the ingestion.",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"userKey": {
|
||||
"title": "User Key",
|
||||
"description": "HDB Store User Key generated from the command `hdbuserstore SET <KEY> <host:port> <USERNAME> <PASSWORD>`",
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"properties": {
|
||||
"type": {
|
||||
"title": "Service Type",
|
||||
"description": "Service Type",
|
||||
"$ref": "#/definitions/sapHanaType",
|
||||
"default": "SapHana"
|
||||
},
|
||||
"scheme": {
|
||||
"title": "Connection Scheme",
|
||||
"description": "SQLAlchemy driver scheme options.",
|
||||
"$ref": "#/definitions/sapHanaScheme",
|
||||
"default": "hana"
|
||||
},
|
||||
"connection": {
|
||||
"mask": true,
|
||||
"title": "SAP Hana Connection",
|
||||
"description": "Choose between Database connection or HDB User Store connection.",
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/definitions/sqlConnection"
|
||||
},
|
||||
{
|
||||
"$ref": "#/definitions/hdbUserStoreConnection"
|
||||
}
|
||||
]
|
||||
},
|
||||
"connectionOptions": {
|
||||
"title": "Connection Options",
|
||||
"$ref": "../connectionBasicType.json#/definitions/connectionOptions"
|
||||
},
|
||||
"connectionArguments": {
|
||||
"title": "Connection Arguments",
|
||||
"$ref": "../connectionBasicType.json#/definitions/connectionArguments"
|
||||
},
|
||||
"supportsMetadataExtraction": {
|
||||
"title": "Supports Metadata Extraction",
|
||||
"$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction"
|
||||
},
|
||||
"supportsDBTExtraction": {
|
||||
"$ref": "../connectionBasicType.json#/definitions/supportsDBTExtraction"
|
||||
},
|
||||
"supportsProfiler": {
|
||||
"title": "Supports Profiler",
|
||||
"$ref": "../connectionBasicType.json#/definitions/supportsProfiler"
|
||||
},
|
||||
"supportsQueryComment": {
|
||||
"title": "Supports Query Comment",
|
||||
"$ref": "../connectionBasicType.json#/definitions/supportsQueryComment"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": ["connection"]
|
||||
}
|
@ -45,7 +45,8 @@
|
||||
"DomoDatabase",
|
||||
"QueryLog",
|
||||
"CustomDatabase",
|
||||
"Dbt"
|
||||
"Dbt",
|
||||
"SapHana"
|
||||
],
|
||||
"javaEnums": [
|
||||
{
|
||||
@ -140,6 +141,9 @@
|
||||
},
|
||||
{
|
||||
"name": "Dbt"
|
||||
},
|
||||
{
|
||||
"name": "SapHana"
|
||||
}
|
||||
]
|
||||
},
|
||||
@ -239,6 +243,9 @@
|
||||
},
|
||||
{
|
||||
"$ref": "./connections/database/customDatabaseConnection.json"
|
||||
},
|
||||
{
|
||||
"$ref": "./connections/database/sapHanaConnection.json"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -0,0 +1,88 @@
|
||||
# SAP Hana
|
||||
|
||||
In this section, we provide guides and references to use the SAP Hana connector.
|
||||
|
||||
## Requirements
|
||||
|
||||
The connector is compatible with HANA or HANA express versions since HANA SPS 2.
|
||||
|
||||
To extract metadata, the user used in the connection needs to have access to the `SYS` schema.
|
||||
|
||||
You can create a new user to run the ingestion with:
|
||||
|
||||
```SQL
|
||||
CREATE USER openmetadata PASSWORD Password123;
|
||||
```
|
||||
|
||||
And, if you have password policies forcing users to reset the password, you can disable that policy for this technical user with:
|
||||
|
||||
```SQL
|
||||
ALTER USER openmetadata DISABLE PASSWORD LIFETIME;
|
||||
```
|
||||
|
||||
### Profiler & Data Quality
|
||||
|
||||
Executing the profiler workflow or data quality tests requires the user to have `SELECT` permission on the tables and schemas where the profiler or tests will be executed. The user should also be allowed to view information in `tables` for all objects in the database. More information on the profiler workflow setup can be found [here](https://docs.open-metadata.org/connectors/ingestion/workflows/profiler) and on data quality tests [here](https://docs.open-metadata.org/connectors/ingestion/workflows/data-quality).
|
||||
|
||||
You can find further information on the SAP Hana connector in the [docs](https://docs.open-metadata.org/connectors/database/sap-hana).
|
||||
|
||||
## Connection Details
|
||||
|
||||
$$section
|
||||
### Scheme $(id="scheme")
|
||||
SQLAlchemy driver scheme options. We only support `hana`, which is based on `hdbcli`.
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Connection $(id="connection")
|
||||
We support two possible connection types:
|
||||
1. **SQL Connection**, where you will pass the username, password and host.
|
||||
2. **HDB User Store** [connection](https://help.sap.com/docs/SAP_HANA_PLATFORM/b3ee5778bc2e4a089d3299b82ec762a7/dd95ac9dbb571014a7d7f0234d762fdb.html?version=2.0.05&locale=en-US). Note that the HDB Store will need to be locally available to the instance running the ingestion process. If you are unsure about this setting, you can run the ingestion process passing the usual SQL connection details.
|
||||
$$
|
||||
|
||||
## SQL Connection
|
||||
|
||||
$$section
|
||||
### Host Port $(id="hostPort")
|
||||
Host and port of the SAP Hana service. This should be specified as a string in the format `hostname:port`. E.g., `localhost:39041`, `host.docker.internal:39041`.
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Username $(id="username")
|
||||
Username to connect to SAP Hana. This user should have access to the `SYS` schema to extract metadata. Other workflows may require different permissions. Refer to the section above for more information.
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Password $(id="password")
|
||||
Password for the specified user.
|
||||
$$
|
||||
|
||||
|
||||
$$section
|
||||
### Database $(id="database")
|
||||
Database you want to connect to. If this is not specified, we will use the user's default database.
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Database Schema $(id="databaseSchema")
|
||||
This is an optional parameter. When set, the value will be used to restrict the metadata reading to a single schema (corresponding to the value passed in this field). When left blank, OpenMetadata will scan all the schemas.
|
||||
$$
|
||||
|
||||
## HDB User Store
|
||||
|
||||
$$section
|
||||
### User Key $(id="userKey")
|
||||
HDB Store User Key generated from the command `hdbuserstore SET <KEY> <host:port> <USERNAME> <PASSWORD>`.
|
||||
$$
|
||||
|
||||
---
|
||||
|
||||
$$section
|
||||
### Connection Options $(id="connectionOptions")
|
||||
Additional connection options to build the URL that can be sent to the service during the connection.
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Connection Arguments $(id="connectionArguments")
|
||||
Additional connection arguments such as security or protocol configs that can be sent to the service during connection.
|
||||
$$
|
Binary file not shown.
After Width: | Height: | Size: 6.3 KiB |
@ -65,6 +65,7 @@ import redpanda from '../assets/img/service-icon-redpanda.png';
|
||||
import redshift from '../assets/img/service-icon-redshift.png';
|
||||
import sagemaker from '../assets/img/service-icon-sagemaker.png';
|
||||
import salesforce from '../assets/img/service-icon-salesforce.png';
|
||||
import sapHana from '../assets/img/service-icon-sap-hana.png';
|
||||
import scikit from '../assets/img/service-icon-scikit.png';
|
||||
import singlestore from '../assets/img/service-icon-singlestore.png';
|
||||
import snowflakes from '../assets/img/service-icon-snowflakes.png';
|
||||
@ -136,6 +137,7 @@ export const DYNAMODB = dynamodb;
|
||||
export const SINGLESTORE = singlestore;
|
||||
export const SALESFORCE = salesforce;
|
||||
export const MLFLOW = mlflow;
|
||||
export const SAP_HANA = sapHana;
|
||||
export const SCIKIT = scikit;
|
||||
export const DELTALAKE = deltalake;
|
||||
export const DEFAULT_SERVICE = iconDefaultService;
|
||||
|
@ -37,6 +37,7 @@ import postgresConnection from '../jsons/connectionSchemas/connections/database/
|
||||
import prestoConnection from '../jsons/connectionSchemas/connections/database/prestoConnection.json';
|
||||
import redshiftConnection from '../jsons/connectionSchemas/connections/database/redshiftConnection.json';
|
||||
import salesforceConnection from '../jsons/connectionSchemas/connections/database/salesforceConnection.json';
|
||||
import sapHanaConnection from '../jsons/connectionSchemas/connections/database/sapHanaConnection.json';
|
||||
import singleStoreConnection from '../jsons/connectionSchemas/connections/database/singleStoreConnection.json';
|
||||
import snowflakeConnection from '../jsons/connectionSchemas/connections/database/snowflakeConnection.json';
|
||||
import sqliteConnection from '../jsons/connectionSchemas/connections/database/sqliteConnection.json';
|
||||
@ -187,6 +188,11 @@ export const getDatabaseConfig = (type: DatabaseServiceType) => {
|
||||
|
||||
break;
|
||||
}
|
||||
case DatabaseServiceType.SapHana: {
|
||||
schema = sapHanaConnection;
|
||||
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
schema = mysqlConnection;
|
||||
|
||||
|
@ -88,6 +88,7 @@ import {
|
||||
REDSHIFT,
|
||||
SAGEMAKER,
|
||||
SALESFORCE,
|
||||
SAP_HANA,
|
||||
SCIKIT,
|
||||
serviceTypes,
|
||||
SERVICE_TYPE_MAP,
|
||||
@ -204,6 +205,9 @@ export const serviceTypeLogo = (type: string) => {
|
||||
case DatabaseServiceType.Salesforce:
|
||||
return SALESFORCE;
|
||||
|
||||
case DatabaseServiceType.SapHana:
|
||||
return SAP_HANA;
|
||||
|
||||
case DatabaseServiceType.DeltaLake:
|
||||
return DELTALAKE;
|
||||
|
||||
|