MINOR: Add Matillion SSL + Docs implementation (#17792)

This commit is contained in:
Ayush Shah 2024-09-16 12:57:12 +05:30 committed by GitHub
parent 2da9c34730
commit 2feb7d04ca
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 388 additions and 0 deletions

View File

@ -0,0 +1,31 @@
# Sample workflow configuration: pipeline metadata ingestion from a Matillion service.
source:
type: Matillion
serviceName: local_Matillion_123
serviceConnection:
config:
type: Matillion
connection:
type: MatillionETL
# NOTE(review): hostPort / username / password below look like placeholders — replace with real values.
hostPort: hostport
username: username
password: password
# CA certificate used for SSL validation; the parsing test also accepts this config without sslConfig.
sslConfig:
caCertificate: |
-----BEGIN CERTIFICATE-----
sample certificate
-----END CERTIFICATE-----
sourceConfig:
config:
type: PipelineMetadata
includeLineage: true
sink:
type: metadata-rest
config: {}
workflowConfig:
loggerLevel: DEBUG # DEBUG, INFO, WARN or ERROR
openMetadataServerConfig:
hostPort: http://localhost:8585/api
authProvider: openmetadata
securityConfig:
# NOTE(review): hard-coded JWT — presumably a local development token; confirm before reusing elsewhere.
jwtToken: "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"

View File

@ -45,6 +45,9 @@ from metadata.generated.schema.entity.services.connections.database.salesforceCo
from metadata.generated.schema.entity.services.connections.messaging.kafkaConnection import (
KafkaConnection,
)
from metadata.generated.schema.entity.services.connections.pipeline.matillionConnection import (
MatillionConnection,
)
from metadata.generated.schema.security.ssl import verifySSLConfig
from metadata.ingestion.connections.builders import init_empty_connection_arguments
from metadata.ingestion.models.custom_pydantic import CustomSecretStr
@ -106,6 +109,21 @@ class SSLManager:
connection.connectionArguments.root["ssl"] = ssl_args
return connection
@setup_ssl.register(MatillionConnection)
def _(self, connection):
    """
    Point a Matillion connection's CA certificate at the managed CA file.

    When the service connection has an inner ``connection`` with an
    ``sslConfig`` whose ``caCertificate`` is set, that value is replaced
    with ``self.ca_file_path``. The connection passed in is returned in
    every case.
    """
    matillion = cast(MatillionConnection, connection)

    # Nothing to rewrite without an inner connection carrying an SSL config.
    if not (matillion.connection and matillion.connection.sslConfig):
        return connection

    ssl_settings = matillion.connection.sslConfig.root
    if ssl_settings.caCertificate:
        # Swap the certificate value for the path held by this manager.
        ssl_settings.caCertificate = self.ca_file_path
    return connection
@setup_ssl.register(PostgresConnection)
@setup_ssl.register(RedshiftConnection)
@setup_ssl.register(GreenplumConnection)
@ -174,6 +192,21 @@ def check_ssl_and_init(_) -> None:
return None
@check_ssl_and_init.register(MatillionConnection)
def _(connection) -> Union[SSLManager, None]:
    """
    Create an SSLManager for a Matillion connection that ships a CA cert.

    Returns an ``SSLManager`` built from the ``caCertificate`` found under
    the inner connection's ``sslConfig``; returns ``None`` when the inner
    connection, the SSL config or the certificate is missing.
    """
    matillion = cast(MatillionConnection, connection)
    if not matillion.connection:
        return None

    ssl_config = matillion.connection.sslConfig
    if not ssl_config:
        return None

    if not ssl_config.root.caCertificate:
        return None

    # Only the CA certificate is forwarded to the manager.
    return SSLManager(ca=ssl_config.root.caCertificate)
@check_ssl_and_init.register(cls=SalesforceConnection)
def _(connection) -> Union[SSLManager, None]:
service_connection = cast(SalesforceConnection, connection)

View File

@ -323,6 +323,68 @@ class TestWorkflowParse(TestCase):
str(err.exception),
)
def test_parsing_matillion_pipeline(self):
    """
    Matillion workflow config parses with and without the SSL block, and
    the parsing error lists each connection field that has been removed.
    """
    ca_certificate = "-----BEGIN CERTIFICATE-----\nsample certificate\n-----END CERTIFICATE-----\n"
    jwt_token = "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
    expected_error = "We encountered an error parsing the configuration of your MatillionConnection.\nYou might need to review your config based on the original cause of this failure:\n\t - Missing parameter in ('connection', 'hostPort')\n\t - Missing parameter in ('connection', 'username')\n\t - Missing parameter in ('connection', 'password')"

    workflow = {
        "source": {
            "type": "Matillion",
            "serviceName": "local_Matillion_123",
            "serviceConnection": {
                "config": {
                    "type": "Matillion",
                    "connection": {
                        "type": "MatillionETL",
                        "hostPort": "hostport",
                        "username": "username",
                        "password": "password",
                        "sslConfig": {"caCertificate": ca_certificate},
                    },
                }
            },
            "sourceConfig": {
                "config": {"type": "PipelineMetadata", "includeLineage": True}
            },
        },
        "sink": {"type": "metadata-rest", "config": {}},
        "workflowConfig": {
            "loggerLevel": "DEBUG",
            "openMetadataServerConfig": {
                "hostPort": "http://localhost:8585/api",
                "authProvider": "openmetadata",
                "securityConfig": {"jwtToken": jwt_token},
            },
        },
    }

    def drop_connection_field(field_name):
        # Resolve the nested section on every call, just like a direct
        # ``del workflow[...][...]`` statement would.
        del workflow["source"]["serviceConnection"]["config"]["connection"][
            field_name
        ]

    # Complete configuration, SSL block included.
    self.assertTrue(parse_workflow_config_gracefully(workflow))

    # The SSL block can be dropped and the config still parses.
    drop_connection_field("sslConfig")
    self.assertTrue(parse_workflow_config_gracefully(workflow))

    # Removing these fields must surface one error line per field.
    for field_name in ("username", "hostPort", "password"):
        drop_connection_field(field_name)

    with self.assertRaises(ParsingConfigurationError) as err:
        parse_workflow_config_gracefully(workflow)
    self.assertIn(expected_error, str(err.exception))
def test_parsing_ingestion_pipeline_mysql(self):
"""
Test parsing of ingestion_pipeline for MYSQL

View File

@ -326,6 +326,11 @@ site_menu:
url: /connectors/pipeline/kafkaconnect
- category: Connectors / Pipeline / KafkaConnect / Run Externally
url: /connectors/pipeline/kafkaconnect/yaml
- category: Connectors / Pipeline / Matillion
url: /connectors/pipeline/matillion
isCollateOnly: true
- category: Connectors / Pipeline / Matillion / Run Externally
url: /connectors/pipeline/matillion/yaml
- category: Connectors / Pipeline / Databricks Pipeline
url: /connectors/pipeline/databricks-pipeline
- category: Connectors / Pipeline / Databricks Pipeline / Run Externally

View File

@ -0,0 +1,78 @@
---
title: Matillion
slug: /connectors/pipeline/matillion
---
{% connectorDetailsHeader
name="Matillion"
stage="PROD"
platform="Collate"
availableFeatures=["Pipelines", "Lineage"]
unavailableFeatures=["Owners", "Tags", "Pipeline Status"]
/ %}
In this section, we provide guides and references to use the Matillion connector.
Configure and schedule Matillion metadata and profiler workflows from the OpenMetadata UI:
- [Requirements](#requirements)
- [Matillion Versions](#matillion-versions)
- [Metadata Ingestion](#metadata-ingestion)
- [Connection Details](#connection-details)
- [Troubleshooting](#troubleshooting)
- [Workflow Deployment Error](#workflow-deployment-error)
{% partial file="/v1.6/connectors/ingestion-modes-tiles.md" variables={yamlPath: "/connectors/pipeline/matillion/yaml"} /%}
## Requirements
To extract metadata from Matillion, you need to create a user with the following permissions:
- `API` permission (set while creating the user, from Admin -> User)
### Matillion Versions
OpenMetadata is integrated with Matillion up to version [1.75.0](https://docs.matillion.io/getting-started).
## Metadata Ingestion
{% partial
file="/v1.6/connectors/metadata-ingestion-ui.md"
variables={
connector: "Matillion",
selectServicePath: "/images/v1.6/connectors/matillion/select-service.webp",
addNewServicePath: "/images/v1.6/connectors/matillion/add-new-service.webp",
serviceConnectionPath: "/images/v1.6/connectors/matillion/service-connection.webp",
}
/%}
{% stepsContainer %}
{% extraContent parentTagName="stepsContainer" %}
#### Connection Details
- **hostPort**: The hostname or IP address with the REST API enabled, e.g. `https://<your-matillion-host-name-here>`
- **username**: The username to authenticate with the Matillion instance.
- **password**: The password to authenticate with the Matillion instance.
- **caCertificate** : CA Certificate to authenticate with the Matillion instance.
{% /extraContent %}
{% partial file="/v1.6/connectors/test-connection.md" /%}
{% partial file="/v1.6/connectors/pipeline/configure-ingestion.md" /%}
{% partial file="/v1.6/connectors/ingestion-schedule-and-deploy.md" /%}
{% /stepsContainer %}
By successfully completing these steps, the lineage information for the service will be displayed.
{% image
src="/images/v1.6/connectors/matillion/lineage.webp"
alt="Matillion Lineage" /%}
{% partial file="/v1.6/connectors/troubleshooting.md" /%}

View File

@ -0,0 +1,141 @@
---
title: Run the Matillion Connector Externally
slug: /connectors/pipeline/matillion/yaml
---
{% connectorDetailsHeader
name="Matillion"
stage="PROD"
platform="Collate"
availableFeatures=["Pipelines", "Lineage"]
unavailableFeatures=["Owners", "Tags", "Pipeline Status"]
/ %}
In this section, we provide guides and references to use the Matillion connector.
Configure and schedule Matillion metadata and profiler workflows from the OpenMetadata UI:
- [Requirements](#requirements)
- [Matillion Versions](#matillion-versions)
- [Metadata Ingestion](#metadata-ingestion)
- [Connection Details](#connection-details)
- [Troubleshooting](#troubleshooting)
- [Workflow Deployment Error](#workflow-deployment-error)
{% partial file="/v1.6/connectors/ingestion-modes-tiles.md" variables={yamlPath: "/connectors/pipeline/matillion/yaml"} /%}
## Requirements
To extract metadata from Matillion, you need to create a user with the following permissions:
- `API` permission (set while creating the user, from Admin -> User)
### Python Requirements
{% partial file="/v1.6/connectors/python-requirements.md" /%}
To run the Matillion ingestion, you will need to install:
```bash
pip3 install "openmetadata-ingestion[matillion]"
```
## Metadata Ingestion
All connectors are defined as JSON Schemas.
[Here](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/pipeline/matillionConnection.json)
you can find the structure to create a connection to Matillion.
In order to create and run a Metadata Ingestion workflow, we will follow
the steps to create a YAML configuration able to connect to the source,
process the Entities if needed, and reach the OpenMetadata server.
The workflow is modeled around the following
[JSON Schema](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/workflow.json)
### 1. Define the YAML Config
This is a sample config for Matillion:
{% codePreview %}
{% codeInfoContainer %}
#### Source Configuration - Service Connection
{% codeInfo srNumber=1 %}
**hostPort**: The hostname or IP address with the REST API enabled, e.g. `https://<your-matillion-host-name-here>`
{% /codeInfo %}
{% codeInfo srNumber=2 %}
**username**: The username to authenticate with the Matillion instance.
{% /codeInfo %}
{% codeInfo srNumber=3 %}
**password**: The password to authenticate with the Matillion instance.
{% /codeInfo %}
{% codeInfo srNumber=4 %}
**caCertificate** : CA Certificate to authenticate with the Matillion instance.
{% /codeInfo %}
{% partial file="/v1.6/connectors/yaml/pipeline/source-config-def.md" /%}
{% partial file="/v1.6/connectors/yaml/ingestion-sink-def.md" /%}
{% partial file="/v1.6/connectors/yaml/workflow-config-def.md" /%}
{% /codeInfoContainer %}
{% codeBlock fileName="filename.yaml" %}
```yaml {% isCodeBlock=true %}
source:
type: matillion
serviceName: matillion_service
serviceConnection:
config:
type: Matillion
```
```yaml {% srNumber=1 %}
hostPort: "https://<your-matillion-here>"
```
```yaml {% srNumber=2 %}
username: "username"
```
```yaml {% srNumber=3 %}
password: "password"
```
```yaml {% srNumber=4 %}
sslConfig:
caCertificate: |
-----BEGIN CERTIFICATE-----
sample caCertificateData
-----END CERTIFICATE-----
```
{% partial file="/v1.6/connectors/yaml/pipeline/source-config.md" /%}
{% partial file="/v1.6/connectors/yaml/ingestion-sink.md" /%}
{% partial file="/v1.6/connectors/yaml/workflow-config.md" /%}
{% /codeBlock %}
{% /codePreview %}
{% partial file="/v1.6/connectors/yaml/ingestion-cli.md" /%}

View File

@ -513,6 +513,11 @@ site_menu:
url: /connectors/pipeline/kafkaconnect
- category: Connectors / Pipeline / KafkaConnect / Run Externally
url: /connectors/pipeline/kafkaconnect/yaml
- category: Connectors / Pipeline / Matillion
url: /connectors/pipeline/matillion
isCollateOnly: true
- category: Connectors / Pipeline / Matillion / Run Externally
url: /connectors/pipeline/matillion/yaml
- category: Connectors / Pipeline / Databricks Pipeline
url: /connectors/pipeline/databricks-pipeline
- category: Connectors / Pipeline / Databricks Pipeline / Run Externally

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 114 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

View File

@ -0,0 +1,32 @@
# Matillion
In this section, we provide guides and references to use the Matillion connector.
## Requirements:
To extract metadata from Matillion, you need to create a user with the following permissions:
- `API` permission (set while creating the user, from Admin -> User)
## Connection Details
$$section
### Host Port $(id="hostPort")
This parameter specifies the network location where your Matillion ETL instance is accessible, combining both the hostname and the port (when a non-default port is used).
It should be formatted as a URI string, either `http://hostname` or `https://hostname`, depending on your security requirements.
$$
$$section
### Username $(id="username")
Username to connect to Matillion. This user should have access to the APIs to extract metadata. Other workflows may require different permissions -- refer to the section above for more information.
$$
$$section
### Password $(id="password")
Password of the user account to connect with Matillion.
$$
$$section
### SSL CA $(id="caCertificate")
The CA certificate used for SSL validation.
$$

View File

@ -142,6 +142,7 @@ class ServiceUtilClassBase {
MetadataServiceType.Alation,
APIServiceType.Webhook,
MlModelServiceType.VertexAI,
PipelineServiceType.Matillion,
];
DatabaseServiceTypeSmallCase = this.convertEnumToLowerCase<