diff --git a/openmetadata-docs/content/partials/v1.4/connectors/pipeline/connectors-list.md b/openmetadata-docs/content/partials/v1.4/connectors/pipeline/connectors-list.md index 88fc4e9dc3d..c96d1d03386 100644 --- a/openmetadata-docs/content/partials/v1.4/connectors/pipeline/connectors-list.md +++ b/openmetadata-docs/content/partials/v1.4/connectors/pipeline/connectors-list.md @@ -7,6 +7,7 @@ {% connectorInfoCard name="Domo" stage="PROD" href="/connectors/pipeline/domo-pipeline" platform="OpenMetadata" / %} {% connectorInfoCard name="Fivetran" stage="PROD" href="/connectors/pipeline/fivetran" platform="OpenMetadata" / %} {% connectorInfoCard name="Glue" stage="PROD" href="/connectors/pipeline/glue-pipeline" platform="OpenMetadata" / %} +{% connectorInfoCard name="KafkaConnect" stage="PROD" href="/connectors/pipeline/kafkaconnect" platform="OpenMetadata" / %} {% connectorInfoCard name="NiFi" stage="PROD" href="/connectors/pipeline/nifi" platform="OpenMetadata" / %} {% connectorInfoCard name="Spline" stage="BETA" href="/connectors/pipeline/spline" platform="OpenMetadata" / %} diff --git a/openmetadata-docs/content/partials/v1.4/connectors/yaml/pipeline/source-config.md b/openmetadata-docs/content/partials/v1.4/connectors/yaml/pipeline/source-config.md index b1afdcc1a72..a35f161e9c9 100644 --- a/openmetadata-docs/content/partials/v1.4/connectors/yaml/pipeline/source-config.md +++ b/openmetadata-docs/content/partials/v1.4/connectors/yaml/pipeline/source-config.md @@ -2,6 +2,9 @@ sourceConfig: config: type: PipelineMetadata + # lineageInformation: + # dbServiceNames: [] + # storageServiceNames: [] # markDeletedPipelines: True # includeTags: True # includeLineage: true diff --git a/openmetadata-docs/content/v1.4.x-SNAPSHOT/connectors/pipeline/kafkaconnect/index.md b/openmetadata-docs/content/v1.4.x-SNAPSHOT/connectors/pipeline/kafkaconnect/index.md new file mode 100644 index 00000000000..c3dd99239b4 --- /dev/null +++ 
b/openmetadata-docs/content/v1.4.x-SNAPSHOT/connectors/pipeline/kafkaconnect/index.md @@ -0,0 +1,105 @@ +--- +title: KafkaConnect +slug: /connectors/pipeline/kafkaconnect +--- + +{% connectorDetailsHeader +name="KafkaConnect" +stage="PROD" +platform="Collate" +availableFeatures=["Pipelines", "Pipeline Status", "Lineage"] +unavailableFeatures=["Owners", "Tags"] +/ %} + + +In this section, we provide guides and references to use the KafkaConnect connector. + +Configure and schedule KafkaConnect metadata and profiler workflows from the OpenMetadata UI: + +- [Requirements](#requirements) + - [KafkaConnect Versions](#kafkaconnect-versions) +- [Metadata Ingestion](#metadata-ingestion) + - [Service Name](#service-name) + - [Connection Details](#connection-details) + - [Metadata Ingestion Options](#metadata-ingestion-options) +- [Troubleshooting](#troubleshooting) + - [Workflow Deployment Error](#workflow-deployment-error) + +{% partial file="/v1.4/connectors/ingestion-modes-tiles.md" variables={yamlPath: "/connectors/pipeline/kafkaconnect/yaml"} /%} + +## Requirements + +### KafkaConnect Versions + +OpenMetadata is integrated with kafkaconnect up to version [3.6.1](https://docs.kafkaconnect.io/getting-started) and will continue to work for future kafkaconnect versions. 
+ +The ingestion framework uses [kafkaconnect python client](https://libraries.io/pypi/kafka-connect-py) to connect to the kafkaconnect instance and perform the API calls. + +## Metadata Ingestion + +{% partial + file="/v1.4/connectors/metadata-ingestion-ui.md" + variables={ + connector: "KafkaConnect", + selectServicePath: "/images/v1.4/connectors/kafkaconnect/select-service.webp", + addNewServicePath: "/images/v1.4/connectors/kafkaconnect/add-new-service.webp", + serviceConnectionPath: "/images/v1.4/connectors/kafkaconnect/service-connection.webp", + } +/%} + +{% stepsContainer %} +{% extraContent parentTagName="stepsContainer" %} + +#### Connection Details + +- **Host and Port**: The hostname or IP address of the Kafka Connect worker with the REST API enabled, e.g. `https://localhost:8083` or `https://127.0.0.1:8083` or `https://<your-kafka-connect-rest-hostname>` + +- **Kafka Connect Config**: OpenMetadata supports username/password. + 1. Basic Authentication + - Username: Username to connect to Kafka Connect. This user should be able to send requests to the Kafka Connect API and access the [Rest API](https://docs.confluent.io/platform/current/connect/references/restapi.html) GET endpoints. + - Password: Password to connect to Kafka Connect. + +- **verifySSL** : Whether SSL verification should be performed when authenticating. + +- **Kafka Service Name** : The Service Name of the Ingested [Kafka](/connectors/messaging/kafka#4.-name-and-describe-your-service) instance associated with this KafkaConnect instance. + +{% /extraContent %} + +{% partial file="/v1.4/connectors/test-connection.md" /%} + +{% partial file="/v1.4/connectors/pipeline/configure-ingestion.md" /%} + +{% partial file="/v1.4/connectors/ingestion-schedule-and-deploy.md" /%} + +{% /stepsContainer %} + +## Displaying Lineage Information +Steps to retrieve and display the lineage information for a Kafka Connect service. +1. 
Ingest Kafka Messaging Service Metadata: Identify the Kafka messaging service associated with the Kafka Connect service. Ensure all connected topics are comprehensively ingested. +2. Ingest Source and Sink Database/Storage System Metadata: Identify both the source and sink database or storage systems used by the Kafka Connect service. Ingest metadata for these database or storage systems. +3. Ingest Kafka Connect Service Metadata: Finally, ingest your Kafka Connect service. + +By successfully completing these steps, the lineage information for the service will be displayed. + +{% image + src="/images/v1.4/connectors/kafkaconnect/lineage.webp" + alt="Kafkaconnect Lineage" /%} + + +## Supported Connectors +Currently, the following source and sink connectors for Kafka Connect are supported for lineage tracking: +* [MySQL](/connectors/database/mysql) +* [PostgreSQL](/connectors/database/postgres) +* [MSSQL](/connectors/database/mssql) +* [MongoDB](/connectors/database/mongodb) +* [Amazon S3](/connectors/storage/s3) + +For these connectors, lineage information can be obtained provided they are configured with a source or sink and the corresponding metadata ingestion is enabled. + +{% partial file="/v1.4/connectors/troubleshooting.md" /%} + +### Missing Lineage +If lineage information is not displayed for a Kafka Connect service, follow these steps to diagnose the issue. +1. *Kafka Service Association*: Make sure the Kafka service that the data is being ingested from is associated with this Kafka Connect service. Additionally, verify that the correct name is provided in the Kafka Service Name field during configuration. This field helps establish the lineage between the Kafka service and the Kafka Connect flow. +2. *Source and Sink Configuration*: Verify that the Kafka Connect connector associated with the service is configured with a source and/or sink database or storage system. Connectors without a defined source or sink cannot provide lineage data. +3. 
*Metadata Ingestion*: Ensure that metadata for both the source and sink database/storage systems is ingested and passed to the lineage system. This typically involves configuring the relevant connectors to capture and transmit this information. diff --git a/openmetadata-docs/content/v1.4.x-SNAPSHOT/connectors/pipeline/kafkaconnect/yaml.md b/openmetadata-docs/content/v1.4.x-SNAPSHOT/connectors/pipeline/kafkaconnect/yaml.md new file mode 100644 index 00000000000..912b3f2a5d2 --- /dev/null +++ b/openmetadata-docs/content/v1.4.x-SNAPSHOT/connectors/pipeline/kafkaconnect/yaml.md @@ -0,0 +1,121 @@ +--- +title: Run the KafkaConnect Connector Externally +slug: /connectors/pipeline/kafkaconnect/yaml +--- + +{% connectorDetailsHeader +name="KafkaConnect" +stage="PROD" +platform="Collate" +availableFeatures=["Pipelines", "Pipeline Status", "Lineage"] +unavailableFeatures=["Owners", "Tags"] +/ %} + + +In this section, we provide guides and references to use the KafkaConnect connector. + +Configure and schedule KafkaConnect metadata and profiler workflows from the OpenMetadata UI: + +- [Requirements](#requirements) +- [Metadata Ingestion](#metadata-ingestion) + +{% partial file="/v1.3/connectors/external-ingestion-deployment.md" /%} + +## Requirements + +### Python Requirements + +To run the KafkaConnect ingestion, you will need to install: + +```bash +pip3 install "openmetadata-ingestion[kafkaconnect]" +``` + +## Metadata Ingestion + +All connectors are defined as JSON Schemas. +[Here](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/pipeline/kafkaConnectConnection.json) +you can find the structure to create a connection to KafkaConnect. + +In order to create and run a Metadata Ingestion workflow, we will follow +the steps to create a YAML configuration able to connect to the source, +process the Entities if needed, and reach the OpenMetadata server. 
+ +The workflow is modeled around the following +[JSON Schema](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/workflow.json) + +### 1. Define the YAML Config + +This is a sample config for KafkaConnect: + +{% codePreview %} + +{% codeInfoContainer %} + +#### Source Configuration - Service Connection + +{% codeInfo srNumber=1 %} + +**hostPort**: The hostname or IP address of the Kafka Connect worker with the REST API enabled. + +{% /codeInfo %} + +{% codeInfo srNumber=2 %} + +**verifySSL**: Whether SSL verification should be performed when authenticating. + +{% /codeInfo %} + +{% codeInfo srNumber=3 %} + +**Kafka Connect Config**: OpenMetadata supports username/password or no authentication. + +*Basic Authentication* + - Username: Username to connect to Kafka Connect. This user should be able to send requests to the Kafka Connect API and access the [Rest API](https://docs.confluent.io/platform/current/connect/references/restapi.html) GET endpoints. + - Password: Password to connect to Kafka Connect. 
+ +{% /codeInfo %} + + +{% partial file="/v1.3/connectors/yaml/pipeline/source-config-def.md" /%} + +{% partial file="/v1.3/connectors/yaml/ingestion-sink-def.md" /%} + +{% partial file="/v1.3/connectors/yaml/workflow-config-def.md" /%} + +{% /codeInfoContainer %} + +{% codeBlock fileName="filename.yaml" %} + + +```yaml +source: + type: kafkaconnect + serviceName: kafka_connect_source + serviceConnection: + config: + type: KafkaConnect +``` +```yaml {% srNumber=1 %} + hostPort: "https://" # or http://localhost:8083 or http://127.0.0.1:8083 +``` +```yaml {% srNumber=2 %} + verifySSL: true +``` +```yaml {% srNumber=3 %} + authType: + username: username + password: password +``` + +{% partial file="/v1.3/connectors/yaml/pipeline/source-config.md" /%} + +{% partial file="/v1.3/connectors/yaml/ingestion-sink.md" /%} + +{% partial file="/v1.3/connectors/yaml/workflow-config.md" /%} + +{% /codeBlock %} + +{% /codePreview %} + +{% partial file="/v1.3/connectors/yaml/ingestion-cli.md" /%} diff --git a/openmetadata-docs/content/v1.4.x-SNAPSHOT/menu.md b/openmetadata-docs/content/v1.4.x-SNAPSHOT/menu.md index 04e80af1f51..8c506afc52e 100644 --- a/openmetadata-docs/content/v1.4.x-SNAPSHOT/menu.md +++ b/openmetadata-docs/content/v1.4.x-SNAPSHOT/menu.md @@ -704,6 +704,10 @@ site_menu: url: /connectors/pipeline/dagster - category: Connectors / Pipeline / Dagster / Run Externally url: /connectors/pipeline/dagster/yaml + - category: Connectors / Pipeline / KafkaConnect + url: /connectors/pipeline/kafkaconnect + - category: Connectors / Pipeline / KafkaConnect / Run Externally + url: /connectors/pipeline/kafkaconnect/yaml - category: Connectors / Pipeline / Databricks Pipeline url: /connectors/pipeline/databricks-pipeline - category: Connectors / Pipeline / Databricks Pipeline / Run Externally diff --git a/openmetadata-docs/images/v1.4/connectors/kafkaconnect/add-new-service.webp b/openmetadata-docs/images/v1.4/connectors/kafkaconnect/add-new-service.webp new file mode 100644 index 
00000000000..5d12056eafa Binary files /dev/null and b/openmetadata-docs/images/v1.4/connectors/kafkaconnect/add-new-service.webp differ diff --git a/openmetadata-docs/images/v1.4/connectors/kafkaconnect/lineage.webp b/openmetadata-docs/images/v1.4/connectors/kafkaconnect/lineage.webp new file mode 100644 index 00000000000..c5f0119e15b Binary files /dev/null and b/openmetadata-docs/images/v1.4/connectors/kafkaconnect/lineage.webp differ diff --git a/openmetadata-docs/images/v1.4/connectors/kafkaconnect/select-service.webp b/openmetadata-docs/images/v1.4/connectors/kafkaconnect/select-service.webp new file mode 100644 index 00000000000..e43d54ccf02 Binary files /dev/null and b/openmetadata-docs/images/v1.4/connectors/kafkaconnect/select-service.webp differ diff --git a/openmetadata-docs/images/v1.4/connectors/kafkaconnect/service-connection.webp b/openmetadata-docs/images/v1.4/connectors/kafkaconnect/service-connection.webp new file mode 100644 index 00000000000..efa8bd69b4d Binary files /dev/null and b/openmetadata-docs/images/v1.4/connectors/kafkaconnect/service-connection.webp differ