mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-02 19:58:59 +00:00
feat(ingest/kakfa-connect): support generic connectors (#6173)
Co-authored-by: Harshal Sheth <hsheth2@gmail.com>
This commit is contained in:
parent
18df38e504
commit
64c28a2316
@ -37,6 +37,12 @@ class ProvidedConfig(ConfigModel):
|
|||||||
value: str
|
value: str
|
||||||
|
|
||||||
|
|
||||||
|
class GenericConnectorConfig(ConfigModel):
|
||||||
|
connector_name: str
|
||||||
|
source_dataset: str
|
||||||
|
source_platform: str
|
||||||
|
|
||||||
|
|
||||||
class KafkaConnectSourceConfig(DatasetLineageProviderConfigBase):
|
class KafkaConnectSourceConfig(DatasetLineageProviderConfigBase):
|
||||||
# See the Connect REST Interface for details
|
# See the Connect REST Interface for details
|
||||||
# https://docs.confluent.io/platform/current/connect/references/restapi.html#
|
# https://docs.confluent.io/platform/current/connect/references/restapi.html#
|
||||||
@ -67,6 +73,10 @@ class KafkaConnectSourceConfig(DatasetLineageProviderConfigBase):
|
|||||||
default=None,
|
default=None,
|
||||||
description='Platform instance mapping to use when constructing URNs. e.g.`platform_instance_map: { "hive": "warehouse" }`',
|
description='Platform instance mapping to use when constructing URNs. e.g.`platform_instance_map: { "hive": "warehouse" }`',
|
||||||
)
|
)
|
||||||
|
generic_connectors: List[GenericConnectorConfig] = Field(
|
||||||
|
default=[],
|
||||||
|
description="Provide lineage graph for sources connectors other than Confluent JDBC Source Connector or Debezium Source Connector",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@ -836,6 +846,7 @@ class KafkaConnectSource(Source):
|
|||||||
|
|
||||||
- works only for
|
- works only for
|
||||||
- JDBC and Debezium source connectors
|
- JDBC and Debezium source connectors
|
||||||
|
- Generic connectors with user-defined lineage graph
|
||||||
- BigQuery sink connector
|
- BigQuery sink connector
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@ -923,20 +934,35 @@ class KafkaConnectSource(Source):
|
|||||||
config=self.config,
|
config=self.config,
|
||||||
report=self.report,
|
report=self.report,
|
||||||
).connector_manifest
|
).connector_manifest
|
||||||
|
elif connector_manifest.config.get("connector.class", "").startswith(
|
||||||
|
"io.debezium.connector"
|
||||||
|
):
|
||||||
|
connector_manifest = DebeziumSourceConnector(
|
||||||
|
connector_manifest=connector_manifest, config=self.config
|
||||||
|
).connector_manifest
|
||||||
else:
|
else:
|
||||||
# Debezium Source Connector lineages
|
# Find the target connector object in the list, or log an error if unknown.
|
||||||
try:
|
target_connector = None
|
||||||
connector_manifest = DebeziumSourceConnector(
|
for connector in self.config.generic_connectors:
|
||||||
connector_manifest=connector_manifest, config=self.config
|
if connector.connector_name == connector_manifest.name:
|
||||||
).connector_manifest
|
target_connector = connector
|
||||||
|
break
|
||||||
except ValueError as err:
|
if not target_connector:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Skipping connector {connector_manifest.name} due to error: {err}"
|
f"Detected undefined connector {connector_manifest.name}, which is not in the customized connector list. Please refer to Kafka Connect ingestion recipe to define this customized connector."
|
||||||
)
|
)
|
||||||
self.report.report_failure(connector_manifest.name, str(err))
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
for topic in topics:
|
||||||
|
lineage = KafkaConnectLineage(
|
||||||
|
source_dataset=target_connector.source_dataset,
|
||||||
|
source_platform=target_connector.source_platform,
|
||||||
|
target_dataset=topic,
|
||||||
|
target_platform="kafka",
|
||||||
|
)
|
||||||
|
|
||||||
|
connector_manifest.lineages.append(lineage)
|
||||||
|
|
||||||
if connector_manifest.type == "sink":
|
if connector_manifest.type == "sink":
|
||||||
if connector_manifest.config.get("connector.class").__eq__(
|
if connector_manifest.config.get("connector.class").__eq__(
|
||||||
"com.wepay.kafka.connect.bigquery.BigQuerySinkConnector"
|
"com.wepay.kafka.connect.bigquery.BigQuerySinkConnector"
|
||||||
|
|||||||
@ -28,6 +28,8 @@ services:
|
|||||||
#
|
#
|
||||||
confluent-hub install --no-prompt confluentinc/connect-transforms:1.4.1
|
confluent-hub install --no-prompt confluentinc/connect-transforms:1.4.1
|
||||||
#
|
#
|
||||||
|
confluent-hub install --no-prompt confluentinc/kafka-connect-datagen:0.6.0
|
||||||
|
#
|
||||||
confluent-hub install --no-prompt debezium/debezium-connector-mysql:1.7.0
|
confluent-hub install --no-prompt debezium/debezium-connector-mysql:1.7.0
|
||||||
#
|
#
|
||||||
#confluent-hub install --no-prompt wepay/kafka-connect-bigquery:1.6.8
|
#confluent-hub install --no-prompt wepay/kafka-connect-bigquery:1.6.8
|
||||||
|
|||||||
@ -19,6 +19,10 @@ source:
|
|||||||
connect_to_platform_map: # optional
|
connect_to_platform_map: # optional
|
||||||
postgres_source: # optional - Connector name
|
postgres_source: # optional - Connector name
|
||||||
postgres: postgres1 # optional - Platform to instance map
|
postgres: postgres1 # optional - Platform to instance map
|
||||||
|
generic_connectors: # optional - Generic connector lineage info
|
||||||
|
- connector_name: generic-connector
|
||||||
|
source_dataset: generic-dataset
|
||||||
|
source_platform: generic-platform
|
||||||
|
|
||||||
# see https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for complete documentation
|
# see https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for complete documentation
|
||||||
sink:
|
sink:
|
||||||
|
|||||||
@ -215,6 +215,29 @@ def test_kafka_connect_ingest(docker_compose_runner, pytestconfig, tmp_path, moc
|
|||||||
}""",
|
}""",
|
||||||
)
|
)
|
||||||
assert r.status_code == 201 # Created
|
assert r.status_code == 201 # Created
|
||||||
|
|
||||||
|
# Creating Generic source
|
||||||
|
r = requests.post(
|
||||||
|
"http://localhost:58083/connectors",
|
||||||
|
headers={"Content-Type": "application/json"},
|
||||||
|
data="""{
|
||||||
|
"name": "generic_source",
|
||||||
|
"config": {
|
||||||
|
"connector.class": "io.confluent.kafka.connect.datagen.DatagenConnector",
|
||||||
|
"kafka.topic": "my-topic",
|
||||||
|
"quickstart": "product",
|
||||||
|
"key.converter": "org.apache.kafka.connect.storage.StringConverter",
|
||||||
|
"value.converter": "org.apache.kafka.connect.json.JsonConverter",
|
||||||
|
"value.converter.schemas.enable": "false",
|
||||||
|
"max.interval": 1000,
|
||||||
|
"iterations": 10000000,
|
||||||
|
"tasks.max": "1"
|
||||||
|
}
|
||||||
|
}""",
|
||||||
|
)
|
||||||
|
r.raise_for_status()
|
||||||
|
assert r.status_code == 201 # Created
|
||||||
|
|
||||||
# Give time for connectors to process the table data
|
# Give time for connectors to process the table data
|
||||||
time.sleep(60)
|
time.sleep(60)
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user