mirror of https://github.com/datahub-project/datahub.git
synced 2025-08-22 16:18:10 +00:00
feat(ingest): add Kafka-based emitter example (#2412)
This commit is contained in:
parent 43075c4a91
commit 27a672e697
@@ -518,10 +518,8 @@ sink:

 In some cases, you might want to construct the MetadataChangeEvents yourself but still use this framework to emit that metadata to DataHub. In this case, take a look at the emitter interfaces, which can easily be imported and called from your own code.

-- [DataHub emitter via REST](./src/datahub/emitter/rest_emitter.py) (same requirements as `datahub-rest`)
-- [DataHub emitter via Kafka](./src/datahub/emitter/kafka_emitter.py) (same requirements as `datahub-kafka`)
-
-For a basic usage example, see the [lineage_emitter.py](./examples/library/lineage_emitter.py) example.
+- [DataHub emitter via REST](./src/datahub/emitter/rest_emitter.py) (same requirements as `datahub-rest`). Basic usage [example](./examples/library/lineage_emitter_rest.py).
+- [DataHub emitter via Kafka](./src/datahub/emitter/kafka_emitter.py) (same requirements as `datahub-kafka`). Basic usage [example](./examples/library/lineage_emitter_kafka.py).

 ## Usage with Airflow
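The diff links a REST usage example but does not include its contents. For comparison with the Kafka file added below, here is a minimal sketch of what calling that emitter might look like, assuming `DatahubRestEmitter` accepts the GMS endpoint URL (the `http://localhost:8080` address is an assumed default, not taken from this commit) and exposes a blocking `emit_mce` method:

import datahub.emitter.mce_builder as builder
from datahub.emitter.rest_emitter import DatahubRestEmitter

# Construct a lineage object (same builder calls as the Kafka example below).
lineage_mce = builder.make_lineage_mce(
    [
        builder.make_dataset_urn("bigquery", "upstream1"),
        builder.make_dataset_urn("bigquery", "upstream2"),
    ],
    builder.make_dataset_urn("bigquery", "downstream"),
)

# Create an emitter pointed at the DataHub GMS REST endpoint.
# The URL here is an assumed local default.
emitter = DatahubRestEmitter("http://localhost:8080")

# Emit metadata synchronously; failures presumably surface as exceptions.
emitter.emit_mce(lineage_mce)

Unlike the Kafka path, this needs no delivery callback or flush step, which is why the two examples are linked separately per emitter.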
metadata-ingestion/examples/library/lineage_emitter_kafka.py (new file, 36 lines)
@@ -0,0 +1,36 @@
import datahub.emitter.mce_builder as builder
from datahub.emitter.kafka_emitter import DatahubKafkaEmitter, KafkaEmitterConfig

# Construct a lineage object.
lineage_mce = builder.make_lineage_mce(
    [
        builder.make_dataset_urn("bigquery", "upstream1"),
        builder.make_dataset_urn("bigquery", "upstream2"),
    ],
    builder.make_dataset_urn("bigquery", "downstream"),
)

# Create an emitter to DataHub's Kafka broker.
emitter = DatahubKafkaEmitter(
    KafkaEmitterConfig.parse_obj(
        # This is the same config format as the standard Kafka sink's YAML.
        {
            "connection": {
                "bootstrap": "broker:9092",
                "producer_config": {},
                "schema_registry_url": "http://schema-registry:8081",
            }
        }
    )
)

# Emit metadata!
def callback(err, msg):
    if err:
        # Handle the metadata emission error.
        print("error:", err)

emitter.emit_mce_async(lineage_mce, callback)
emitter.flush()
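One detail worth noting in the new file: `producer_config` is left empty. That block is presumably forwarded to the underlying Kafka producer, so broker-level options would go there. A hedged sketch of a secured-broker variant, assuming librdkafka-style option names (the `security.protocol` and `sasl.mechanism` keys are assumptions, not taken from this commit):

from datahub.emitter.kafka_emitter import DatahubKafkaEmitter, KafkaEmitterConfig

emitter = DatahubKafkaEmitter(
    KafkaEmitterConfig.parse_obj(
        {
            "connection": {
                "bootstrap": "broker:9092",
                # Assumed librdkafka keys for a SASL_SSL-secured broker;
                # passed through to the underlying producer.
                "producer_config": {
                    "security.protocol": "SASL_SSL",
                    "sasl.mechanism": "PLAIN",
                },
                "schema_registry_url": "http://schema-registry:8081",
            }
        }
    )
)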