diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index e12f4c7e9f..b5380b20f2 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -6,6 +6,9 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ### Breaking Changes - #7016 Add `add_database_name_to_urn` flag to Oracle source which ensure that Dataset urns have the DB name as a prefix to prevent collision (.e.g. {database}.{schema}.{table}). ONLY breaking if you set this flag to true, otherwise behavior remains the same. +- The Airflow plugin no longer includes the DataHub Kafka emitter by default. Use `pip install acryl-datahub-airflow-plugin[datahub-kafka]` for Kafka support. +- The Airflow lineage backend no longer includes the DataHub Kafka emitter by default. Use `pip install acryl-datahub[airflow,datahub-kafka]` for Kafka support. + ### Potential Downtime diff --git a/docs/lineage/airflow.md b/docs/lineage/airflow.md index 128423f0f0..745076f283 100644 --- a/docs/lineage/airflow.md +++ b/docs/lineage/airflow.md @@ -26,6 +26,12 @@ If you're using Airflow 1.x, use the Airflow lineage plugin with acryl-datahub-a ```shell pip install acryl-datahub-airflow-plugin ``` +:::note + +The [DataHub Rest](../../metadata-ingestion/sink_docs/datahub.md#datahub-rest) emitter is included in the plugin package by default. To use [DataHub Kafka](../../metadata-ingestion/sink_docs/datahub.md#datahub-kafka), run `pip install acryl-datahub-airflow-plugin[datahub-kafka]`. + +::: + 2. Disable lazy plugin loading in your airflow.cfg. On MWAA you should add this config to your [Apache Airflow configuration options](https://docs.aws.amazon.com/mwaa/latest/userguide/configuring-env-variables.html#configuring-2.0-airflow-override). 
@@ -89,6 +95,8 @@ If you are looking to run Airflow and DataHub using docker locally, follow the g ```shell pip install acryl-datahub[airflow] +# If you need the Kafka-based emitter/hook: +pip install acryl-datahub[airflow,datahub-kafka] ``` 2. You must configure an Airflow hook for Datahub. We support both a Datahub REST hook and a Kafka-based hook, but you only need one. diff --git a/metadata-ingestion-modules/airflow-plugin/setup.py b/metadata-ingestion-modules/airflow-plugin/setup.py index 8c6338c114..a5ec6c5b00 100644 --- a/metadata-ingestion-modules/airflow-plugin/setup.py +++ b/metadata-ingestion-modules/airflow-plugin/setup.py @@ -125,5 +125,6 @@ setuptools.setup( install_requires=list(base_requirements), extras_require={ "dev": list(dev_requirements), + "datahub-kafka": f"acryl-datahub[datahub-kafka] == {package_metadata['__version__']}", }, ) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 5eda705392..a303bcd353 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -251,7 +251,6 @@ plugins: Dict[str, Set[str]] = { "airflow": { "apache-airflow >= 2.0.2", *rest_common, - *kafka_common, }, "circuit-breaker": { "gql>=3.3.0",