mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-04 20:58:36 +00:00
doc(ingestion/airflow-plugin): update for developers (#10633)
This commit is contained in:
parent
e842161849
commit
12455f95d7
@ -69,7 +69,7 @@ enabled = True # default
|
|||||||
| -------------------------- | -------------------- | ---------------------------------------------------------------------------------------- |
|
| -------------------------- | -------------------- | ---------------------------------------------------------------------------------------- |
|
||||||
| enabled | true | If the plugin should be enabled. |
|
| enabled | true | If the plugin should be enabled. |
|
||||||
| conn_id | datahub_rest_default | The name of the datahub rest connection. |
|
| conn_id | datahub_rest_default | The name of the datahub rest connection. |
|
||||||
| cluster | prod | name of the airflow cluster |
|
| cluster | prod | Name of the Airflow cluster; this is equivalent to the `env` of the instance. |
|
||||||
| capture_ownership_info | true | Extract DAG ownership. |
|
| capture_ownership_info | true | Extract DAG ownership. |
|
||||||
| capture_tags_info | true | Extract DAG tags. |
|
| capture_tags_info | true | Extract DAG tags. |
|
||||||
| capture_executions | true | Extract task runs and success/failure statuses. This will show up in DataHub "Runs" tab. |
|
| capture_executions | true | Extract task runs and success/failure statuses. This will show up in DataHub "Runs" tab. |
|
||||||
|
|||||||
@ -34,7 +34,30 @@ cd metadata-ingestion-modules/airflow-plugin
|
|||||||
../../gradlew :metadata-ingestion-modules:airflow-plugin:installDev
|
../../gradlew :metadata-ingestion-modules:airflow-plugin:installDev
|
||||||
source venv/bin/activate
|
source venv/bin/activate
|
||||||
datahub version # should print "DataHub CLI version: unavailable (installed in develop mode)"
|
datahub version # should print "DataHub CLI version: unavailable (installed in develop mode)"
|
||||||
|
|
||||||
|
# start the airflow web server
|
||||||
|
export AIRFLOW_HOME=~/airflow
|
||||||
|
airflow webserver --port 8090 -d
|
||||||
|
|
||||||
|
# start the airflow scheduler
|
||||||
|
airflow scheduler
|
||||||
|
|
||||||
|
# access the airflow service and run any of the DAGs
|
||||||
|
# open http://localhost:8090/
|
||||||
|
# select any DAG and click on the `play arrow` button to start the DAG
|
||||||
|
|
||||||
|
# add debug lines to the codebase, e.g. in ./src/datahub_airflow_plugin/datahub_listener.py
|
||||||
|
logger.debug("this is the sample debug line")
|
||||||
|
|
||||||
|
# run the DAG again; you can then see the debug lines in the task_run log by following these steps:
|
||||||
|
#1. click on the `timestamp` in the `Last Run` column
|
||||||
|
#2. select the task
|
||||||
|
#3. click on the `log` option
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
> **P.S. If you are not able to see the log lines, restart the `airflow scheduler` and rerun the DAG.**
|
||||||
|
|
||||||
### (Optional) Set up your Python environment for developing on Dagster Plugin
|
### (Optional) Set up your Python environment for developing on Dagster Plugin
|
||||||
|
|
||||||
From the repository root:
|
From the repository root:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user