mirror of
				https://github.com/datahub-project/datahub.git
				synced 2025-11-04 04:39:10 +00:00 
			
		
		
		
	doc(ingestion/airflow-plugin): update for developers (#10633)
This commit is contained in:
		
							parent
							
								
									e842161849
								
							
						
					
					
						commit
						12455f95d7
					
				@ -69,7 +69,7 @@ enabled = True  # default
 | 
			
		||||
| -------------------------- | -------------------- | ---------------------------------------------------------------------------------------- |
 | 
			
		||||
| enabled                    | true                 | If the plugin should be enabled.                                                         |
 | 
			
		||||
| conn_id                    | datahub_rest_default | The name of the datahub rest connection.                                                 |
 | 
			
		||||
| cluster                    | prod                 | name of the airflow cluster                                                              |
 | 
			
		||||
| cluster                    | prod                 | Name of the Airflow cluster; this is equivalent to the `env` of the instance.                                                           |
 | 
			
		||||
| capture_ownership_info     | true                 | Extract DAG ownership.                                                                   |
 | 
			
		||||
| capture_tags_info          | true                 | Extract DAG tags.                                                                        |
 | 
			
		||||
| capture_executions         | true                 | Extract task runs and success/failure statuses. This will show up in DataHub "Runs" tab. |
 | 
			
		||||
 | 
			
		||||
@ -34,7 +34,30 @@ cd metadata-ingestion-modules/airflow-plugin
 | 
			
		||||
../../gradlew :metadata-ingestion-modules:airflow-plugin:installDev
 | 
			
		||||
source venv/bin/activate
 | 
			
		||||
datahub version  # should print "DataHub CLI version: unavailable (installed in develop mode)"
 | 
			
		||||
 | 
			
		||||
# start the airflow web server
 | 
			
		||||
export AIRFLOW_HOME=~/airflow
 | 
			
		||||
airflow webserver --port 8090 -d
 | 
			
		||||
 | 
			
		||||
# start the airflow scheduler
 | 
			
		||||
airflow scheduler
 | 
			
		||||
 | 
			
		||||
# access the Airflow service and run any of the DAGs
 | 
			
		||||
# open http://localhost:8090/
 | 
			
		||||
# select any DAG and click on the `play arrow` button to start the DAG
 | 
			
		||||
 | 
			
		||||
# add debug lines to the codebase, e.g. in ./src/datahub_airflow_plugin/datahub_listener.py
 | 
			
		||||
logger.debug("this is the sample debug line")
 | 
			
		||||
 | 
			
		||||
# run the DAG again; the debug lines will appear in the task run log, which you can open as follows:
 | 
			
		||||
#1. click on the `timestamp` in the `Last Run` column
 | 
			
		||||
#2. select the task
 | 
			
		||||
#3. click on the `log` option
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
> **P.S. If you are not able to see the log lines, restart the `airflow scheduler` and rerun the DAG.**
 | 
			
		||||
 | 
			
		||||
### (Optional) Set up your Python environment for developing on Dagster Plugin
 | 
			
		||||
 | 
			
		||||
From the repository root:
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user