diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py index a0bed4ae9a..30e8164383 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py @@ -248,6 +248,9 @@ class GlueSourceReport(StaleEntityRemovalSourceReport): "Enabled by default when stateful ingestion is turned on.", ) @capability(SourceCapability.LINEAGE_COARSE, "Enabled by default") +@capability( + SourceCapability.LINEAGE_FINE, "Support via the `emit_s3_lineage` config field" +) class GlueSource(StatefulIngestionSourceBase): """ Note: if you also have files in S3 that you'd like to ingest, we recommend you use Glue's built-in data catalog. See [here](../../../../docs/generated/ingestion/sources/s3.md) for a quick guide on how to set up a crawler on Glue and ingest the outputs with DataHub. @@ -284,12 +287,22 @@ class GlueSource(StatefulIngestionSourceBase): "Action": [ "glue:GetDataflowGraph", "glue:GetJobs", + "s3:GetObject" ], "Resource": "*" } ``` - plus `s3:GetObject` for the job script locations. + For profiling datasets, the following additional permissions are required: + ```json + { + "Effect": "Allow", + "Action": [ + "glue:GetPartitions" + ], + "Resource": "*" + } + ``` """