mirror of
https://github.com/datahub-project/datahub.git
synced 2025-08-15 04:37:03 +00:00
feat: add docs on column-level lineage (#8062)
This commit is contained in:
parent
820981072f
commit
a8b622a016
@ -12,7 +12,8 @@ For more information about lineage, refer to [About DataHub Lineage](/docs/linea
|
|||||||
|
|
||||||
This guide will show you how to
|
This guide will show you how to
|
||||||
|
|
||||||
- Add lineage between two hive datasets.
|
- Add lineage between datasets.
|
||||||
|
- Add column-level lineage between datasets.
|
||||||
|
|
||||||
## Prerequisites
|
## Prerequisites
|
||||||
|
|
||||||
@ -112,3 +113,21 @@ Expected Response:
|
|||||||
You can now see the lineage between `fct_users_deleted` and `logging_events`.
|
You can now see the lineage between `fct_users_deleted` and `logging_events`.
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
|
## Add Column-level Lineage
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="python" label="Python">
|
||||||
|
|
||||||
|
```python
|
||||||
|
{{ inline /metadata-ingestion/examples/library/lineage_emitter_dataset_finegrained_sample.py show_path_as_comment }}
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
### Expected Outcome of Adding Column-level Lineage
|
||||||
|
|
||||||
|
You can now see the column-level lineage between datasets. Note that you have to enable `Show Columns` to be able to see the column-level lineage.
|
||||||
|
|
||||||
|

|
||||||
|
BIN
docs/imgs/apis/tutorials/column-level-lineage-added.png
Normal file
BIN
docs/imgs/apis/tutorials/column-level-lineage-added.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 129 KiB |
@ -0,0 +1,53 @@
|
|||||||
|
import datahub.emitter.mce_builder as builder
|
||||||
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
||||||
|
from datahub.emitter.rest_emitter import DatahubRestEmitter
|
||||||
|
from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
|
||||||
|
DatasetLineageType,
|
||||||
|
FineGrainedLineage,
|
||||||
|
FineGrainedLineageDownstreamType,
|
||||||
|
FineGrainedLineageUpstreamType,
|
||||||
|
Upstream,
|
||||||
|
UpstreamLineage,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def datasetUrn(tbl):
    """Return the DataHub dataset URN for the hive table named ``tbl``."""
    platform = "hive"
    return builder.make_dataset_urn(platform, tbl)
|
||||||
|
|
||||||
|
|
||||||
|
def fldUrn(tbl, fld):
    """Return the schema-field URN for column ``fld`` of the hive table ``tbl``."""
    parentUrn = datasetUrn(tbl)
    return builder.make_schema_field_urn(parentUrn, fld)
|
||||||
|
|
||||||
|
|
||||||
|
# Column-level lineage: a set of two upstream columns feeds a single
# downstream column (logging_events.browser).
upstreamFields = [
    fldUrn("fct_users_deleted", "browser_id"),
    fldUrn("fct_users_created", "user_id"),
]
downstreamFields = [fldUrn("logging_events", "browser")]

fineGrainedLineages = [
    FineGrainedLineage(
        upstreamType=FineGrainedLineageUpstreamType.FIELD_SET,
        upstreams=upstreamFields,
        downstreamType=FineGrainedLineageDownstreamType.FIELD,
        downstreams=downstreamFields,
    ),
]
|
||||||
|
|
||||||
|
|
||||||
|
# Dataset-level upstream edge, emitted together with the column-level lineage
# to check that it does not conflict with an existing Upstream (in particular
# the DownstreamOf relationship).
upstream = Upstream(
    dataset=datasetUrn("fct_users_deleted"),
    type=DatasetLineageType.TRANSFORMED,
)

# One lineage object carrying both the dataset-level and column-level edges.
fieldLineages = UpstreamLineage(
    upstreams=[upstream],
    fineGrainedLineages=fineGrainedLineages,
)
|
||||||
|
|
||||||
|
# Wrap the lineage in a change proposal attached to the downstream dataset.
lineageMcp = MetadataChangeProposalWrapper(
    aspect=fieldLineages,
    entityUrn=datasetUrn("logging_events"),
)
|
||||||
|
|
||||||
|
# Create an emitter pointed at the GMS REST API on localhost.
emitter = DatahubRestEmitter("http://localhost:8080")

# Send the lineage change proposal to DataHub.
emitter.emit_mcp(lineageMcp)
|
Loading…
x
Reference in New Issue
Block a user