mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-12 01:11:41 +00:00
113 lines
3.5 KiB
Python
113 lines
3.5 KiB
Python
# metadata-ingestion/examples/library/notebook_create.py
|
|
import logging
|
|
import time
|
|
from typing import Dict, Optional
|
|
|
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
from datahub.emitter.rest_emitter import DatahubRestEmitter
|
|
from datahub.metadata.schema_classes import (
|
|
AuditStampClass,
|
|
ChangeAuditStampsClass,
|
|
NotebookInfoClass,
|
|
)
|
|
|
|
# Set up root logging at INFO level; this runs when the module is imported.
logging.basicConfig(level=logging.INFO)

# Module-level logger named after this module, used by main() below.
log = logging.getLogger(__name__)
|
|
|
|
|
|
def create_notebook_metadata(
    notebook_urn: str,
    title: str,
    description: str,
    external_url: str,
    custom_properties: Optional[Dict[str, str]] = None,
    actor: str = "urn:li:corpuser:data_scientist",
    timestamp_millis: Optional[int] = None,
) -> MetadataChangeProposalWrapper:
    """
    Build the metadata change proposal describing a notebook entity.

    This function only constructs the proposal; it does not emit anything.

    Args:
        notebook_urn: URN of the notebook
        title: Title of the notebook
        description: Description of the notebook
        external_url: URL to access the notebook
        custom_properties: Optional dictionary of custom properties; an empty
            dictionary is used when omitted or empty
        actor: URN of the actor recorded in the audit stamps
        timestamp_millis: Optional timestamp in milliseconds used for both the
            created and lastModified audit stamps. Defaults to the current time
            only when it is None, so an explicit 0 is honored.

    Returns:
        MetadataChangeProposalWrapper whose entity URN is ``notebook_urn`` and
        whose aspect is the assembled NotebookInfoClass
    """
    # Compare against None rather than relying on truthiness: 0 is a valid
    # caller-supplied timestamp and must not be replaced by "now".
    if timestamp_millis is None:
        timestamp_millis = int(time.time() * 1000)

    # The same stamp is used for creation and last modification.
    audit_stamp = AuditStampClass(time=timestamp_millis, actor=actor)

    notebook_info = NotebookInfoClass(
        title=title,
        description=description,
        externalUrl=external_url,
        customProperties=custom_properties or {},
        changeAuditStamps=ChangeAuditStampsClass(
            created=audit_stamp,
            lastModified=audit_stamp,
        ),
    )

    return MetadataChangeProposalWrapper(
        entityUrn=notebook_urn,
        aspect=notebook_info,
    )
|
|
|
|
|
|
def main(emitter: Optional[DatahubRestEmitter] = None) -> None:
    """
    Main function to create a notebook example.

    Builds the metadata for a sample notebook, emits it through ``emitter``,
    and logs the URN that was created.

    Args:
        emitter: Optional emitter to use (for testing). If not provided, creates a new one.

    Environment Variables:
        DATAHUB_GMS_URL: DataHub GMS server URL (default: http://localhost:8080)
        DATAHUB_GMS_TOKEN: DataHub access token (if authentication is required)
    """
    if emitter is None:
        import os

        gms_server = os.getenv("DATAHUB_GMS_URL", "http://localhost:8080")
        token = os.getenv("DATAHUB_GMS_TOKEN")

        # If no token in env, try to get from datahub config
        if not token:
            try:
                from datahub.ingestion.graph.client import get_default_graph

                token = get_default_graph().config.token
            except Exception:
                # Best-effort lookup: fall back to no token, but record why the
                # lookup failed so a broken local config can be diagnosed.
                log.debug(
                    "Could not load token from the default DataHub config; "
                    "continuing without a token",
                    exc_info=True,
                )

        emitter = DatahubRestEmitter(gms_server=gms_server, token=token)

    notebook_urn = "urn:li:notebook:(querybook,customer_analysis_2024)"

    event = create_notebook_metadata(
        notebook_urn=notebook_urn,
        title="Customer Segmentation Analysis 2024",
        description="Comprehensive analysis of customer segments including RFM analysis, cohort analysis, and predictive scoring for marketing campaigns",
        external_url="https://querybook.company.com/notebook/customer_analysis_2024",
        custom_properties={
            "workspace": "analytics",
            "team": "growth",
            "last_run": "2024-01-15T10:30:00Z",
        },
    )

    emitter.emit(event)
    # Lazy %-style arguments; the rendered message is unchanged.
    log.info("Created notebook %s", notebook_urn)
|
|
|
|
|
|
# Run the example only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()
|