fix: Add option for disabling ownership extraction (#11970)

Co-authored-by: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com>
This commit is contained in:
sagar-salvi-apptware 2024-11-29 21:28:31 +05:30 committed by GitHub
parent a46de1ecf9
commit c42f779859
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 29 additions and 16 deletions

View File

@ -309,7 +309,7 @@
"displayName": "Dremio",
"description": "Import Spaces, Sources, Tables and statistics from Dremio.",
"docsUrl": "https://datahubproject.io/docs/metadata-ingestion/",
"recipe": "source:\n type: dremio\n config:\n # Coordinates\n hostname: null\n port: null\n #true if https, otherwise false\n tls: true\n\n #For cloud instance\n #is_dremio_cloud: True\n #dremio_cloud_project_id: <project_id>\n\n #Credentials with personal access token\n authentication_method: PAT\n password: pass\n\n #Or Credentials with basic auth\n #authentication_method: password\n #username: null\n #password: null\n\n stateful_ingestion:\n enabled: true"
"recipe": "source:\n type: dremio\n config:\n # Coordinates\n hostname: null\n port: null\n #true if https, otherwise false\n tls: true\n\n #For cloud instance\n #is_dremio_cloud: True\n #dremio_cloud_project_id: <project_id>\n\n #Credentials with personal access token\n authentication_method: PAT\n password: pass\n\n #Or Credentials with basic auth\n #authentication_method: password\n #username: null\n #password: null\n\n ingest_owner: true\n\n stateful_ingestion:\n enabled: true"
},
{
"urn": "urn:li:dataPlatform:cassandra",

View File

@ -20,6 +20,8 @@ source:
include_query_lineage: True
ingest_owner: true
#Optional
source_mappings:
- platform: s3

View File

@ -142,6 +142,7 @@ class DremioAspects:
platform: str,
ui_url: str,
env: str,
ingest_owner: bool,
domain: Optional[str] = None,
platform_instance: Optional[str] = None,
):
@ -150,6 +151,7 @@ class DremioAspects:
self.env = env
self.domain = domain
self.ui_url = ui_url
self.ingest_owner = ingest_owner
def get_container_key(
self, name: Optional[str], path: Optional[List[str]]
@ -426,21 +428,23 @@ class DremioAspects:
return f'{self.ui_url}/{container_type}/{dataset_url_path}"{dataset.resource_name}"'
def _create_ownership(self, dataset: DremioDataset) -> Optional[OwnershipClass]:
    """Build the ownership aspect for ``dataset``.

    Ownership is produced only when owner ingestion is enabled on this
    instance (``self.ingest_owner``) and the dataset actually carries an
    owner; in every other case ``None`` is returned so the caller can skip
    the aspect.

    Args:
        dataset: Dataset whose ``owner`` and ``owner_type`` are inspected.

    Returns:
        An ``OwnershipClass`` holding a single technical owner, or ``None``
        when owner ingestion is disabled or the dataset has no owner.
    """
    # Guard clause: nothing to emit when the option is off or no owner is set.
    if not self.ingest_owner or not dataset.owner:
        return None

    # Only the literal "USER" owner type maps to a user URN; any other
    # owner type is emitted as a group URN.
    if dataset.owner_type == "USER":
        owner_urn = make_user_urn(dataset.owner)
    else:
        owner_urn = make_group_urn(dataset.owner)

    return OwnershipClass(
        owners=[
            OwnerClass(
                owner=owner_urn,
                type=OwnershipTypeClass.TECHNICAL_OWNER,
            )
        ]
    )
def _create_glossary_terms(self, entity: DremioDataset) -> GlossaryTermsClass:
return GlossaryTermsClass(

View File

@ -174,3 +174,8 @@ class DremioSourceConfig(
default=False,
description="Whether to include query-based lineage information.",
)
ingest_owner: bool = Field(
default=True,
description="Ingest Owner from source. This will override Owner info entered from UI",
)

View File

@ -97,6 +97,7 @@ class DremioSource(StatefulIngestionSourceBase):
- Ownership and Glossary Terms:
- Metadata related to ownership of datasets, extracted from Dremio's ownership model.
- Glossary terms and business metadata associated with datasets, providing additional context to the data.
- Note: Ownership information will only be available for the Cloud and Enterprise editions; it will not be available for the Community edition.
- Optional SQL Profiling (if enabled):
- Table, row, and column statistics can be profiled and ingested via optional SQL queries.
@ -123,6 +124,7 @@ class DremioSource(StatefulIngestionSourceBase):
self.dremio_aspects = DremioAspects(
platform=self.get_platform(),
domain=self.config.domain,
ingest_owner=self.config.ingest_owner,
platform_instance=self.config.platform_instance,
env=self.config.env,
ui_url=dremio_api.ui_url,