feat(ingest/tableau): support ingestion of access roles (#11157)

Co-authored-by: Yanik Häni <Yanik.Haeni1@swisscom.com>
Co-authored-by: Harshal Sheth <hsheth2@gmail.com>
This commit is contained in:
haeniya 2024-10-24 20:56:00 +02:00 committed by GitHub
parent b89ca3f081
commit 7c8dba4bd9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 44451 additions and 13 deletions

View File

@ -52,7 +52,8 @@ public class ContainerType
Constants.DEPRECATION_ASPECT_NAME, Constants.DEPRECATION_ASPECT_NAME,
Constants.DATA_PRODUCTS_ASPECT_NAME, Constants.DATA_PRODUCTS_ASPECT_NAME,
Constants.STRUCTURED_PROPERTIES_ASPECT_NAME, Constants.STRUCTURED_PROPERTIES_ASPECT_NAME,
Constants.FORMS_ASPECT_NAME); Constants.FORMS_ASPECT_NAME,
Constants.ACCESS_ASPECT_NAME);
private static final Set<String> FACET_FIELDS = ImmutableSet.of("origin", "platform"); private static final Set<String> FACET_FIELDS = ImmutableSet.of("origin", "platform");
private static final String ENTITY_NAME = "container"; private static final String ENTITY_NAME = "container";

View File

@ -2,6 +2,7 @@ package com.linkedin.datahub.graphql.types.container.mappers;
import static com.linkedin.metadata.Constants.*; import static com.linkedin.metadata.Constants.*;
import com.linkedin.common.Access;
import com.linkedin.common.DataPlatformInstance; import com.linkedin.common.DataPlatformInstance;
import com.linkedin.common.Deprecation; import com.linkedin.common.Deprecation;
import com.linkedin.common.Forms; import com.linkedin.common.Forms;
@ -30,6 +31,7 @@ import com.linkedin.datahub.graphql.types.common.mappers.util.SystemMetadataUtil
import com.linkedin.datahub.graphql.types.domain.DomainAssociationMapper; import com.linkedin.datahub.graphql.types.domain.DomainAssociationMapper;
import com.linkedin.datahub.graphql.types.form.FormsMapper; import com.linkedin.datahub.graphql.types.form.FormsMapper;
import com.linkedin.datahub.graphql.types.glossary.mappers.GlossaryTermsMapper; import com.linkedin.datahub.graphql.types.glossary.mappers.GlossaryTermsMapper;
import com.linkedin.datahub.graphql.types.rolemetadata.mappers.AccessMapper;
import com.linkedin.datahub.graphql.types.structuredproperty.StructuredPropertiesMapper; import com.linkedin.datahub.graphql.types.structuredproperty.StructuredPropertiesMapper;
import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper; import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper;
import com.linkedin.domain.Domains; import com.linkedin.domain.Domains;
@ -105,6 +107,11 @@ public class ContainerMapper {
context, new GlossaryTerms(envelopedTerms.getValue().data()), entityUrn)); context, new GlossaryTerms(envelopedTerms.getValue().data()), entityUrn));
} }
final EnvelopedAspect accessAspect = aspects.get(ACCESS_ASPECT_NAME);
if (accessAspect != null) {
result.setAccess(AccessMapper.map(new Access(accessAspect.getValue().data()), entityUrn));
}
final EnvelopedAspect envelopedInstitutionalMemory = final EnvelopedAspect envelopedInstitutionalMemory =
aspects.get(Constants.INSTITUTIONAL_MEMORY_ASPECT_NAME); aspects.get(Constants.INSTITUTIONAL_MEMORY_ASPECT_NAME);
if (envelopedInstitutionalMemory != null) { if (envelopedInstitutionalMemory != null) {

View File

@ -86,7 +86,7 @@ public class DatasetType
EMBED_ASPECT_NAME, EMBED_ASPECT_NAME,
DATA_PRODUCTS_ASPECT_NAME, DATA_PRODUCTS_ASPECT_NAME,
BROWSE_PATHS_V2_ASPECT_NAME, BROWSE_PATHS_V2_ASPECT_NAME,
ACCESS_DATASET_ASPECT_NAME, ACCESS_ASPECT_NAME,
STRUCTURED_PROPERTIES_ASPECT_NAME, STRUCTURED_PROPERTIES_ASPECT_NAME,
FORMS_ASPECT_NAME, FORMS_ASPECT_NAME,
SUB_TYPES_ASPECT_NAME); SUB_TYPES_ASPECT_NAME);

View File

@ -166,7 +166,7 @@ public class DatasetMapper implements ModelMapper<EntityResponse, Dataset> {
(dataset, dataMap) -> (dataset, dataMap) ->
dataset.setBrowsePathV2(BrowsePathsV2Mapper.map(context, new BrowsePathsV2(dataMap)))); dataset.setBrowsePathV2(BrowsePathsV2Mapper.map(context, new BrowsePathsV2(dataMap))));
mappingHelper.mapToResult( mappingHelper.mapToResult(
ACCESS_DATASET_ASPECT_NAME, ACCESS_ASPECT_NAME,
((dataset, dataMap) -> ((dataset, dataMap) ->
dataset.setAccess(AccessMapper.map(new Access(dataMap), entityUrn)))); dataset.setAccess(AccessMapper.map(new Access(dataMap), entityUrn))));
mappingHelper.mapToResult( mappingHelper.mapToResult(

View File

@ -2837,6 +2837,11 @@ type Container implements Entity {
""" """
exists: Boolean exists: Boolean
"""
The Roles and the properties to access the container
"""
access: Access
""" """
Experimental API. Experimental API.
For fetching extra entities that do not have custom UI code yet For fetching extra entities that do not have custom UI code yet

View File

@ -8,7 +8,7 @@ import { DocumentationTab } from '../shared/tabs/Documentation/DocumentationTab'
import { SidebarAboutSection } from '../shared/containers/profile/sidebar/AboutSection/SidebarAboutSection'; import { SidebarAboutSection } from '../shared/containers/profile/sidebar/AboutSection/SidebarAboutSection';
import { SidebarOwnerSection } from '../shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection'; import { SidebarOwnerSection } from '../shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection';
import { getDataForEntityType } from '../shared/containers/profile/utils'; import { getDataForEntityType } from '../shared/containers/profile/utils';
import { useGetContainerQuery } from '../../../graphql/container.generated'; import { useGetContainerQuery, GetContainerQuery } from '../../../graphql/container.generated';
import { ContainerEntitiesTab } from './ContainerEntitiesTab'; import { ContainerEntitiesTab } from './ContainerEntitiesTab';
import { SidebarTagsSection } from '../shared/containers/profile/sidebar/SidebarTagsSection'; import { SidebarTagsSection } from '../shared/containers/profile/sidebar/SidebarTagsSection';
import { PropertiesTab } from '../shared/tabs/Properties/PropertiesTab'; import { PropertiesTab } from '../shared/tabs/Properties/PropertiesTab';
@ -17,6 +17,8 @@ import { capitalizeFirstLetterOnly } from '../../shared/textUtil';
import DataProductSection from '../shared/containers/profile/sidebar/DataProduct/DataProductSection'; import DataProductSection from '../shared/containers/profile/sidebar/DataProduct/DataProductSection';
import { getDataProduct } from '../shared/utils'; import { getDataProduct } from '../shared/utils';
import EmbeddedProfile from '../shared/embed/EmbeddedProfile'; import EmbeddedProfile from '../shared/embed/EmbeddedProfile';
import AccessManagement from '../shared/tabs/Dataset/AccessManagement/AccessManagement';
import { useAppConfig } from '../../useAppConfig';
/** /**
* Definition of the DataHub Container entity. * Definition of the DataHub Container entity.
@ -65,6 +67,8 @@ export class ContainerEntity implements Entity<Container> {
useEntityQuery = useGetContainerQuery; useEntityQuery = useGetContainerQuery;
appconfig = useAppConfig;
renderProfile = (urn: string) => ( renderProfile = (urn: string) => (
<EntityProfile <EntityProfile
urn={urn} urn={urn}
@ -85,6 +89,23 @@ export class ContainerEntity implements Entity<Container> {
name: 'Properties', name: 'Properties',
component: PropertiesTab, component: PropertiesTab,
}, },
{
name: 'Access Management',
component: AccessManagement,
display: {
visible: (_, container: GetContainerQuery) => {
return (
this.appconfig().config.featureFlags.showAccessManagement &&
!!container?.container?.access
);
},
enabled: (_, container: GetContainerQuery) => {
const accessAspect = container?.container?.access;
const rolesList = accessAspect?.roles;
return !!accessAspect && !!rolesList && rolesList.length > 0;
},
},
},
]} ]}
sidebarSections={this.getSidebarSections()} sidebarSections={this.getSidebarSections()}
/> />

View File

@ -3,8 +3,8 @@ import styled from 'styled-components';
import { Button, Table } from 'antd'; import { Button, Table } from 'antd';
import { SpinProps } from 'antd/es/spin'; import { SpinProps } from 'antd/es/spin';
import { LoadingOutlined } from '@ant-design/icons'; import { LoadingOutlined } from '@ant-design/icons';
import { useBaseEntity } from '../../../EntityContext'; import { useEntityData } from '../../../EntityContext';
import { GetDatasetQuery, useGetExternalRolesQuery } from '../../../../../../graphql/dataset.generated'; import { useGetExternalRolesQuery } from '../../../../../../graphql/dataset.generated';
import { handleAccessRoles } from './utils'; import { handleAccessRoles } from './utils';
import AccessManagerDescription from './AccessManagerDescription'; import AccessManagerDescription from './AccessManagerDescription';
@ -60,11 +60,12 @@ const AccessButton = styled(Button)`
`; `;
export default function AccessManagement() { export default function AccessManagement() {
const baseEntity = useBaseEntity<GetDatasetQuery>(); const { entityData } = useEntityData();
const entityUrn = (entityData as any)?.urn;
const { data: externalRoles, loading: isLoading } = useGetExternalRolesQuery({ const { data: externalRoles, loading: isLoading } = useGetExternalRolesQuery({
variables: { urn: baseEntity?.dataset?.urn as string }, variables: { urn: entityUrn as string },
skip: !baseEntity?.dataset?.urn, skip: !entityUrn,
}); });
const columns = [ const columns = [

View File

@ -59,6 +59,13 @@ query getContainer($urn: String!) {
status { status {
removed removed
} }
access {
roles {
role {
urn
}
}
}
autoRenderAspects: aspects(input: { autoRenderOnly: true }) { autoRenderAspects: aspects(input: { autoRenderOnly: true }) {
...autoRenderAspectFields ...autoRenderAspectFields
} }

View File

@ -277,7 +277,7 @@ public class Constants {
// ExternalRoleMetadata // ExternalRoleMetadata
public static final String ROLE_ENTITY_NAME = "role"; public static final String ROLE_ENTITY_NAME = "role";
public static final String ACCESS_DATASET_ASPECT_NAME = "access"; public static final String ACCESS_ASPECT_NAME = "access";
public static final String ROLE_KEY = "roleKey"; public static final String ROLE_KEY = "roleKey";
public static final String ROLE_PROPERTIES_ASPECT_NAME = "roleProperties"; public static final String ROLE_PROPERTIES_ASPECT_NAME = "roleProperties";
public static final String ROLE_ACTORS_ASPECT_NAME = "actors"; public static final String ROLE_ACTORS_ASPECT_NAME = "actors";

View File

@ -1,3 +1,4 @@
import json
import logging import logging
import re import re
import time import time
@ -26,6 +27,8 @@ from pydantic import root_validator, validator
from pydantic.fields import Field from pydantic.fields import Field
from requests.adapters import HTTPAdapter from requests.adapters import HTTPAdapter
from tableauserverclient import ( from tableauserverclient import (
GroupItem,
PermissionsRule,
PersonalAccessTokenAuth, PersonalAccessTokenAuth,
Server, Server,
ServerResponseError, ServerResponseError,
@ -216,6 +219,11 @@ class TableauConnectionConfig(ConfigModel):
description="Whether to verify SSL certificates. If using self-signed certificates, set to false or provide the path to the .pem certificate bundle.", description="Whether to verify SSL certificates. If using self-signed certificates, set to false or provide the path to the .pem certificate bundle.",
) )
session_trust_env: bool = Field(
False,
description="Configures the trust_env property in the requests session. If set to false (default value) it will bypass proxy settings. See https://requests.readthedocs.io/en/latest/api/#requests.Session.trust_env for more information.",
)
extract_column_level_lineage: bool = Field( extract_column_level_lineage: bool = Field(
True, True,
description="When enabled, extracts column-level lineage from Tableau Datasources", description="When enabled, extracts column-level lineage from Tableau Datasources",
@ -265,8 +273,7 @@ class TableauConnectionConfig(ConfigModel):
}, },
) )
# From https://stackoverflow.com/a/50159273/5004662. server._session.trust_env = self.session_trust_env
server._session.trust_env = False
# Setup request retries. # Setup request retries.
adapter = HTTPAdapter( adapter = HTTPAdapter(
@ -298,6 +305,23 @@ class TableauConnectionConfig(ConfigModel):
) from e ) from e
class PermissionIngestionConfig(ConfigModel):
    """Options for ingesting Tableau group permissions.

    Each option is declared with ``Field``; the ``description`` text is the
    user-facing help for that option.
    """

    # Switch for ingesting group permissions of workbooks; on by default.
    enable_workbooks: bool = Field(
        default=True,
        description="Whether or not to enable group permission ingestion for workbooks. "
        "Default: True",
    )

    # Allow/deny filter evaluated against Tableau group names; the default
    # pattern allows every group.
    group_name_pattern: AllowDenyPattern = Field(
        default=AllowDenyPattern.allow_all(),
        # The fragments below are concatenated into a single string, so every
        # fragment except the last needs its own trailing space.
        description="Filter for Tableau group names when ingesting group permissions. "
        "For example, you could filter for groups that include the term 'Consumer' in their name by adding '^.*Consumer$' to the allow list. "
        "By default, all groups will be ingested. "
        "You can both allow and deny groups based on their name using their name, or a Regex pattern. "
        "Deny patterns always take precedence over allow patterns. ",
    )
class TableauConfig( class TableauConfig(
DatasetLineageProviderConfigBase, DatasetLineageProviderConfigBase,
StatefulIngestionConfigBase, StatefulIngestionConfigBase,
@ -459,6 +483,11 @@ class TableauConfig(
description="When enabled, sites are added as containers and therefore visible in the folder structure within Datahub.", description="When enabled, sites are added as containers and therefore visible in the folder structure within Datahub.",
) )
permission_ingestion: Optional[PermissionIngestionConfig] = Field(
default=None,
description="Configuration settings for ingesting Tableau groups and their capabilities as custom properties.",
)
# pre = True because we want to take some decision before pydantic initialize the configuration to default values # pre = True because we want to take some decision before pydantic initialize the configuration to default values
@root_validator(pre=True) @root_validator(pre=True)
def projects_backward_compatibility(cls, values: Dict) -> Dict: def projects_backward_compatibility(cls, values: Dict) -> Dict:
@ -732,6 +761,8 @@ class TableauSiteSource:
self.workbook_project_map: Dict[str, str] = {} self.workbook_project_map: Dict[str, str] = {}
self.datasource_project_map: Dict[str, str] = {} self.datasource_project_map: Dict[str, str] = {}
self.group_map: Dict[str, GroupItem] = {}
# This map keeps track of the database server connection hostnames. # This map keeps track of the database server connection hostnames.
self.database_server_hostname_map: Dict[str, str] = {} self.database_server_hostname_map: Dict[str, str] = {}
# This list keeps track of sheets in workbooks so that we retrieve those # This list keeps track of sheets in workbooks so that we retrieve those
@ -2803,6 +2834,18 @@ class TableauSiteSource:
f"Could not load project hierarchy for workbook {workbook_name}({workbook_id}). Please check permissions." f"Could not load project hierarchy for workbook {workbook_name}({workbook_id}). Please check permissions."
) )
custom_props = None
if (
self.config.permission_ingestion
and self.config.permission_ingestion.enable_workbooks
):
logger.debug(f"Ingest access roles of workbook-id='{workbook.get(c.LUID)}'")
workbook_instance = self.server.workbooks.get_by_id(workbook.get(c.LUID))
self.server.workbooks.populate_permissions(workbook_instance)
custom_props = self._create_workbook_properties(
workbook_instance.permissions
)
yield from gen_containers( yield from gen_containers(
container_key=workbook_container_key, container_key=workbook_container_key,
name=workbook.get(c.NAME) or "", name=workbook.get(c.NAME) or "",
@ -2811,6 +2854,7 @@ class TableauSiteSource:
sub_types=[BIContainerSubTypes.TABLEAU_WORKBOOK], sub_types=[BIContainerSubTypes.TABLEAU_WORKBOOK],
owner_urn=owner_urn, owner_urn=owner_urn,
external_url=workbook_external_url, external_url=workbook_external_url,
extra_properties=custom_props,
tags=tags, tags=tags,
) )
@ -3168,11 +3212,53 @@ class TableauSiteSource:
sub_types=[c.SITE], sub_types=[c.SITE],
) )
def _fetch_groups(self):
    """Store every group yielded by the server's group pager in ``self.group_map``.

    Entries are keyed by the group's ``id``; an existing entry with the same
    id is overwritten.
    """
    group_pager = TSC.Pager(self.server.groups)
    for group_item in group_pager:
        self.group_map[group_item.id] = group_item
def _get_allowed_capabilities(self, capabilities: Dict[str, str]) -> List[str]:
if not self.config.permission_ingestion:
return []
allowed_capabilities = [
key for key, value in capabilities.items() if value == "Allow"
]
return allowed_capabilities
def _create_workbook_properties(
    self, permissions: List[PermissionsRule]
) -> Optional[Dict[str, str]]:
    """Serialise a workbook's group permission rules into a custom property.

    Only rules whose grantee has ``tag_name == "group"`` are considered. The
    grantee id is resolved through ``self.group_map``; rules are skipped when
    the group is missing or unnamed, or when its name is rejected by the
    configured ``group_name_pattern``.

    Args:
        permissions: Permission rules of a single workbook.

    Returns:
        ``{"permissions": <JSON>}`` where the JSON is a list of
        ``{"group": name, "capabilities": [...]}`` objects, or ``None`` when
        permission ingestion is not configured or no rule survives filtering.
    """
    permission_config = self.config.permission_ingestion
    if not permission_config:
        return None

    group_entries = []
    for rule in permissions:
        # User grantees (or any other non-group grantee) are ignored.
        if rule.grantee.tag_name != "group":
            continue

        group = self.group_map.get(rule.grantee.id)
        if not (group and group.name):
            logger.debug(f"Group {rule.grantee.id} not found in group map.")
            continue

        if not permission_config.group_name_pattern.allowed(group.name):
            logger.info(
                f"Skip permission '{group.name}' as it's excluded in group_name_pattern."
            )
            continue

        allowed = self._get_allowed_capabilities(rule.capabilities)
        group_entries.append({"group": group.name, "capabilities": allowed})

    if len(group_entries) > 0:
        return {"permissions": json.dumps(group_entries)}
    return None
def ingest_tableau_site(self): def ingest_tableau_site(self):
# Initialise the dictionary to later look-up for chart and dashboard stat # Initialise the dictionary to later look-up for chart and dashboard stat
if self.config.extract_usage_stats: if self.config.extract_usage_stats:
self._populate_usage_stat_registry() self._populate_usage_stat_registry()
if self.config.permission_ingestion:
self._fetch_groups()
# Populate the map of database names and database hostnames to be used later to map # Populate the map of database names and database hostnames to be used later to map
# databases to platform instances. # databases to platform instances.
if self.config.database_hostname_to_platform_instance_map: if self.config.database_hostname_to_platform_instance_map:

View File

@ -8,14 +8,16 @@ from unittest import mock
import pytest import pytest
from freezegun import freeze_time from freezegun import freeze_time
from requests.adapters import ConnectionError from requests.adapters import ConnectionError
from tableauserverclient import Server from tableauserverclient import PermissionsRule, Server
from tableauserverclient.models import ( from tableauserverclient.models import (
DatasourceItem, DatasourceItem,
GroupItem,
ProjectItem, ProjectItem,
SiteItem, SiteItem,
ViewItem, ViewItem,
WorkbookItem, WorkbookItem,
) )
from tableauserverclient.models.reference_item import ResourceReference
from datahub.configuration.source_common import DEFAULT_ENV from datahub.configuration.source_common import DEFAULT_ENV
from datahub.emitter.mce_builder import make_schema_field_urn from datahub.emitter.mce_builder import make_schema_field_urn
@ -132,6 +134,43 @@ def side_effect_project_data(*arg, **kwargs):
return [project1, project2, project3, project4], mock_pagination return [project1, project2, project3, project4], mock_pagination
def side_effect_group_data(*arg, **kwargs):
    """Mock side effect: return two named groups plus a pagination mock.

    All positional and keyword arguments are ignored. The result is a
    ``(groups, pagination)`` tuple whose pagination object reports
    ``total_available = None``.
    """
    pagination = mock.MagicMock()
    pagination.total_available = None

    consumer_group: GroupItem = GroupItem(
        name="AB_XY00-Tableau-Access_A_123_PROJECT_XY_Consumer"
    )
    # The id is assigned through the private attribute after construction.
    consumer_group._id = "79d02655-88e5-45a6-9f9b-eeaf5fe54903-group1"

    analyst_group: GroupItem = GroupItem(
        name="AB_XY00-Tableau-Access_A_123_PROJECT_XY_Analyst"
    )
    analyst_group._id = "79d02655-88e5-45a6-9f9b-eeaf5fe54903-group2"

    return [consumer_group, analyst_group], pagination
def side_effect_workbook_permissions(*arg, **kwargs):
    """Mock data: return two group permission rules for a workbook.

    All arguments are ignored. The first rule grants two capabilities to the
    ``...-group1`` grantee, the second grants four to ``...-group2``; every
    capability mode is ``"Allow"``.
    """
    # (grantee id, capabilities) pairs, in the order the rules are returned.
    grants = [
        (
            "79d02655-88e5-45a6-9f9b-eeaf5fe54903-group1",
            {"Read": "Allow", "ViewComments": "Allow"},
        ),
        (
            "79d02655-88e5-45a6-9f9b-eeaf5fe54903-group2",
            {
                "Read": "Allow",
                "ViewComments": "Allow",
                "Delete": "Allow",
                "Write": "Allow",
            },
        ),
    ]
    return [
        PermissionsRule(
            grantee=ResourceReference(id_=grantee_id, tag_name="group"),
            capabilities=granted,
        )
        for grantee_id, granted in grants
    ]
def side_effect_site_data(*arg, **kwargs): def side_effect_site_data(*arg, **kwargs):
mock_pagination = mock.MagicMock() mock_pagination = mock.MagicMock()
mock_pagination.total_available = None mock_pagination.total_available = None
@ -249,8 +288,10 @@ def mock_sdk_client(
mock_client.views = mock.Mock() mock_client.views = mock.Mock()
mock_client.projects = mock.Mock() mock_client.projects = mock.Mock()
mock_client.sites = mock.Mock() mock_client.sites = mock.Mock()
mock_client.groups = mock.Mock()
mock_client.projects.get.side_effect = side_effect_project_data mock_client.projects.get.side_effect = side_effect_project_data
mock_client.groups.get.side_effect = side_effect_group_data
mock_client.sites.get.side_effect = side_effect_site_data mock_client.sites.get.side_effect = side_effect_site_data
mock_client.sites.get_by_id.side_effect = side_effect_site_get_by_id mock_client.sites.get_by_id.side_effect = side_effect_site_get_by_id
@ -260,6 +301,11 @@ def mock_sdk_client(
mock_client.workbooks = mock.Mock() mock_client.workbooks = mock.Mock()
mock_client.workbooks.get.side_effect = side_effect_workbook_data mock_client.workbooks.get.side_effect = side_effect_workbook_data
workbook_mock = mock.create_autospec(WorkbookItem, instance=True)
type(workbook_mock).permissions = mock.PropertyMock(
return_value=side_effect_workbook_permissions()
)
mock_client.workbooks.get_by_id.return_value = workbook_mock
mock_client.views.get.side_effect = side_effect_usage_stat mock_client.views.get.side_effect = side_effect_usage_stat
mock_client.auth.sign_in.return_value = None mock_client.auth.sign_in.return_value = None
@ -1154,6 +1200,32 @@ def test_site_name_pattern(pytestconfig, tmp_path, mock_datahub_graph):
) )
@freeze_time(FROZEN_TIME)
@pytest.mark.integration
def test_permission_ingestion(pytestconfig, tmp_path, mock_datahub_graph):
    """Run the shared Tableau ingestion helper with permission ingestion enabled.

    The default source config is extended with a ``permission_ingestion``
    section that enables workbooks and allows only group names matching
    ``^.*_Consumer$``.
    """
    enable_logging()

    permission_settings: Dict[str, Any] = {
        "enable_workbooks": True,
        "group_name_pattern": {"allow": ["^.*_Consumer$"]},
    }
    # Copy the defaults so the shared dict is not mutated by this test.
    new_pipeline_config: Dict[Any, Any] = dict(config_source_default)
    new_pipeline_config["permission_ingestion"] = permission_settings

    tableau_ingest_common(
        pytestconfig,
        tmp_path,
        mock_data(),
        "tableau_permission_ingestion_mces_golden.json",
        "tableau_permission_ingestion_mces.json",
        mock_datahub_graph,
        pipeline_config=new_pipeline_config,
        pipeline_name="test_tableau_group_ingest",
    )
@freeze_time(FROZEN_TIME) @freeze_time(FROZEN_TIME)
@pytest.mark.integration @pytest.mark.integration
def test_permission_mode_switched_error(pytestconfig, tmp_path, mock_datahub_graph): def test_permission_mode_switched_error(pytestconfig, tmp_path, mock_datahub_graph):

View File

@ -248,6 +248,7 @@ entities:
- structuredProperties - structuredProperties
- forms - forms
- testResults - testResults
- access
- name: tag - name: tag
category: core category: core
keyAspect: tagKey keyAspect: tagKey