mirror of
https://github.com/datahub-project/datahub.git
synced 2025-08-14 20:26:53 +00:00
feat(ingest/tableau): support ingestion of access roles (#11157)
Co-authored-by: Yanik Häni <Yanik.Haeni1@swisscom.com> Co-authored-by: Harshal Sheth <hsheth2@gmail.com>
This commit is contained in:
parent
b89ca3f081
commit
7c8dba4bd9
@ -52,7 +52,8 @@ public class ContainerType
|
||||
Constants.DEPRECATION_ASPECT_NAME,
|
||||
Constants.DATA_PRODUCTS_ASPECT_NAME,
|
||||
Constants.STRUCTURED_PROPERTIES_ASPECT_NAME,
|
||||
Constants.FORMS_ASPECT_NAME);
|
||||
Constants.FORMS_ASPECT_NAME,
|
||||
Constants.ACCESS_ASPECT_NAME);
|
||||
|
||||
private static final Set<String> FACET_FIELDS = ImmutableSet.of("origin", "platform");
|
||||
private static final String ENTITY_NAME = "container";
|
||||
|
@ -2,6 +2,7 @@ package com.linkedin.datahub.graphql.types.container.mappers;
|
||||
|
||||
import static com.linkedin.metadata.Constants.*;
|
||||
|
||||
import com.linkedin.common.Access;
|
||||
import com.linkedin.common.DataPlatformInstance;
|
||||
import com.linkedin.common.Deprecation;
|
||||
import com.linkedin.common.Forms;
|
||||
@ -30,6 +31,7 @@ import com.linkedin.datahub.graphql.types.common.mappers.util.SystemMetadataUtil
|
||||
import com.linkedin.datahub.graphql.types.domain.DomainAssociationMapper;
|
||||
import com.linkedin.datahub.graphql.types.form.FormsMapper;
|
||||
import com.linkedin.datahub.graphql.types.glossary.mappers.GlossaryTermsMapper;
|
||||
import com.linkedin.datahub.graphql.types.rolemetadata.mappers.AccessMapper;
|
||||
import com.linkedin.datahub.graphql.types.structuredproperty.StructuredPropertiesMapper;
|
||||
import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper;
|
||||
import com.linkedin.domain.Domains;
|
||||
@ -105,6 +107,11 @@ public class ContainerMapper {
|
||||
context, new GlossaryTerms(envelopedTerms.getValue().data()), entityUrn));
|
||||
}
|
||||
|
||||
final EnvelopedAspect accessAspect = aspects.get(ACCESS_ASPECT_NAME);
|
||||
if (accessAspect != null) {
|
||||
result.setAccess(AccessMapper.map(new Access(accessAspect.getValue().data()), entityUrn));
|
||||
}
|
||||
|
||||
final EnvelopedAspect envelopedInstitutionalMemory =
|
||||
aspects.get(Constants.INSTITUTIONAL_MEMORY_ASPECT_NAME);
|
||||
if (envelopedInstitutionalMemory != null) {
|
||||
|
@ -86,7 +86,7 @@ public class DatasetType
|
||||
EMBED_ASPECT_NAME,
|
||||
DATA_PRODUCTS_ASPECT_NAME,
|
||||
BROWSE_PATHS_V2_ASPECT_NAME,
|
||||
ACCESS_DATASET_ASPECT_NAME,
|
||||
ACCESS_ASPECT_NAME,
|
||||
STRUCTURED_PROPERTIES_ASPECT_NAME,
|
||||
FORMS_ASPECT_NAME,
|
||||
SUB_TYPES_ASPECT_NAME);
|
||||
|
@ -166,7 +166,7 @@ public class DatasetMapper implements ModelMapper<EntityResponse, Dataset> {
|
||||
(dataset, dataMap) ->
|
||||
dataset.setBrowsePathV2(BrowsePathsV2Mapper.map(context, new BrowsePathsV2(dataMap))));
|
||||
mappingHelper.mapToResult(
|
||||
ACCESS_DATASET_ASPECT_NAME,
|
||||
ACCESS_ASPECT_NAME,
|
||||
((dataset, dataMap) ->
|
||||
dataset.setAccess(AccessMapper.map(new Access(dataMap), entityUrn))));
|
||||
mappingHelper.mapToResult(
|
||||
|
@ -2837,6 +2837,11 @@ type Container implements Entity {
|
||||
"""
|
||||
exists: Boolean
|
||||
|
||||
"""
|
||||
The Roles and the properties to access the container
|
||||
"""
|
||||
access: Access
|
||||
|
||||
"""
|
||||
Experimental API.
|
||||
For fetching extra entities that do not have custom UI code yet
|
||||
|
@ -8,7 +8,7 @@ import { DocumentationTab } from '../shared/tabs/Documentation/DocumentationTab'
|
||||
import { SidebarAboutSection } from '../shared/containers/profile/sidebar/AboutSection/SidebarAboutSection';
|
||||
import { SidebarOwnerSection } from '../shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection';
|
||||
import { getDataForEntityType } from '../shared/containers/profile/utils';
|
||||
import { useGetContainerQuery } from '../../../graphql/container.generated';
|
||||
import { useGetContainerQuery, GetContainerQuery } from '../../../graphql/container.generated';
|
||||
import { ContainerEntitiesTab } from './ContainerEntitiesTab';
|
||||
import { SidebarTagsSection } from '../shared/containers/profile/sidebar/SidebarTagsSection';
|
||||
import { PropertiesTab } from '../shared/tabs/Properties/PropertiesTab';
|
||||
@ -17,6 +17,8 @@ import { capitalizeFirstLetterOnly } from '../../shared/textUtil';
|
||||
import DataProductSection from '../shared/containers/profile/sidebar/DataProduct/DataProductSection';
|
||||
import { getDataProduct } from '../shared/utils';
|
||||
import EmbeddedProfile from '../shared/embed/EmbeddedProfile';
|
||||
import AccessManagement from '../shared/tabs/Dataset/AccessManagement/AccessManagement';
|
||||
import { useAppConfig } from '../../useAppConfig';
|
||||
|
||||
/**
|
||||
* Definition of the DataHub Container entity.
|
||||
@ -65,6 +67,8 @@ export class ContainerEntity implements Entity<Container> {
|
||||
|
||||
useEntityQuery = useGetContainerQuery;
|
||||
|
||||
appconfig = useAppConfig;
|
||||
|
||||
renderProfile = (urn: string) => (
|
||||
<EntityProfile
|
||||
urn={urn}
|
||||
@ -85,6 +89,23 @@ export class ContainerEntity implements Entity<Container> {
|
||||
name: 'Properties',
|
||||
component: PropertiesTab,
|
||||
},
|
||||
{
|
||||
name: 'Access Management',
|
||||
component: AccessManagement,
|
||||
display: {
|
||||
visible: (_, container: GetContainerQuery) => {
|
||||
return (
|
||||
this.appconfig().config.featureFlags.showAccessManagement &&
|
||||
!!container?.container?.access
|
||||
);
|
||||
},
|
||||
enabled: (_, container: GetContainerQuery) => {
|
||||
const accessAspect = container?.container?.access;
|
||||
const rolesList = accessAspect?.roles;
|
||||
return !!accessAspect && !!rolesList && rolesList.length > 0;
|
||||
},
|
||||
},
|
||||
},
|
||||
]}
|
||||
sidebarSections={this.getSidebarSections()}
|
||||
/>
|
||||
|
@ -3,8 +3,8 @@ import styled from 'styled-components';
|
||||
import { Button, Table } from 'antd';
|
||||
import { SpinProps } from 'antd/es/spin';
|
||||
import { LoadingOutlined } from '@ant-design/icons';
|
||||
import { useBaseEntity } from '../../../EntityContext';
|
||||
import { GetDatasetQuery, useGetExternalRolesQuery } from '../../../../../../graphql/dataset.generated';
|
||||
import { useEntityData } from '../../../EntityContext';
|
||||
import { useGetExternalRolesQuery } from '../../../../../../graphql/dataset.generated';
|
||||
import { handleAccessRoles } from './utils';
|
||||
import AccessManagerDescription from './AccessManagerDescription';
|
||||
|
||||
@ -60,11 +60,12 @@ const AccessButton = styled(Button)`
|
||||
`;
|
||||
|
||||
export default function AccessManagement() {
|
||||
const baseEntity = useBaseEntity<GetDatasetQuery>();
|
||||
const { entityData } = useEntityData();
|
||||
const entityUrn = (entityData as any)?.urn;
|
||||
|
||||
const { data: externalRoles, loading: isLoading } = useGetExternalRolesQuery({
|
||||
variables: { urn: baseEntity?.dataset?.urn as string },
|
||||
skip: !baseEntity?.dataset?.urn,
|
||||
variables: { urn: entityUrn as string },
|
||||
skip: !entityUrn,
|
||||
});
|
||||
|
||||
const columns = [
|
||||
|
@ -59,6 +59,13 @@ query getContainer($urn: String!) {
|
||||
status {
|
||||
removed
|
||||
}
|
||||
access {
|
||||
roles {
|
||||
role {
|
||||
urn
|
||||
}
|
||||
}
|
||||
}
|
||||
autoRenderAspects: aspects(input: { autoRenderOnly: true }) {
|
||||
...autoRenderAspectFields
|
||||
}
|
||||
|
@ -277,7 +277,7 @@ public class Constants {
|
||||
|
||||
// ExternalRoleMetadata
|
||||
public static final String ROLE_ENTITY_NAME = "role";
|
||||
public static final String ACCESS_DATASET_ASPECT_NAME = "access";
|
||||
public static final String ACCESS_ASPECT_NAME = "access";
|
||||
public static final String ROLE_KEY = "roleKey";
|
||||
public static final String ROLE_PROPERTIES_ASPECT_NAME = "roleProperties";
|
||||
public static final String ROLE_ACTORS_ASPECT_NAME = "actors";
|
||||
|
@ -1,3 +1,4 @@
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
@ -26,6 +27,8 @@ from pydantic import root_validator, validator
|
||||
from pydantic.fields import Field
|
||||
from requests.adapters import HTTPAdapter
|
||||
from tableauserverclient import (
|
||||
GroupItem,
|
||||
PermissionsRule,
|
||||
PersonalAccessTokenAuth,
|
||||
Server,
|
||||
ServerResponseError,
|
||||
@ -216,6 +219,11 @@ class TableauConnectionConfig(ConfigModel):
|
||||
description="Whether to verify SSL certificates. If using self-signed certificates, set to false or provide the path to the .pem certificate bundle.",
|
||||
)
|
||||
|
||||
session_trust_env: bool = Field(
|
||||
False,
|
||||
description="Configures the trust_env property in the requests session. If set to false (default value) it will bypass proxy settings. See https://requests.readthedocs.io/en/latest/api/#requests.Session.trust_env for more information.",
|
||||
)
|
||||
|
||||
extract_column_level_lineage: bool = Field(
|
||||
True,
|
||||
description="When enabled, extracts column-level lineage from Tableau Datasources",
|
||||
@ -265,8 +273,7 @@ class TableauConnectionConfig(ConfigModel):
|
||||
},
|
||||
)
|
||||
|
||||
# From https://stackoverflow.com/a/50159273/5004662.
|
||||
server._session.trust_env = False
|
||||
server._session.trust_env = self.session_trust_env
|
||||
|
||||
# Setup request retries.
|
||||
adapter = HTTPAdapter(
|
||||
@ -298,6 +305,23 @@ class TableauConnectionConfig(ConfigModel):
|
||||
) from e
|
||||
|
||||
|
||||
class PermissionIngestionConfig(ConfigModel):
|
||||
enable_workbooks: bool = Field(
|
||||
default=True,
|
||||
description="Whether or not to enable group permission ingestion for workbooks. "
|
||||
"Default: True",
|
||||
)
|
||||
|
||||
group_name_pattern: AllowDenyPattern = Field(
|
||||
default=AllowDenyPattern.allow_all(),
|
||||
description="Filter for Tableau group names when ingesting group permissions. "
|
||||
"For example, you could filter for groups that include the term 'Consumer' in their name by adding '^.*Consumer$' to the allow list."
|
||||
"By default, all groups will be ingested. "
|
||||
"You can both allow and deny groups based on their name using their name, or a Regex pattern. "
|
||||
"Deny patterns always take precedence over allow patterns. ",
|
||||
)
|
||||
|
||||
|
||||
class TableauConfig(
|
||||
DatasetLineageProviderConfigBase,
|
||||
StatefulIngestionConfigBase,
|
||||
@ -459,6 +483,11 @@ class TableauConfig(
|
||||
description="When enabled, sites are added as containers and therefore visible in the folder structure within Datahub.",
|
||||
)
|
||||
|
||||
permission_ingestion: Optional[PermissionIngestionConfig] = Field(
|
||||
default=None,
|
||||
description="Configuration settings for ingesting Tableau groups and their capabilities as custom properties.",
|
||||
)
|
||||
|
||||
# pre = True because we want to take some decision before pydantic initialize the configuration to default values
|
||||
@root_validator(pre=True)
|
||||
def projects_backward_compatibility(cls, values: Dict) -> Dict:
|
||||
@ -732,6 +761,8 @@ class TableauSiteSource:
|
||||
self.workbook_project_map: Dict[str, str] = {}
|
||||
self.datasource_project_map: Dict[str, str] = {}
|
||||
|
||||
self.group_map: Dict[str, GroupItem] = {}
|
||||
|
||||
# This map keeps track of the database server connection hostnames.
|
||||
self.database_server_hostname_map: Dict[str, str] = {}
|
||||
# This list keeps track of sheets in workbooks so that we retrieve those
|
||||
@ -2803,6 +2834,18 @@ class TableauSiteSource:
|
||||
f"Could not load project hierarchy for workbook {workbook_name}({workbook_id}). Please check permissions."
|
||||
)
|
||||
|
||||
custom_props = None
|
||||
if (
|
||||
self.config.permission_ingestion
|
||||
and self.config.permission_ingestion.enable_workbooks
|
||||
):
|
||||
logger.debug(f"Ingest access roles of workbook-id='{workbook.get(c.LUID)}'")
|
||||
workbook_instance = self.server.workbooks.get_by_id(workbook.get(c.LUID))
|
||||
self.server.workbooks.populate_permissions(workbook_instance)
|
||||
custom_props = self._create_workbook_properties(
|
||||
workbook_instance.permissions
|
||||
)
|
||||
|
||||
yield from gen_containers(
|
||||
container_key=workbook_container_key,
|
||||
name=workbook.get(c.NAME) or "",
|
||||
@ -2811,6 +2854,7 @@ class TableauSiteSource:
|
||||
sub_types=[BIContainerSubTypes.TABLEAU_WORKBOOK],
|
||||
owner_urn=owner_urn,
|
||||
external_url=workbook_external_url,
|
||||
extra_properties=custom_props,
|
||||
tags=tags,
|
||||
)
|
||||
|
||||
@ -3168,11 +3212,53 @@ class TableauSiteSource:
|
||||
sub_types=[c.SITE],
|
||||
)
|
||||
|
||||
def _fetch_groups(self):
|
||||
for group in TSC.Pager(self.server.groups):
|
||||
self.group_map[group.id] = group
|
||||
|
||||
def _get_allowed_capabilities(self, capabilities: Dict[str, str]) -> List[str]:
|
||||
if not self.config.permission_ingestion:
|
||||
return []
|
||||
|
||||
allowed_capabilities = [
|
||||
key for key, value in capabilities.items() if value == "Allow"
|
||||
]
|
||||
return allowed_capabilities
|
||||
|
||||
def _create_workbook_properties(
|
||||
self, permissions: List[PermissionsRule]
|
||||
) -> Optional[Dict[str, str]]:
|
||||
if not self.config.permission_ingestion:
|
||||
return None
|
||||
|
||||
groups = []
|
||||
for rule in permissions:
|
||||
if rule.grantee.tag_name == "group":
|
||||
group = self.group_map.get(rule.grantee.id)
|
||||
if not group or not group.name:
|
||||
logger.debug(f"Group {rule.grantee.id} not found in group map.")
|
||||
continue
|
||||
if not self.config.permission_ingestion.group_name_pattern.allowed(
|
||||
group.name
|
||||
):
|
||||
logger.info(
|
||||
f"Skip permission '{group.name}' as it's excluded in group_name_pattern."
|
||||
)
|
||||
continue
|
||||
|
||||
capabilities = self._get_allowed_capabilities(rule.capabilities)
|
||||
groups.append({"group": group.name, "capabilities": capabilities})
|
||||
|
||||
return {"permissions": json.dumps(groups)} if len(groups) > 0 else None
|
||||
|
||||
def ingest_tableau_site(self):
|
||||
# Initialise the dictionary to later look-up for chart and dashboard stat
|
||||
if self.config.extract_usage_stats:
|
||||
self._populate_usage_stat_registry()
|
||||
|
||||
if self.config.permission_ingestion:
|
||||
self._fetch_groups()
|
||||
|
||||
# Populate the map of database names and database hostnames to be used later to map
|
||||
# databases to platform instances.
|
||||
if self.config.database_hostname_to_platform_instance_map:
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -8,14 +8,16 @@ from unittest import mock
|
||||
import pytest
|
||||
from freezegun import freeze_time
|
||||
from requests.adapters import ConnectionError
|
||||
from tableauserverclient import Server
|
||||
from tableauserverclient import PermissionsRule, Server
|
||||
from tableauserverclient.models import (
|
||||
DatasourceItem,
|
||||
GroupItem,
|
||||
ProjectItem,
|
||||
SiteItem,
|
||||
ViewItem,
|
||||
WorkbookItem,
|
||||
)
|
||||
from tableauserverclient.models.reference_item import ResourceReference
|
||||
|
||||
from datahub.configuration.source_common import DEFAULT_ENV
|
||||
from datahub.emitter.mce_builder import make_schema_field_urn
|
||||
@ -132,6 +134,43 @@ def side_effect_project_data(*arg, **kwargs):
|
||||
return [project1, project2, project3, project4], mock_pagination
|
||||
|
||||
|
||||
def side_effect_group_data(*arg, **kwargs):
|
||||
mock_pagination = mock.MagicMock()
|
||||
mock_pagination.total_available = None
|
||||
|
||||
group1: GroupItem = GroupItem(
|
||||
name="AB_XY00-Tableau-Access_A_123_PROJECT_XY_Consumer"
|
||||
)
|
||||
group1._id = "79d02655-88e5-45a6-9f9b-eeaf5fe54903-group1"
|
||||
group2: GroupItem = GroupItem(
|
||||
name="AB_XY00-Tableau-Access_A_123_PROJECT_XY_Analyst"
|
||||
)
|
||||
group2._id = "79d02655-88e5-45a6-9f9b-eeaf5fe54903-group2"
|
||||
|
||||
return [group1, group2], mock_pagination
|
||||
|
||||
|
||||
def side_effect_workbook_permissions(*arg, **kwargs):
|
||||
project_capabilities1 = {"Read": "Allow", "ViewComments": "Allow"}
|
||||
reference: ResourceReference = ResourceReference(
|
||||
id_="79d02655-88e5-45a6-9f9b-eeaf5fe54903-group1", tag_name="group"
|
||||
)
|
||||
rule1 = PermissionsRule(grantee=reference, capabilities=project_capabilities1)
|
||||
|
||||
project_capabilities2 = {
|
||||
"Read": "Allow",
|
||||
"ViewComments": "Allow",
|
||||
"Delete": "Allow",
|
||||
"Write": "Allow",
|
||||
}
|
||||
reference2: ResourceReference = ResourceReference(
|
||||
id_="79d02655-88e5-45a6-9f9b-eeaf5fe54903-group2", tag_name="group"
|
||||
)
|
||||
rule2 = PermissionsRule(grantee=reference2, capabilities=project_capabilities2)
|
||||
|
||||
return [rule1, rule2]
|
||||
|
||||
|
||||
def side_effect_site_data(*arg, **kwargs):
|
||||
mock_pagination = mock.MagicMock()
|
||||
mock_pagination.total_available = None
|
||||
@ -249,8 +288,10 @@ def mock_sdk_client(
|
||||
mock_client.views = mock.Mock()
|
||||
mock_client.projects = mock.Mock()
|
||||
mock_client.sites = mock.Mock()
|
||||
mock_client.groups = mock.Mock()
|
||||
|
||||
mock_client.projects.get.side_effect = side_effect_project_data
|
||||
mock_client.groups.get.side_effect = side_effect_group_data
|
||||
mock_client.sites.get.side_effect = side_effect_site_data
|
||||
mock_client.sites.get_by_id.side_effect = side_effect_site_get_by_id
|
||||
|
||||
@ -260,6 +301,11 @@ def mock_sdk_client(
|
||||
|
||||
mock_client.workbooks = mock.Mock()
|
||||
mock_client.workbooks.get.side_effect = side_effect_workbook_data
|
||||
workbook_mock = mock.create_autospec(WorkbookItem, instance=True)
|
||||
type(workbook_mock).permissions = mock.PropertyMock(
|
||||
return_value=side_effect_workbook_permissions()
|
||||
)
|
||||
mock_client.workbooks.get_by_id.return_value = workbook_mock
|
||||
|
||||
mock_client.views.get.side_effect = side_effect_usage_stat
|
||||
mock_client.auth.sign_in.return_value = None
|
||||
@ -1154,6 +1200,32 @@ def test_site_name_pattern(pytestconfig, tmp_path, mock_datahub_graph):
|
||||
)
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
@pytest.mark.integration
|
||||
def test_permission_ingestion(pytestconfig, tmp_path, mock_datahub_graph):
|
||||
enable_logging()
|
||||
output_file_name: str = "tableau_permission_ingestion_mces.json"
|
||||
golden_file_name: str = "tableau_permission_ingestion_mces_golden.json"
|
||||
|
||||
new_pipeline_config: Dict[Any, Any] = {
|
||||
**config_source_default,
|
||||
"permission_ingestion": {
|
||||
"enable_workbooks": True,
|
||||
"group_name_pattern": {"allow": ["^.*_Consumer$"]},
|
||||
},
|
||||
}
|
||||
tableau_ingest_common(
|
||||
pytestconfig,
|
||||
tmp_path,
|
||||
mock_data(),
|
||||
golden_file_name,
|
||||
output_file_name,
|
||||
mock_datahub_graph,
|
||||
pipeline_config=new_pipeline_config,
|
||||
pipeline_name="test_tableau_group_ingest",
|
||||
)
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
@pytest.mark.integration
|
||||
def test_permission_mode_switched_error(pytestconfig, tmp_path, mock_datahub_graph):
|
||||
|
@ -248,6 +248,7 @@ entities:
|
||||
- structuredProperties
|
||||
- forms
|
||||
- testResults
|
||||
- access
|
||||
- name: tag
|
||||
category: core
|
||||
keyAspect: tagKey
|
||||
|
Loading…
x
Reference in New Issue
Block a user