From 1e1a2e70f63c2bb568a3b3a3cb26c7436d40228d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Manero?= Date: Tue, 19 Aug 2025 12:05:19 +0200 Subject: [PATCH] Fix Consistent masking of PII in table column profiling (#22933) * Refactor PIIMasker getColumnProfile to encapsulate Authorizer PII check * Refactor PIIMasker getTableProfile to encapsulate Authorizer PII check * Add tests to check PII Sensitive masking on getTableColumns --- .../service/jdbi3/TableRepository.java | 115 ++++++++++++++---- .../resources/databases/TableResource.java | 18 +-- .../service/security/mask/PIIMasker.java | 36 ++++-- .../databases/TableResourceTest.java | 67 ++++++++++ 4 files changed, 197 insertions(+), 39 deletions(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TableRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TableRepository.java index 425e424fc56..76b66a138ae 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TableRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TableRepository.java @@ -41,6 +41,7 @@ import static org.openmetadata.service.util.LambdaExceptionUtil.rethrowFunction; import com.google.common.collect.Streams; import jakarta.json.JsonPatch; +import jakarta.ws.rs.core.SecurityContext; import java.io.IOException; import java.time.LocalDate; import java.util.ArrayList; @@ -53,6 +54,7 @@ import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.UUID; +import java.util.function.Function; import java.util.function.Predicate; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -110,6 +112,7 @@ import org.openmetadata.service.jdbi3.FeedRepository.ThreadContext; import org.openmetadata.service.resources.databases.DatabaseUtil; import org.openmetadata.service.resources.databases.TableResource; import org.openmetadata.service.resources.feeds.MessageParser.EntityLink; +import org.openmetadata.service.security.Authorizer; import org.openmetadata.service.security.mask.PIIMasker; import org.openmetadata.service.util.EntityUtil; import org.openmetadata.service.util.EntityUtil.Fields; @@ -693,7 +696,11 @@ public class TableRepository extends EntityRepository { } public ResultList getColumnProfiles( - String fqn, Long startTs, Long endTs, boolean authorizePII) { + String fqn, + Long startTs, + Long endTs, + Authorizer authorizer, + SecurityContext securityContext) { List columnProfiles; columnProfiles = JsonUtils.readObjects( @@ -709,11 +716,12 @@ public class TableRepository extends EntityRepository
{ ResultList columnProfileResultList = new ResultList<>( columnProfiles, startTs.toString(), endTs.toString(), columnProfiles.size()); - if (!authorizePII) { - // Mask the PII data - columnProfileResultList.setData( - PIIMasker.getColumnProfile(fqn, columnProfileResultList.getData())); - } + + // Mask the PII data + columnProfileResultList.setData( + PIIMasker.getColumnProfile( + fqn, columnProfileResultList.getData(), authorizer, securityContext)); + return columnProfileResultList; } @@ -750,9 +758,27 @@ public class TableRepository extends EntityRepository
{ } } + public Table getLatestTableProfile( + String fqn, + boolean includeColumnProfile, + Authorizer authorizer, + SecurityContext securityContext) { + return getLatestTableProfileInternal( + fqn, + includeColumnProfile, + t -> PIIMasker.getTableProfile(fqn, t.getColumns(), authorizer, securityContext)); + } + public Table getLatestTableProfile( String fqn, boolean authorizePII, boolean includeColumnProfile) { + return getLatestTableProfileInternal( + fqn, includeColumnProfile, t -> PIIMasker.getTableProfile(t.getColumns(), authorizePII)); + } + + private Table getLatestTableProfileInternal( + String fqn, boolean includeColumnProfile, Function> maskFn) { Table table = findByName(fqn, ALL); + TableProfile tableProfile = JsonUtils.readValue( daoCollection @@ -760,16 +786,16 @@ public class TableRepository extends EntityRepository
{ .getLatestExtension(table.getFullyQualifiedName(), TABLE_PROFILE_EXTENSION), TableProfile.class); table.setProfile(tableProfile); + if (includeColumnProfile) { setColumnProfile(table.getColumns()); } - // Set the column tags. Will be used to hide the data - if (!authorizePII) { - populateEntityFieldTags(entityType, table.getColumns(), table.getFullyQualifiedName(), true); - table.setColumns(PIIMasker.getTableProfile(table.getColumns())); - } + // Always populate field tags; the masking strategy decides what to do with them + populateEntityFieldTags(entityType, table.getColumns(), table.getFullyQualifiedName(), true); + // Apply caller-provided masking strategy + table.setColumns(maskFn.apply(table)); return table; } @@ -1786,19 +1812,36 @@ public class TableRepository extends EntityRepository
{ } public ResultList getTableColumns( - UUID tableId, int limit, int offset, String fieldsParam, Include include) { + UUID tableId, + int limit, + int offset, + String fieldsParam, + Include include, + Authorizer authorizer, + SecurityContext securityContext) { Table table = find(tableId, include); - return getTableColumnsInternal(table, limit, offset, fieldsParam); + return getTableColumnsInternal(table, limit, offset, fieldsParam, authorizer, securityContext); } public ResultList getTableColumnsByFQN( - String fqn, int limit, int offset, String fieldsParam, Include include) { + String fqn, + int limit, + int offset, + String fieldsParam, + Include include, + Authorizer authorizer, + SecurityContext securityContext) { Table table = findByName(fqn, include); - return getTableColumnsInternal(table, limit, offset, fieldsParam); + return getTableColumnsInternal(table, limit, offset, fieldsParam, authorizer, securityContext); } private org.openmetadata.service.util.ResultList getTableColumnsInternal( - Table table, int limit, int offset, String fieldsParam) { + Table table, + int limit, + int offset, + String fieldsParam, + Authorizer authorizer, + SecurityContext securityContext) { // For paginated column access, we need to load the table with columns // but we'll optimize the field loading to only process what we need Table fullTable = get(null, table.getId(), getFields(Set.of(COLUMN_FIELD)), NON_DELETED, false); @@ -1829,7 +1872,9 @@ public class TableRepository extends EntityRepository
{ if (fieldsParam != null && fieldsParam.contains("profile")) { setColumnProfile(paginatedColumns); populateEntityFieldTags(entityType, paginatedColumns, table.getFullyQualifiedName(), true); - paginatedColumns = PIIMasker.getTableProfile(paginatedColumns); + paginatedColumns = + PIIMasker.getTableProfile( + table.getFullyQualifiedName(), paginatedColumns, authorizer, securityContext); } // Calculate pagination metadata @@ -1974,19 +2019,41 @@ public class TableRepository extends EntityRepository
{ } public ResultList searchTableColumnsById( - UUID id, String query, int limit, int offset, String fieldsParam, Include include) { + UUID id, + String query, + int limit, + int offset, + String fieldsParam, + Include include, + Authorizer authorizer, + SecurityContext securityContext) { Table table = get(null, id, getFields(fieldsParam), include, false); - return searchTableColumnsInternal(table, query, limit, offset, fieldsParam); + return searchTableColumnsInternal( + table, query, limit, offset, fieldsParam, authorizer, securityContext); } public ResultList searchTableColumnsByFQN( - String fqn, String query, int limit, int offset, String fieldsParam, Include include) { + String fqn, + String query, + int limit, + int offset, + String fieldsParam, + Include include, + Authorizer authorizer, + SecurityContext securityContext) { Table table = getByName(null, fqn, getFields(fieldsParam), include, false); - return searchTableColumnsInternal(table, query, limit, offset, fieldsParam); + return searchTableColumnsInternal( + table, query, limit, offset, fieldsParam, authorizer, securityContext); } private ResultList searchTableColumnsInternal( - Table table, String query, int limit, int offset, String fieldsParam) { + Table table, + String query, + int limit, + int offset, + String fieldsParam, + Authorizer authorizer, + SecurityContext securityContext) { List allColumns = table.getColumns(); if (allColumns == null || allColumns.isEmpty()) { return new ResultList<>(List.of(), null, null, 0); @@ -2035,7 +2102,9 @@ public class TableRepository extends EntityRepository
{ if (fieldsParam != null && fieldsParam.contains("profile")) { setColumnProfile(matchingColumns); populateEntityFieldTags(entityType, matchingColumns, table.getFullyQualifiedName(), true); - matchingColumns = PIIMasker.getTableProfile(matchingColumns); + matchingColumns = + PIIMasker.getTableProfile( + table.getFullyQualifiedName(), matchingColumns, authorizer, securityContext); } String before = offset > 0 ? String.valueOf(Math.max(0, offset - limit)) : null; diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/databases/TableResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/databases/TableResource.java index 0436a78af62..1dae343ca94 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/databases/TableResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/databases/TableResource.java @@ -954,12 +954,12 @@ public class TableResource extends EntityResource { new OperationContext(entityType, MetadataOperation.VIEW_DATA_PROFILE); ResourceContext resourceContext = getResourceContextByName(fqn); authorizer.authorize(securityContext, operationContext, resourceContext); - boolean authorizePII = authorizer.authorizePII(securityContext, resourceContext.getOwners()); return Response.status(Response.Status.OK) .entity( JsonUtils.pojoToJson( - repository.getLatestTableProfile(fqn, authorizePII, includeColumnProfile))) + repository.getLatestTableProfile( + fqn, includeColumnProfile, authorizer, securityContext))) .build(); } @@ -1047,8 +1047,7 @@ public class TableResource extends EntityResource { fqn); // get table fqn for the resource context (vs column fqn) ResourceContext resourceContext = getResourceContextByName(tableFqn); authorizer.authorize(securityContext, operationContext, resourceContext); - boolean authorizePII = authorizer.authorizePII(securityContext, resourceContext.getOwners()); - return repository.getColumnProfiles(fqn, startTs, endTs, authorizePII); + return repository.getColumnProfiles(fqn, startTs, endTs, authorizer, securityContext); } @GET @@ -1382,7 +1381,8 @@ public class TableResource extends EntityResource { authorizer.authorize(securityContext, operationContext, getResourceContextById(id)); ResultList result = - repository.getTableColumns(id, limitParam, offsetParam, fieldsParam, include); + repository.getTableColumns( + id, limitParam, offsetParam, fieldsParam, include, authorizer, securityContext); TableColumnList tableColumnList = new TableColumnList(); tableColumnList.setData(result.getData()); tableColumnList.setPaging(result.getPaging()); @@ -1441,7 +1441,8 @@ public class TableResource extends EntityResource { authorizer.authorize(securityContext, operationContext, getResourceContextByName(fqn)); ResultList result = - repository.getTableColumnsByFQN(fqn, limitParam, offsetParam, fieldsParam, include); + repository.getTableColumnsByFQN( + fqn, limitParam, offsetParam, fieldsParam, include, authorizer, securityContext); TableColumnList tableColumnList = new TableColumnList(); tableColumnList.setData(result.getData()); tableColumnList.setPaging(result.getPaging()); @@ -1625,7 +1626,8 @@ public class TableResource extends EntityResource { new OperationContext(entityType, MetadataOperation.VIEW_BASIC); authorizer.authorize(securityContext, operationContext, getResourceContextById(id)); ResultList result = - repository.searchTableColumnsById(id, query, limitParam, offsetParam, fieldsParam, include); + repository.searchTableColumnsById( + id, query, limitParam, offsetParam, fieldsParam, include, authorizer, securityContext); TableColumnList tableColumnList = new TableColumnList(); tableColumnList.setData(result.getData()); tableColumnList.setPaging(result.getPaging()); @@ -1686,7 +1688,7 @@ public class TableResource extends EntityResource { authorizer.authorize(securityContext, operationContext, getResourceContextByName(fqn)); ResultList result = repository.searchTableColumnsByFQN( - fqn, query, limitParam, offsetParam, fieldsParam, include); + fqn, query, limitParam, offsetParam, fieldsParam, include, authorizer, securityContext); TableColumnList tableColumnList = new TableColumnList(); tableColumnList.setData(result.getData()); tableColumnList.setPaging(result.getPaging()); diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/security/mask/PIIMasker.java b/openmetadata-service/src/main/java/org/openmetadata/service/security/mask/PIIMasker.java index afea0e80500..e4fd4975594 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/security/mask/PIIMasker.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/security/mask/PIIMasker.java @@ -22,6 +22,7 @@ import org.openmetadata.schema.entity.teams.User; import org.openmetadata.schema.tests.TestCase; import org.openmetadata.schema.type.Column; import org.openmetadata.schema.type.ColumnProfile; +import org.openmetadata.schema.type.EntityReference; import org.openmetadata.schema.type.Field; import org.openmetadata.schema.type.Include; import org.openmetadata.schema.type.TableData; @@ -130,27 +131,46 @@ public class PIIMasker { return searchIndex; } - public static List getTableProfile(List columns) { - for (Column column : listOrEmpty(columns)) { - if (hasPiiSensitiveTag(column)) { - column.setProfile(null); - column.setName(flagMaskedName(column.getName())); + public static List getTableProfile( + String fqn, List columns, Authorizer authorizer, SecurityContext securityContext) { + Table table = Entity.getEntityByName(Entity.TABLE, fqn, "owners", Include.ALL); + List owners = table.getOwners(); + boolean authorizePII = authorizer.authorizePII(securityContext, owners); + return maskColumnsIfNotAuthorized(columns, authorizePII); + } + + public static List getTableProfile(List columns, boolean authorizePII) { + return maskColumnsIfNotAuthorized(columns, authorizePII); + } + + private static List maskColumnsIfNotAuthorized( + List columns, boolean authorizePII) { + if (authorizePII) return columns; + for (Column c : listOrEmpty(columns)) { + if (hasPiiSensitiveTag(c)) { + c.setProfile(null); + c.setName(flagMaskedName(c.getName())); } } return columns; } public static List getColumnProfile( - String fqn, List columnProfiles) { + String fqn, + List columnProfiles, + Authorizer authorizer, + SecurityContext securityContext) { Table table = Entity.getEntityByName( - Entity.TABLE, FullyQualifiedName.getTableFQN(fqn), "columns,tags", Include.ALL); + Entity.TABLE, FullyQualifiedName.getTableFQN(fqn), "columns,tags,owners", Include.ALL); Column column = table.getColumns().stream() .filter(c -> c.getFullyQualifiedName().equals(fqn)) .findFirst() .orElse(null); - if (column != null && hasPiiSensitiveTag(column)) { + boolean authorizePII = authorizer.authorizePII(securityContext, table.getOwners()); + + if (column != null && hasPiiSensitiveTag(column) && !authorizePII) { return Collections.nCopies(columnProfiles.size(), new ColumnProfile()); } return columnProfiles; diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/resources/databases/TableResourceTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/resources/databases/TableResourceTest.java index a4d8bc26f36..e1b119b0b80 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/resources/databases/TableResourceTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/resources/databases/TableResourceTest.java @@ -28,6 +28,7 @@ import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.openmetadata.common.utils.CommonUtil.getDateStringByOffset; import static org.openmetadata.common.utils.CommonUtil.listOf; +import static org.openmetadata.common.utils.CommonUtil.listOrEmpty; import static org.openmetadata.csv.CsvUtil.recordToString; import static org.openmetadata.csv.EntityCsvTest.assertRows; import static org.openmetadata.csv.EntityCsvTest.assertSummary; @@ -2931,6 +2932,72 @@ public class TableResourceTest extends EntityResourceTest { assertEquals(maskedColumnProfiles.getData().size(), columnProfiles.getData().size()); } + @Test + void test_sensitivePIIColumnProfile_byGetColumns(TestInfo test) throws Exception { + // Arrange: create 2 profiled tables owned by USER1 + Table table = + createEntity( + createRequest(test).withOwners(Lists.newArrayList(USER1.getEntityReference())), + ADMIN_AUTH_HEADERS); + Table table1 = + createEntity( + createRequest(test, 1).withOwners(List.of(USER1.getEntityReference())), + ADMIN_AUTH_HEADERS); + + // Seed table/column profiles (C1, C2, C3) + putTableProfile(table, table1, ADMIN_AUTH_HEADERS); + + // Tag C3 as PII.Sensitive and persist + Column c3 = + table.getColumns().stream().filter(c -> c.getName().equals(C3)).findFirst().orElseThrow(); + String c3FQN = c3.getFullyQualifiedName(); + List c3Tags = new ArrayList<>(listOrEmpty(c3.getTags())); + c3Tags.add( + new TagLabel().withTagFQN("PII.Sensitive").withSource(TagLabel.TagSource.CLASSIFICATION)); + c3.setTags(c3Tags); + patchEntity(table.getId(), JsonUtils.pojoToJson(table), table, ADMIN_AUTH_HEADERS); + + // Build the base target to fetch columns (requesting tags + profile fields) + WebTarget baseColumnsTarget = + getResource("tables/" + table.getId() + "/columns") + .queryParam("limit", "1000") + .queryParam("offset", "0") + .queryParam("fields", "tags,profile") + .queryParam("include", "non-deleted"); + + // --- Owner call: USER1 should see C3 profile values + TableResource.TableColumnList ownerResp = + TestUtils.get( + baseColumnsTarget, TableResource.TableColumnList.class, authHeaders(USER1.getName())); + + Column ownerC3 = + ownerResp.getData().stream() + .filter(col -> c3FQN.equals(col.getFullyQualifiedName())) + .findFirst() + .orElseThrow(() -> new AssertionError("C3 not found for owner in /{id}/columns")); + + assertNotNull(ownerC3.getProfile(), "Owner should see a column profile object for C3"); + assertNotNull(ownerC3.getProfile().getMin(), "Owner should see min for PII column"); + assertNotNull(ownerC3.getProfile().getMax(), "Owner should see max for PII column"); + + // --- Non-owner call: USER2 should get masked stats for C3 + TableResource.TableColumnList nonOwnerResp = + TestUtils.get( + baseColumnsTarget, + TableResource.TableColumnList.class, + authHeaders(USER2_REF.getName())); + + Column nonOwnerC3 = + nonOwnerResp.getData().stream() + .filter(col -> c3FQN.equals(col.getFullyQualifiedName())) + .findFirst() + .orElseThrow(() -> new AssertionError("C3 not found for non-owner in /{id}/columns")); + + assertNull( + nonOwnerC3.getProfile(), + "Non-owner should NOT receive a profile object for PII-sensitive column"); + } + @Test void testInheritedPermissionFromParent(TestInfo test) throws IOException { // DatabaseService has owner dataConsumer