Fix Consistent masking of PII in table column profiling (#22933)

* Refactor PIIMasker getColumnProfile to encapsulate Authorizer PII check

* Refactor PIIMasker getTableProfile to encapsulate Authorizer PII check

* Add tests to check PII Sensitive masking on getTableColumns
This commit is contained in:
Adrià Manero 2025-08-19 12:05:19 +02:00 committed by GitHub
parent b6c5211876
commit 1e1a2e70f6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 197 additions and 39 deletions

View File

@ -41,6 +41,7 @@ import static org.openmetadata.service.util.LambdaExceptionUtil.rethrowFunction;
import com.google.common.collect.Streams;
import jakarta.json.JsonPatch;
import jakarta.ws.rs.core.SecurityContext;
import java.io.IOException;
import java.time.LocalDate;
import java.util.ArrayList;
@ -53,6 +54,7 @@ import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@ -110,6 +112,7 @@ import org.openmetadata.service.jdbi3.FeedRepository.ThreadContext;
import org.openmetadata.service.resources.databases.DatabaseUtil;
import org.openmetadata.service.resources.databases.TableResource;
import org.openmetadata.service.resources.feeds.MessageParser.EntityLink;
import org.openmetadata.service.security.Authorizer;
import org.openmetadata.service.security.mask.PIIMasker;
import org.openmetadata.service.util.EntityUtil;
import org.openmetadata.service.util.EntityUtil.Fields;
@ -693,7 +696,11 @@ public class TableRepository extends EntityRepository<Table> {
}
public ResultList<ColumnProfile> getColumnProfiles(
String fqn, Long startTs, Long endTs, boolean authorizePII) {
String fqn,
Long startTs,
Long endTs,
Authorizer authorizer,
SecurityContext securityContext) {
List<ColumnProfile> columnProfiles;
columnProfiles =
JsonUtils.readObjects(
@ -709,11 +716,12 @@ public class TableRepository extends EntityRepository<Table> {
ResultList<ColumnProfile> columnProfileResultList =
new ResultList<>(
columnProfiles, startTs.toString(), endTs.toString(), columnProfiles.size());
if (!authorizePII) {
// Mask the PII data
columnProfileResultList.setData(
PIIMasker.getColumnProfile(fqn, columnProfileResultList.getData()));
}
// Mask the PII data
columnProfileResultList.setData(
PIIMasker.getColumnProfile(
fqn, columnProfileResultList.getData(), authorizer, securityContext));
return columnProfileResultList;
}
@ -750,9 +758,27 @@ public class TableRepository extends EntityRepository<Table> {
}
}
public Table getLatestTableProfile(
String fqn,
boolean includeColumnProfile,
Authorizer authorizer,
SecurityContext securityContext) {
return getLatestTableProfileInternal(
fqn,
includeColumnProfile,
t -> PIIMasker.getTableProfile(fqn, t.getColumns(), authorizer, securityContext));
}
public Table getLatestTableProfile(
String fqn, boolean authorizePII, boolean includeColumnProfile) {
return getLatestTableProfileInternal(
fqn, includeColumnProfile, t -> PIIMasker.getTableProfile(t.getColumns(), authorizePII));
}
private Table getLatestTableProfileInternal(
String fqn, boolean includeColumnProfile, Function<Table, List<Column>> maskFn) {
Table table = findByName(fqn, ALL);
TableProfile tableProfile =
JsonUtils.readValue(
daoCollection
@ -760,16 +786,16 @@ public class TableRepository extends EntityRepository<Table> {
.getLatestExtension(table.getFullyQualifiedName(), TABLE_PROFILE_EXTENSION),
TableProfile.class);
table.setProfile(tableProfile);
if (includeColumnProfile) {
setColumnProfile(table.getColumns());
}
// Set the column tags. Will be used to hide the data
if (!authorizePII) {
populateEntityFieldTags(entityType, table.getColumns(), table.getFullyQualifiedName(), true);
table.setColumns(PIIMasker.getTableProfile(table.getColumns()));
}
// Always populate field tags; the masking strategy decides what to do with them
populateEntityFieldTags(entityType, table.getColumns(), table.getFullyQualifiedName(), true);
// Apply caller-provided masking strategy
table.setColumns(maskFn.apply(table));
return table;
}
@ -1786,19 +1812,36 @@ public class TableRepository extends EntityRepository<Table> {
}
public ResultList<Column> getTableColumns(
UUID tableId, int limit, int offset, String fieldsParam, Include include) {
UUID tableId,
int limit,
int offset,
String fieldsParam,
Include include,
Authorizer authorizer,
SecurityContext securityContext) {
Table table = find(tableId, include);
return getTableColumnsInternal(table, limit, offset, fieldsParam);
return getTableColumnsInternal(table, limit, offset, fieldsParam, authorizer, securityContext);
}
public ResultList<Column> getTableColumnsByFQN(
String fqn, int limit, int offset, String fieldsParam, Include include) {
String fqn,
int limit,
int offset,
String fieldsParam,
Include include,
Authorizer authorizer,
SecurityContext securityContext) {
Table table = findByName(fqn, include);
return getTableColumnsInternal(table, limit, offset, fieldsParam);
return getTableColumnsInternal(table, limit, offset, fieldsParam, authorizer, securityContext);
}
private org.openmetadata.service.util.ResultList<Column> getTableColumnsInternal(
Table table, int limit, int offset, String fieldsParam) {
Table table,
int limit,
int offset,
String fieldsParam,
Authorizer authorizer,
SecurityContext securityContext) {
// For paginated column access, we need to load the table with columns
// but we'll optimize the field loading to only process what we need
Table fullTable = get(null, table.getId(), getFields(Set.of(COLUMN_FIELD)), NON_DELETED, false);
@ -1829,7 +1872,9 @@ public class TableRepository extends EntityRepository<Table> {
if (fieldsParam != null && fieldsParam.contains("profile")) {
setColumnProfile(paginatedColumns);
populateEntityFieldTags(entityType, paginatedColumns, table.getFullyQualifiedName(), true);
paginatedColumns = PIIMasker.getTableProfile(paginatedColumns);
paginatedColumns =
PIIMasker.getTableProfile(
table.getFullyQualifiedName(), paginatedColumns, authorizer, securityContext);
}
// Calculate pagination metadata
@ -1974,19 +2019,41 @@ public class TableRepository extends EntityRepository<Table> {
}
public ResultList<Column> searchTableColumnsById(
UUID id, String query, int limit, int offset, String fieldsParam, Include include) {
UUID id,
String query,
int limit,
int offset,
String fieldsParam,
Include include,
Authorizer authorizer,
SecurityContext securityContext) {
Table table = get(null, id, getFields(fieldsParam), include, false);
return searchTableColumnsInternal(table, query, limit, offset, fieldsParam);
return searchTableColumnsInternal(
table, query, limit, offset, fieldsParam, authorizer, securityContext);
}
public ResultList<Column> searchTableColumnsByFQN(
String fqn, String query, int limit, int offset, String fieldsParam, Include include) {
String fqn,
String query,
int limit,
int offset,
String fieldsParam,
Include include,
Authorizer authorizer,
SecurityContext securityContext) {
Table table = getByName(null, fqn, getFields(fieldsParam), include, false);
return searchTableColumnsInternal(table, query, limit, offset, fieldsParam);
return searchTableColumnsInternal(
table, query, limit, offset, fieldsParam, authorizer, securityContext);
}
private ResultList<Column> searchTableColumnsInternal(
Table table, String query, int limit, int offset, String fieldsParam) {
Table table,
String query,
int limit,
int offset,
String fieldsParam,
Authorizer authorizer,
SecurityContext securityContext) {
List<Column> allColumns = table.getColumns();
if (allColumns == null || allColumns.isEmpty()) {
return new ResultList<>(List.of(), null, null, 0);
@ -2035,7 +2102,9 @@ public class TableRepository extends EntityRepository<Table> {
if (fieldsParam != null && fieldsParam.contains("profile")) {
setColumnProfile(matchingColumns);
populateEntityFieldTags(entityType, matchingColumns, table.getFullyQualifiedName(), true);
matchingColumns = PIIMasker.getTableProfile(matchingColumns);
matchingColumns =
PIIMasker.getTableProfile(
table.getFullyQualifiedName(), matchingColumns, authorizer, securityContext);
}
String before = offset > 0 ? String.valueOf(Math.max(0, offset - limit)) : null;

View File

@ -954,12 +954,12 @@ public class TableResource extends EntityResource<Table, TableRepository> {
new OperationContext(entityType, MetadataOperation.VIEW_DATA_PROFILE);
ResourceContext<?> resourceContext = getResourceContextByName(fqn);
authorizer.authorize(securityContext, operationContext, resourceContext);
boolean authorizePII = authorizer.authorizePII(securityContext, resourceContext.getOwners());
return Response.status(Response.Status.OK)
.entity(
JsonUtils.pojoToJson(
repository.getLatestTableProfile(fqn, authorizePII, includeColumnProfile)))
repository.getLatestTableProfile(
fqn, includeColumnProfile, authorizer, securityContext)))
.build();
}
@ -1047,8 +1047,7 @@ public class TableResource extends EntityResource<Table, TableRepository> {
fqn); // get table fqn for the resource context (vs column fqn)
ResourceContext<?> resourceContext = getResourceContextByName(tableFqn);
authorizer.authorize(securityContext, operationContext, resourceContext);
boolean authorizePII = authorizer.authorizePII(securityContext, resourceContext.getOwners());
return repository.getColumnProfiles(fqn, startTs, endTs, authorizePII);
return repository.getColumnProfiles(fqn, startTs, endTs, authorizer, securityContext);
}
@GET
@ -1382,7 +1381,8 @@ public class TableResource extends EntityResource<Table, TableRepository> {
authorizer.authorize(securityContext, operationContext, getResourceContextById(id));
ResultList<org.openmetadata.schema.type.Column> result =
repository.getTableColumns(id, limitParam, offsetParam, fieldsParam, include);
repository.getTableColumns(
id, limitParam, offsetParam, fieldsParam, include, authorizer, securityContext);
TableColumnList tableColumnList = new TableColumnList();
tableColumnList.setData(result.getData());
tableColumnList.setPaging(result.getPaging());
@ -1441,7 +1441,8 @@ public class TableResource extends EntityResource<Table, TableRepository> {
authorizer.authorize(securityContext, operationContext, getResourceContextByName(fqn));
ResultList<org.openmetadata.schema.type.Column> result =
repository.getTableColumnsByFQN(fqn, limitParam, offsetParam, fieldsParam, include);
repository.getTableColumnsByFQN(
fqn, limitParam, offsetParam, fieldsParam, include, authorizer, securityContext);
TableColumnList tableColumnList = new TableColumnList();
tableColumnList.setData(result.getData());
tableColumnList.setPaging(result.getPaging());
@ -1625,7 +1626,8 @@ public class TableResource extends EntityResource<Table, TableRepository> {
new OperationContext(entityType, MetadataOperation.VIEW_BASIC);
authorizer.authorize(securityContext, operationContext, getResourceContextById(id));
ResultList<Column> result =
repository.searchTableColumnsById(id, query, limitParam, offsetParam, fieldsParam, include);
repository.searchTableColumnsById(
id, query, limitParam, offsetParam, fieldsParam, include, authorizer, securityContext);
TableColumnList tableColumnList = new TableColumnList();
tableColumnList.setData(result.getData());
tableColumnList.setPaging(result.getPaging());
@ -1686,7 +1688,7 @@ public class TableResource extends EntityResource<Table, TableRepository> {
authorizer.authorize(securityContext, operationContext, getResourceContextByName(fqn));
ResultList<org.openmetadata.schema.type.Column> result =
repository.searchTableColumnsByFQN(
fqn, query, limitParam, offsetParam, fieldsParam, include);
fqn, query, limitParam, offsetParam, fieldsParam, include, authorizer, securityContext);
TableColumnList tableColumnList = new TableColumnList();
tableColumnList.setData(result.getData());
tableColumnList.setPaging(result.getPaging());

View File

@ -22,6 +22,7 @@ import org.openmetadata.schema.entity.teams.User;
import org.openmetadata.schema.tests.TestCase;
import org.openmetadata.schema.type.Column;
import org.openmetadata.schema.type.ColumnProfile;
import org.openmetadata.schema.type.EntityReference;
import org.openmetadata.schema.type.Field;
import org.openmetadata.schema.type.Include;
import org.openmetadata.schema.type.TableData;
@ -130,27 +131,46 @@ public class PIIMasker {
return searchIndex;
}
public static List<Column> getTableProfile(List<Column> columns) {
for (Column column : listOrEmpty(columns)) {
if (hasPiiSensitiveTag(column)) {
column.setProfile(null);
column.setName(flagMaskedName(column.getName()));
public static List<Column> getTableProfile(
String fqn, List<Column> columns, Authorizer authorizer, SecurityContext securityContext) {
Table table = Entity.getEntityByName(Entity.TABLE, fqn, "owners", Include.ALL);
List<EntityReference> owners = table.getOwners();
boolean authorizePII = authorizer.authorizePII(securityContext, owners);
return maskColumnsIfNotAuthorized(columns, authorizePII);
}
public static List<Column> getTableProfile(List<Column> columns, boolean authorizePII) {
return maskColumnsIfNotAuthorized(columns, authorizePII);
}
private static List<Column> maskColumnsIfNotAuthorized(
List<Column> columns, boolean authorizePII) {
if (authorizePII) return columns;
for (Column c : listOrEmpty(columns)) {
if (hasPiiSensitiveTag(c)) {
c.setProfile(null);
c.setName(flagMaskedName(c.getName()));
}
}
return columns;
}
public static List<ColumnProfile> getColumnProfile(
String fqn, List<ColumnProfile> columnProfiles) {
String fqn,
List<ColumnProfile> columnProfiles,
Authorizer authorizer,
SecurityContext securityContext) {
Table table =
Entity.getEntityByName(
Entity.TABLE, FullyQualifiedName.getTableFQN(fqn), "columns,tags", Include.ALL);
Entity.TABLE, FullyQualifiedName.getTableFQN(fqn), "columns,tags,owners", Include.ALL);
Column column =
table.getColumns().stream()
.filter(c -> c.getFullyQualifiedName().equals(fqn))
.findFirst()
.orElse(null);
if (column != null && hasPiiSensitiveTag(column)) {
boolean authorizePII = authorizer.authorizePII(securityContext, table.getOwners());
if (column != null && hasPiiSensitiveTag(column) && !authorizePII) {
return Collections.nCopies(columnProfiles.size(), new ColumnProfile());
}
return columnProfiles;

View File

@ -28,6 +28,7 @@ import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.openmetadata.common.utils.CommonUtil.getDateStringByOffset;
import static org.openmetadata.common.utils.CommonUtil.listOf;
import static org.openmetadata.common.utils.CommonUtil.listOrEmpty;
import static org.openmetadata.csv.CsvUtil.recordToString;
import static org.openmetadata.csv.EntityCsvTest.assertRows;
import static org.openmetadata.csv.EntityCsvTest.assertSummary;
@ -2931,6 +2932,72 @@ public class TableResourceTest extends EntityResourceTest<Table, CreateTable> {
assertEquals(maskedColumnProfiles.getData().size(), columnProfiles.getData().size());
}
@Test
void test_sensitivePIIColumnProfile_byGetColumns(TestInfo test) throws Exception {
// Arrange: create 2 profiled tables owned by USER1
Table table =
createEntity(
createRequest(test).withOwners(Lists.newArrayList(USER1.getEntityReference())),
ADMIN_AUTH_HEADERS);
Table table1 =
createEntity(
createRequest(test, 1).withOwners(List.of(USER1.getEntityReference())),
ADMIN_AUTH_HEADERS);
// Seed table/column profiles (C1, C2, C3)
putTableProfile(table, table1, ADMIN_AUTH_HEADERS);
// Tag C3 as PII.Sensitive and persist
Column c3 =
table.getColumns().stream().filter(c -> c.getName().equals(C3)).findFirst().orElseThrow();
String c3FQN = c3.getFullyQualifiedName();
List<TagLabel> c3Tags = new ArrayList<>(listOrEmpty(c3.getTags()));
c3Tags.add(
new TagLabel().withTagFQN("PII.Sensitive").withSource(TagLabel.TagSource.CLASSIFICATION));
c3.setTags(c3Tags);
patchEntity(table.getId(), JsonUtils.pojoToJson(table), table, ADMIN_AUTH_HEADERS);
// Build the base target to fetch columns (requesting tags + profile fields)
WebTarget baseColumnsTarget =
getResource("tables/" + table.getId() + "/columns")
.queryParam("limit", "1000")
.queryParam("offset", "0")
.queryParam("fields", "tags,profile")
.queryParam("include", "non-deleted");
// --- Owner call: USER1 should see C3 profile values
TableResource.TableColumnList ownerResp =
TestUtils.get(
baseColumnsTarget, TableResource.TableColumnList.class, authHeaders(USER1.getName()));
Column ownerC3 =
ownerResp.getData().stream()
.filter(col -> c3FQN.equals(col.getFullyQualifiedName()))
.findFirst()
.orElseThrow(() -> new AssertionError("C3 not found for owner in /{id}/columns"));
assertNotNull(ownerC3.getProfile(), "Owner should see a column profile object for C3");
assertNotNull(ownerC3.getProfile().getMin(), "Owner should see min for PII column");
assertNotNull(ownerC3.getProfile().getMax(), "Owner should see max for PII column");
// --- Non-owner call: USER2 should get masked stats for C3
TableResource.TableColumnList nonOwnerResp =
TestUtils.get(
baseColumnsTarget,
TableResource.TableColumnList.class,
authHeaders(USER2_REF.getName()));
Column nonOwnerC3 =
nonOwnerResp.getData().stream()
.filter(col -> c3FQN.equals(col.getFullyQualifiedName()))
.findFirst()
.orElseThrow(() -> new AssertionError("C3 not found for non-owner in /{id}/columns"));
assertNull(
nonOwnerC3.getProfile(),
"Non-owner should NOT receive a profile object for PII-sensitive column");
}
@Test
void testInheritedPermissionFromParent(TestInfo test) throws IOException {
// DatabaseService has owner dataConsumer