mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2026-01-07 04:56:54 +00:00
Fix Consistent masking of PII in table column profiling (#22933)
* Refactor PIIMasker getColumnProfile to encapsulate Authorizer PII check * Refactor PIIMasker getTableProfile to encapsulate Authorizer PII check * Add tests to check PII Sensitive masking on getTableColumns
This commit is contained in:
parent
b6c5211876
commit
1e1a2e70f6
@ -41,6 +41,7 @@ import static org.openmetadata.service.util.LambdaExceptionUtil.rethrowFunction;
|
||||
|
||||
import com.google.common.collect.Streams;
|
||||
import jakarta.json.JsonPatch;
|
||||
import jakarta.ws.rs.core.SecurityContext;
|
||||
import java.io.IOException;
|
||||
import java.time.LocalDate;
|
||||
import java.util.ArrayList;
|
||||
@ -53,6 +54,7 @@ import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
@ -110,6 +112,7 @@ import org.openmetadata.service.jdbi3.FeedRepository.ThreadContext;
|
||||
import org.openmetadata.service.resources.databases.DatabaseUtil;
|
||||
import org.openmetadata.service.resources.databases.TableResource;
|
||||
import org.openmetadata.service.resources.feeds.MessageParser.EntityLink;
|
||||
import org.openmetadata.service.security.Authorizer;
|
||||
import org.openmetadata.service.security.mask.PIIMasker;
|
||||
import org.openmetadata.service.util.EntityUtil;
|
||||
import org.openmetadata.service.util.EntityUtil.Fields;
|
||||
@ -693,7 +696,11 @@ public class TableRepository extends EntityRepository<Table> {
|
||||
}
|
||||
|
||||
public ResultList<ColumnProfile> getColumnProfiles(
|
||||
String fqn, Long startTs, Long endTs, boolean authorizePII) {
|
||||
String fqn,
|
||||
Long startTs,
|
||||
Long endTs,
|
||||
Authorizer authorizer,
|
||||
SecurityContext securityContext) {
|
||||
List<ColumnProfile> columnProfiles;
|
||||
columnProfiles =
|
||||
JsonUtils.readObjects(
|
||||
@ -709,11 +716,12 @@ public class TableRepository extends EntityRepository<Table> {
|
||||
ResultList<ColumnProfile> columnProfileResultList =
|
||||
new ResultList<>(
|
||||
columnProfiles, startTs.toString(), endTs.toString(), columnProfiles.size());
|
||||
if (!authorizePII) {
|
||||
// Mask the PII data
|
||||
columnProfileResultList.setData(
|
||||
PIIMasker.getColumnProfile(fqn, columnProfileResultList.getData()));
|
||||
}
|
||||
|
||||
// Mask the PII data
|
||||
columnProfileResultList.setData(
|
||||
PIIMasker.getColumnProfile(
|
||||
fqn, columnProfileResultList.getData(), authorizer, securityContext));
|
||||
|
||||
return columnProfileResultList;
|
||||
}
|
||||
|
||||
@ -750,9 +758,27 @@ public class TableRepository extends EntityRepository<Table> {
|
||||
}
|
||||
}
|
||||
|
||||
public Table getLatestTableProfile(
|
||||
String fqn,
|
||||
boolean includeColumnProfile,
|
||||
Authorizer authorizer,
|
||||
SecurityContext securityContext) {
|
||||
return getLatestTableProfileInternal(
|
||||
fqn,
|
||||
includeColumnProfile,
|
||||
t -> PIIMasker.getTableProfile(fqn, t.getColumns(), authorizer, securityContext));
|
||||
}
|
||||
|
||||
public Table getLatestTableProfile(
|
||||
String fqn, boolean authorizePII, boolean includeColumnProfile) {
|
||||
return getLatestTableProfileInternal(
|
||||
fqn, includeColumnProfile, t -> PIIMasker.getTableProfile(t.getColumns(), authorizePII));
|
||||
}
|
||||
|
||||
private Table getLatestTableProfileInternal(
|
||||
String fqn, boolean includeColumnProfile, Function<Table, List<Column>> maskFn) {
|
||||
Table table = findByName(fqn, ALL);
|
||||
|
||||
TableProfile tableProfile =
|
||||
JsonUtils.readValue(
|
||||
daoCollection
|
||||
@ -760,16 +786,16 @@ public class TableRepository extends EntityRepository<Table> {
|
||||
.getLatestExtension(table.getFullyQualifiedName(), TABLE_PROFILE_EXTENSION),
|
||||
TableProfile.class);
|
||||
table.setProfile(tableProfile);
|
||||
|
||||
if (includeColumnProfile) {
|
||||
setColumnProfile(table.getColumns());
|
||||
}
|
||||
|
||||
// Set the column tags. Will be used to hide the data
|
||||
if (!authorizePII) {
|
||||
populateEntityFieldTags(entityType, table.getColumns(), table.getFullyQualifiedName(), true);
|
||||
table.setColumns(PIIMasker.getTableProfile(table.getColumns()));
|
||||
}
|
||||
// Always populate field tags; the masking strategy decides what to do with them
|
||||
populateEntityFieldTags(entityType, table.getColumns(), table.getFullyQualifiedName(), true);
|
||||
|
||||
// Apply caller-provided masking strategy
|
||||
table.setColumns(maskFn.apply(table));
|
||||
return table;
|
||||
}
|
||||
|
||||
@ -1786,19 +1812,36 @@ public class TableRepository extends EntityRepository<Table> {
|
||||
}
|
||||
|
||||
public ResultList<Column> getTableColumns(
|
||||
UUID tableId, int limit, int offset, String fieldsParam, Include include) {
|
||||
UUID tableId,
|
||||
int limit,
|
||||
int offset,
|
||||
String fieldsParam,
|
||||
Include include,
|
||||
Authorizer authorizer,
|
||||
SecurityContext securityContext) {
|
||||
Table table = find(tableId, include);
|
||||
return getTableColumnsInternal(table, limit, offset, fieldsParam);
|
||||
return getTableColumnsInternal(table, limit, offset, fieldsParam, authorizer, securityContext);
|
||||
}
|
||||
|
||||
public ResultList<Column> getTableColumnsByFQN(
|
||||
String fqn, int limit, int offset, String fieldsParam, Include include) {
|
||||
String fqn,
|
||||
int limit,
|
||||
int offset,
|
||||
String fieldsParam,
|
||||
Include include,
|
||||
Authorizer authorizer,
|
||||
SecurityContext securityContext) {
|
||||
Table table = findByName(fqn, include);
|
||||
return getTableColumnsInternal(table, limit, offset, fieldsParam);
|
||||
return getTableColumnsInternal(table, limit, offset, fieldsParam, authorizer, securityContext);
|
||||
}
|
||||
|
||||
private org.openmetadata.service.util.ResultList<Column> getTableColumnsInternal(
|
||||
Table table, int limit, int offset, String fieldsParam) {
|
||||
Table table,
|
||||
int limit,
|
||||
int offset,
|
||||
String fieldsParam,
|
||||
Authorizer authorizer,
|
||||
SecurityContext securityContext) {
|
||||
// For paginated column access, we need to load the table with columns
|
||||
// but we'll optimize the field loading to only process what we need
|
||||
Table fullTable = get(null, table.getId(), getFields(Set.of(COLUMN_FIELD)), NON_DELETED, false);
|
||||
@ -1829,7 +1872,9 @@ public class TableRepository extends EntityRepository<Table> {
|
||||
if (fieldsParam != null && fieldsParam.contains("profile")) {
|
||||
setColumnProfile(paginatedColumns);
|
||||
populateEntityFieldTags(entityType, paginatedColumns, table.getFullyQualifiedName(), true);
|
||||
paginatedColumns = PIIMasker.getTableProfile(paginatedColumns);
|
||||
paginatedColumns =
|
||||
PIIMasker.getTableProfile(
|
||||
table.getFullyQualifiedName(), paginatedColumns, authorizer, securityContext);
|
||||
}
|
||||
|
||||
// Calculate pagination metadata
|
||||
@ -1974,19 +2019,41 @@ public class TableRepository extends EntityRepository<Table> {
|
||||
}
|
||||
|
||||
public ResultList<Column> searchTableColumnsById(
|
||||
UUID id, String query, int limit, int offset, String fieldsParam, Include include) {
|
||||
UUID id,
|
||||
String query,
|
||||
int limit,
|
||||
int offset,
|
||||
String fieldsParam,
|
||||
Include include,
|
||||
Authorizer authorizer,
|
||||
SecurityContext securityContext) {
|
||||
Table table = get(null, id, getFields(fieldsParam), include, false);
|
||||
return searchTableColumnsInternal(table, query, limit, offset, fieldsParam);
|
||||
return searchTableColumnsInternal(
|
||||
table, query, limit, offset, fieldsParam, authorizer, securityContext);
|
||||
}
|
||||
|
||||
public ResultList<Column> searchTableColumnsByFQN(
|
||||
String fqn, String query, int limit, int offset, String fieldsParam, Include include) {
|
||||
String fqn,
|
||||
String query,
|
||||
int limit,
|
||||
int offset,
|
||||
String fieldsParam,
|
||||
Include include,
|
||||
Authorizer authorizer,
|
||||
SecurityContext securityContext) {
|
||||
Table table = getByName(null, fqn, getFields(fieldsParam), include, false);
|
||||
return searchTableColumnsInternal(table, query, limit, offset, fieldsParam);
|
||||
return searchTableColumnsInternal(
|
||||
table, query, limit, offset, fieldsParam, authorizer, securityContext);
|
||||
}
|
||||
|
||||
private ResultList<Column> searchTableColumnsInternal(
|
||||
Table table, String query, int limit, int offset, String fieldsParam) {
|
||||
Table table,
|
||||
String query,
|
||||
int limit,
|
||||
int offset,
|
||||
String fieldsParam,
|
||||
Authorizer authorizer,
|
||||
SecurityContext securityContext) {
|
||||
List<Column> allColumns = table.getColumns();
|
||||
if (allColumns == null || allColumns.isEmpty()) {
|
||||
return new ResultList<>(List.of(), null, null, 0);
|
||||
@ -2035,7 +2102,9 @@ public class TableRepository extends EntityRepository<Table> {
|
||||
if (fieldsParam != null && fieldsParam.contains("profile")) {
|
||||
setColumnProfile(matchingColumns);
|
||||
populateEntityFieldTags(entityType, matchingColumns, table.getFullyQualifiedName(), true);
|
||||
matchingColumns = PIIMasker.getTableProfile(matchingColumns);
|
||||
matchingColumns =
|
||||
PIIMasker.getTableProfile(
|
||||
table.getFullyQualifiedName(), matchingColumns, authorizer, securityContext);
|
||||
}
|
||||
|
||||
String before = offset > 0 ? String.valueOf(Math.max(0, offset - limit)) : null;
|
||||
|
||||
@ -954,12 +954,12 @@ public class TableResource extends EntityResource<Table, TableRepository> {
|
||||
new OperationContext(entityType, MetadataOperation.VIEW_DATA_PROFILE);
|
||||
ResourceContext<?> resourceContext = getResourceContextByName(fqn);
|
||||
authorizer.authorize(securityContext, operationContext, resourceContext);
|
||||
boolean authorizePII = authorizer.authorizePII(securityContext, resourceContext.getOwners());
|
||||
|
||||
return Response.status(Response.Status.OK)
|
||||
.entity(
|
||||
JsonUtils.pojoToJson(
|
||||
repository.getLatestTableProfile(fqn, authorizePII, includeColumnProfile)))
|
||||
repository.getLatestTableProfile(
|
||||
fqn, includeColumnProfile, authorizer, securityContext)))
|
||||
.build();
|
||||
}
|
||||
|
||||
@ -1047,8 +1047,7 @@ public class TableResource extends EntityResource<Table, TableRepository> {
|
||||
fqn); // get table fqn for the resource context (vs column fqn)
|
||||
ResourceContext<?> resourceContext = getResourceContextByName(tableFqn);
|
||||
authorizer.authorize(securityContext, operationContext, resourceContext);
|
||||
boolean authorizePII = authorizer.authorizePII(securityContext, resourceContext.getOwners());
|
||||
return repository.getColumnProfiles(fqn, startTs, endTs, authorizePII);
|
||||
return repository.getColumnProfiles(fqn, startTs, endTs, authorizer, securityContext);
|
||||
}
|
||||
|
||||
@GET
|
||||
@ -1382,7 +1381,8 @@ public class TableResource extends EntityResource<Table, TableRepository> {
|
||||
authorizer.authorize(securityContext, operationContext, getResourceContextById(id));
|
||||
|
||||
ResultList<org.openmetadata.schema.type.Column> result =
|
||||
repository.getTableColumns(id, limitParam, offsetParam, fieldsParam, include);
|
||||
repository.getTableColumns(
|
||||
id, limitParam, offsetParam, fieldsParam, include, authorizer, securityContext);
|
||||
TableColumnList tableColumnList = new TableColumnList();
|
||||
tableColumnList.setData(result.getData());
|
||||
tableColumnList.setPaging(result.getPaging());
|
||||
@ -1441,7 +1441,8 @@ public class TableResource extends EntityResource<Table, TableRepository> {
|
||||
authorizer.authorize(securityContext, operationContext, getResourceContextByName(fqn));
|
||||
|
||||
ResultList<org.openmetadata.schema.type.Column> result =
|
||||
repository.getTableColumnsByFQN(fqn, limitParam, offsetParam, fieldsParam, include);
|
||||
repository.getTableColumnsByFQN(
|
||||
fqn, limitParam, offsetParam, fieldsParam, include, authorizer, securityContext);
|
||||
TableColumnList tableColumnList = new TableColumnList();
|
||||
tableColumnList.setData(result.getData());
|
||||
tableColumnList.setPaging(result.getPaging());
|
||||
@ -1625,7 +1626,8 @@ public class TableResource extends EntityResource<Table, TableRepository> {
|
||||
new OperationContext(entityType, MetadataOperation.VIEW_BASIC);
|
||||
authorizer.authorize(securityContext, operationContext, getResourceContextById(id));
|
||||
ResultList<Column> result =
|
||||
repository.searchTableColumnsById(id, query, limitParam, offsetParam, fieldsParam, include);
|
||||
repository.searchTableColumnsById(
|
||||
id, query, limitParam, offsetParam, fieldsParam, include, authorizer, securityContext);
|
||||
TableColumnList tableColumnList = new TableColumnList();
|
||||
tableColumnList.setData(result.getData());
|
||||
tableColumnList.setPaging(result.getPaging());
|
||||
@ -1686,7 +1688,7 @@ public class TableResource extends EntityResource<Table, TableRepository> {
|
||||
authorizer.authorize(securityContext, operationContext, getResourceContextByName(fqn));
|
||||
ResultList<org.openmetadata.schema.type.Column> result =
|
||||
repository.searchTableColumnsByFQN(
|
||||
fqn, query, limitParam, offsetParam, fieldsParam, include);
|
||||
fqn, query, limitParam, offsetParam, fieldsParam, include, authorizer, securityContext);
|
||||
TableColumnList tableColumnList = new TableColumnList();
|
||||
tableColumnList.setData(result.getData());
|
||||
tableColumnList.setPaging(result.getPaging());
|
||||
|
||||
@ -22,6 +22,7 @@ import org.openmetadata.schema.entity.teams.User;
|
||||
import org.openmetadata.schema.tests.TestCase;
|
||||
import org.openmetadata.schema.type.Column;
|
||||
import org.openmetadata.schema.type.ColumnProfile;
|
||||
import org.openmetadata.schema.type.EntityReference;
|
||||
import org.openmetadata.schema.type.Field;
|
||||
import org.openmetadata.schema.type.Include;
|
||||
import org.openmetadata.schema.type.TableData;
|
||||
@ -130,27 +131,46 @@ public class PIIMasker {
|
||||
return searchIndex;
|
||||
}
|
||||
|
||||
public static List<Column> getTableProfile(List<Column> columns) {
|
||||
for (Column column : listOrEmpty(columns)) {
|
||||
if (hasPiiSensitiveTag(column)) {
|
||||
column.setProfile(null);
|
||||
column.setName(flagMaskedName(column.getName()));
|
||||
public static List<Column> getTableProfile(
|
||||
String fqn, List<Column> columns, Authorizer authorizer, SecurityContext securityContext) {
|
||||
Table table = Entity.getEntityByName(Entity.TABLE, fqn, "owners", Include.ALL);
|
||||
List<EntityReference> owners = table.getOwners();
|
||||
boolean authorizePII = authorizer.authorizePII(securityContext, owners);
|
||||
return maskColumnsIfNotAuthorized(columns, authorizePII);
|
||||
}
|
||||
|
||||
public static List<Column> getTableProfile(List<Column> columns, boolean authorizePII) {
|
||||
return maskColumnsIfNotAuthorized(columns, authorizePII);
|
||||
}
|
||||
|
||||
private static List<Column> maskColumnsIfNotAuthorized(
|
||||
List<Column> columns, boolean authorizePII) {
|
||||
if (authorizePII) return columns;
|
||||
for (Column c : listOrEmpty(columns)) {
|
||||
if (hasPiiSensitiveTag(c)) {
|
||||
c.setProfile(null);
|
||||
c.setName(flagMaskedName(c.getName()));
|
||||
}
|
||||
}
|
||||
return columns;
|
||||
}
|
||||
|
||||
public static List<ColumnProfile> getColumnProfile(
|
||||
String fqn, List<ColumnProfile> columnProfiles) {
|
||||
String fqn,
|
||||
List<ColumnProfile> columnProfiles,
|
||||
Authorizer authorizer,
|
||||
SecurityContext securityContext) {
|
||||
Table table =
|
||||
Entity.getEntityByName(
|
||||
Entity.TABLE, FullyQualifiedName.getTableFQN(fqn), "columns,tags", Include.ALL);
|
||||
Entity.TABLE, FullyQualifiedName.getTableFQN(fqn), "columns,tags,owners", Include.ALL);
|
||||
Column column =
|
||||
table.getColumns().stream()
|
||||
.filter(c -> c.getFullyQualifiedName().equals(fqn))
|
||||
.findFirst()
|
||||
.orElse(null);
|
||||
if (column != null && hasPiiSensitiveTag(column)) {
|
||||
boolean authorizePII = authorizer.authorizePII(securityContext, table.getOwners());
|
||||
|
||||
if (column != null && hasPiiSensitiveTag(column) && !authorizePII) {
|
||||
return Collections.nCopies(columnProfiles.size(), new ColumnProfile());
|
||||
}
|
||||
return columnProfiles;
|
||||
|
||||
@ -28,6 +28,7 @@ import static org.junit.jupiter.api.Assertions.assertNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.openmetadata.common.utils.CommonUtil.getDateStringByOffset;
|
||||
import static org.openmetadata.common.utils.CommonUtil.listOf;
|
||||
import static org.openmetadata.common.utils.CommonUtil.listOrEmpty;
|
||||
import static org.openmetadata.csv.CsvUtil.recordToString;
|
||||
import static org.openmetadata.csv.EntityCsvTest.assertRows;
|
||||
import static org.openmetadata.csv.EntityCsvTest.assertSummary;
|
||||
@ -2931,6 +2932,72 @@ public class TableResourceTest extends EntityResourceTest<Table, CreateTable> {
|
||||
assertEquals(maskedColumnProfiles.getData().size(), columnProfiles.getData().size());
|
||||
}
|
||||
|
||||
@Test
|
||||
void test_sensitivePIIColumnProfile_byGetColumns(TestInfo test) throws Exception {
|
||||
// Arrange: create 2 profiled tables owned by USER1
|
||||
Table table =
|
||||
createEntity(
|
||||
createRequest(test).withOwners(Lists.newArrayList(USER1.getEntityReference())),
|
||||
ADMIN_AUTH_HEADERS);
|
||||
Table table1 =
|
||||
createEntity(
|
||||
createRequest(test, 1).withOwners(List.of(USER1.getEntityReference())),
|
||||
ADMIN_AUTH_HEADERS);
|
||||
|
||||
// Seed table/column profiles (C1, C2, C3)
|
||||
putTableProfile(table, table1, ADMIN_AUTH_HEADERS);
|
||||
|
||||
// Tag C3 as PII.Sensitive and persist
|
||||
Column c3 =
|
||||
table.getColumns().stream().filter(c -> c.getName().equals(C3)).findFirst().orElseThrow();
|
||||
String c3FQN = c3.getFullyQualifiedName();
|
||||
List<TagLabel> c3Tags = new ArrayList<>(listOrEmpty(c3.getTags()));
|
||||
c3Tags.add(
|
||||
new TagLabel().withTagFQN("PII.Sensitive").withSource(TagLabel.TagSource.CLASSIFICATION));
|
||||
c3.setTags(c3Tags);
|
||||
patchEntity(table.getId(), JsonUtils.pojoToJson(table), table, ADMIN_AUTH_HEADERS);
|
||||
|
||||
// Build the base target to fetch columns (requesting tags + profile fields)
|
||||
WebTarget baseColumnsTarget =
|
||||
getResource("tables/" + table.getId() + "/columns")
|
||||
.queryParam("limit", "1000")
|
||||
.queryParam("offset", "0")
|
||||
.queryParam("fields", "tags,profile")
|
||||
.queryParam("include", "non-deleted");
|
||||
|
||||
// --- Owner call: USER1 should see C3 profile values
|
||||
TableResource.TableColumnList ownerResp =
|
||||
TestUtils.get(
|
||||
baseColumnsTarget, TableResource.TableColumnList.class, authHeaders(USER1.getName()));
|
||||
|
||||
Column ownerC3 =
|
||||
ownerResp.getData().stream()
|
||||
.filter(col -> c3FQN.equals(col.getFullyQualifiedName()))
|
||||
.findFirst()
|
||||
.orElseThrow(() -> new AssertionError("C3 not found for owner in /{id}/columns"));
|
||||
|
||||
assertNotNull(ownerC3.getProfile(), "Owner should see a column profile object for C3");
|
||||
assertNotNull(ownerC3.getProfile().getMin(), "Owner should see min for PII column");
|
||||
assertNotNull(ownerC3.getProfile().getMax(), "Owner should see max for PII column");
|
||||
|
||||
// --- Non-owner call: USER2 should get masked stats for C3
|
||||
TableResource.TableColumnList nonOwnerResp =
|
||||
TestUtils.get(
|
||||
baseColumnsTarget,
|
||||
TableResource.TableColumnList.class,
|
||||
authHeaders(USER2_REF.getName()));
|
||||
|
||||
Column nonOwnerC3 =
|
||||
nonOwnerResp.getData().stream()
|
||||
.filter(col -> c3FQN.equals(col.getFullyQualifiedName()))
|
||||
.findFirst()
|
||||
.orElseThrow(() -> new AssertionError("C3 not found for non-owner in /{id}/columns"));
|
||||
|
||||
assertNull(
|
||||
nonOwnerC3.getProfile(),
|
||||
"Non-owner should NOT receive a profile object for PII-sensitive column");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testInheritedPermissionFromParent(TestInfo test) throws IOException {
|
||||
// DatabaseService has owner dataConsumer
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user