Fix #10542 & #10830 - Mask Sample Data PII & Column Metrics (#11919)

* Mask Sample Data PII

* Mask column profile

* Mask Test Cases

* Mask Test Cases

* Add tests

* Format

* Format

* Fix test case resource masking
This commit is contained in:
Pere Miquel Brull 2023-06-15 09:13:57 +02:00 committed by GitHub
parent 105dc9d48d
commit 75aa3d5fa1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 288 additions and 8 deletions

View File

@ -1,10 +1,15 @@
package org.openmetadata.service.jdbi3;
import static org.openmetadata.common.utils.CommonUtil.listOrEmpty;
import static org.openmetadata.common.utils.CommonUtil.nullOrEmpty;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.openmetadata.schema.type.Column;
import org.openmetadata.schema.type.TagLabel;
import org.openmetadata.service.exception.CatalogExceptionMessage;
import org.openmetadata.service.util.FullyQualifiedName;
@ -62,4 +67,15 @@ public final class ColumnUtil {
throw new IllegalArgumentException(CatalogExceptionMessage.invalidColumnFQN(columnFQN));
}
}
/**
 * Collects the tag FQNs attached to a column and, recursively, to all of its nested children.
 *
 * @param column column whose tags are gathered
 * @return a new mutable set with the tag FQNs of the column and its descendants
 */
public static Set<String> getAllTags(Column column) {
  // Tags set directly on this column
  Set<String> collected =
      listOrEmpty(column.getTags()).stream()
          .map(TagLabel::getTagFQN)
          .collect(Collectors.toCollection(HashSet::new));
  // Tags set on nested columns, at any depth
  listOrEmpty(column.getChildren()).stream().map(ColumnUtil::getAllTags).forEach(collected::addAll);
  return collected;
}
}

View File

@ -226,7 +226,7 @@ public class TableRepository extends EntityRepository<Table> {
}
@Transaction
public Table getSampleData(UUID tableId) throws IOException {
public Table getSampleData(UUID tableId, boolean authorizePII) throws IOException {
// Validate the request content
Table table = dao.findEntityById(tableId);
@ -236,6 +236,10 @@ public class TableRepository extends EntityRepository<Table> {
TableData.class);
table.setSampleData(sampleData);
setFieldsInternal(table, Fields.EMPTY_FIELDS);
// Set the column tags. Will be used to mask the sample data
if (!authorizePII) getColumnTags(true, table.getColumns());
return table;
}
@ -509,7 +513,7 @@ public class TableRepository extends EntityRepository<Table> {
}
@Transaction
public Table getLatestTableProfile(String fqn) throws IOException {
public Table getLatestTableProfile(String fqn, boolean authorizePII) throws IOException {
Table table = dao.findEntityByName(fqn);
TableProfile tableProfile =
JsonUtils.readValue(
@ -519,6 +523,10 @@ public class TableRepository extends EntityRepository<Table> {
TableProfile.class);
table.setProfile(tableProfile);
setColumnProfile(table.getColumns());
// Set the column tags. Will be used to hide the data
if (!authorizePII) getColumnTags(true, table.getColumns());
return table;
}

View File

@ -72,7 +72,9 @@ import org.openmetadata.service.jdbi3.TableRepository;
import org.openmetadata.service.resources.Collection;
import org.openmetadata.service.resources.EntityResource;
import org.openmetadata.service.security.Authorizer;
import org.openmetadata.service.security.mask.PIIMasker;
import org.openmetadata.service.security.policyevaluator.OperationContext;
import org.openmetadata.service.security.policyevaluator.ResourceContext;
import org.openmetadata.service.util.EntityUtil.Fields;
import org.openmetadata.service.util.ResultList;
@ -531,8 +533,12 @@ public class TableResource extends EntityResource<Table, TableRepository> {
@Parameter(description = "Id of the table", schema = @Schema(type = "UUID")) @PathParam("id") UUID id)
throws IOException {
OperationContext operationContext = new OperationContext(entityType, MetadataOperation.VIEW_SAMPLE_DATA);
authorizer.authorize(securityContext, operationContext, getResourceContextById(id));
return addHref(uriInfo, repository.getSampleData(id));
ResourceContext resourceContext = getResourceContextById(id);
authorizer.authorize(securityContext, operationContext, resourceContext);
boolean authorizePII = authorizer.authorizePII(securityContext, resourceContext.getOwner());
Table maskedTable = PIIMasker.getSampleData(repository.getSampleData(id, authorizePII), authorizePII);
return addHref(uriInfo, maskedTable);
}
@DELETE
@ -647,8 +653,11 @@ public class TableResource extends EntityResource<Table, TableRepository> {
String fqn)
throws IOException {
OperationContext operationContext = new OperationContext(entityType, MetadataOperation.VIEW_DATA_PROFILE);
authorizer.authorize(securityContext, operationContext, getResourceContextByName(fqn));
return repository.getLatestTableProfile(fqn);
ResourceContext resourceContext = getResourceContextByName(fqn);
authorizer.authorize(securityContext, operationContext, resourceContext);
boolean authorizePII = authorizer.authorizePII(securityContext, resourceContext.getOwner());
return PIIMasker.getTableProfile(repository.getLatestTableProfile(fqn, authorizePII), authorizePII);
}
@GET

View File

@ -11,7 +11,9 @@ import io.swagger.v3.oas.annotations.responses.ApiResponse;
import io.swagger.v3.oas.annotations.tags.Tag;
import java.io.IOException;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
import java.util.stream.Collectors;
import javax.json.JsonPatch;
import javax.validation.Valid;
import javax.validation.constraints.Max;
@ -37,9 +39,11 @@ import lombok.extern.slf4j.Slf4j;
import org.openmetadata.schema.api.data.RestoreEntity;
import org.openmetadata.schema.api.tests.CreateLogicalTestCases;
import org.openmetadata.schema.api.tests.CreateTestCase;
import org.openmetadata.schema.entity.data.Table;
import org.openmetadata.schema.tests.TestCase;
import org.openmetadata.schema.tests.TestSuite;
import org.openmetadata.schema.tests.type.TestCaseResult;
import org.openmetadata.schema.type.Column;
import org.openmetadata.schema.type.EntityHistory;
import org.openmetadata.schema.type.Include;
import org.openmetadata.schema.type.MetadataOperation;
@ -49,8 +53,10 @@ import org.openmetadata.service.jdbi3.ListFilter;
import org.openmetadata.service.jdbi3.TestCaseRepository;
import org.openmetadata.service.resources.Collection;
import org.openmetadata.service.resources.EntityResource;
import org.openmetadata.service.resources.feeds.MessageParser;
import org.openmetadata.service.resources.feeds.MessageParser.EntityLink;
import org.openmetadata.service.security.Authorizer;
import org.openmetadata.service.security.mask.PIIMasker;
import org.openmetadata.service.security.policyevaluator.OperationContext;
import org.openmetadata.service.security.policyevaluator.ResourceContextInterface;
import org.openmetadata.service.security.policyevaluator.TestCaseResourceContext;
@ -175,8 +181,45 @@ public class TestCaseResource extends EntityResource<TestCase, TestCaseRepositor
// Override OperationContext to change the entity to table and operation from VIEW_ALL to VIEW_TESTS
OperationContext operationContext = new OperationContext(Entity.TABLE, MetadataOperation.VIEW_TESTS);
Fields fields = getFields(fieldsParam);
return super.listInternal(
uriInfo, securityContext, fields, filter, limitParam, before, after, operationContext, resourceContext);
ResultList<TestCase> tests =
super.listInternal(
uriInfo, securityContext, fields, filter, limitParam, before, after, operationContext, resourceContext);
List<TestCase> maskedTests =
tests.getData().stream()
.map(
testCase -> {
try {
EntityLink testCaseLink = MessageParser.EntityLink.parse(testCase.getEntityLink());
Table table =
Entity.getEntityByName(
Entity.TABLE, testCaseLink.getEntityFQN(), "owner,tags", Include.NON_DELETED);
// Ignore table tests
if (testCaseLink.getFieldName() == null) return testCase;
Optional<Column> referencedColumn =
table.getColumns().stream()
.filter(
col -> testCaseLink.getFullyQualifiedFieldValue().equals(col.getFullyQualifiedName()))
.findFirst();
if (referencedColumn.isPresent()) {
Column col = referencedColumn.get();
// We need the table owner to know if we can authorize the access
boolean authorizePII = authorizer.authorizePII(securityContext, table.getOwner());
return PIIMasker.getTestCase(col, testCase, authorizePII);
}
return testCase;
} catch (IOException e) {
throw new RuntimeException(e);
}
})
.collect(Collectors.toList());
tests.setData(maskedTests);
return tests;
}
@GET

View File

@ -17,6 +17,7 @@ import java.io.IOException;
import java.util.List;
import javax.ws.rs.core.SecurityContext;
import org.jdbi.v3.core.Jdbi;
import org.openmetadata.schema.type.EntityReference;
import org.openmetadata.schema.type.ResourcePermission;
import org.openmetadata.service.OpenMetadataApplicationConfig;
import org.openmetadata.service.security.policyevaluator.OperationContext;
@ -46,4 +47,7 @@ public interface Authorizer {
boolean decryptSecret(SecurityContext securityContext);
boolean shouldMaskPasswords(SecurityContext securityContext);
/**
 * Let the user view PII Sensitive data.
 *
 * @param securityContext security context of the request being served
 * @param owner owner of the entity whose data is being read
 * @return {@code true} when PII Sensitive data may be returned unmasked; callers hand this flag to the masking code
 */
boolean authorizePII(SecurityContext securityContext, EntityReference owner);
}

View File

@ -21,6 +21,7 @@ import java.util.List;
import javax.ws.rs.core.SecurityContext;
import lombok.extern.slf4j.Slf4j;
import org.jdbi.v3.core.Jdbi;
import org.openmetadata.schema.type.EntityReference;
import org.openmetadata.schema.type.ResourcePermission;
import org.openmetadata.service.OpenMetadataApplicationConfig;
import org.openmetadata.service.security.policyevaluator.OperationContext;
@ -97,6 +98,13 @@ public class DefaultAuthorizer implements Authorizer {
return !subjectContext.isBot();
}
/**
 * Grants access to PII Sensitive data to admins, bots and the owner of the resource.
 *
 * <p>In 1.2, evaluate policies here instead of just checking the subject.
 */
@Override
public boolean authorizePII(SecurityContext securityContext, EntityReference owner) {
  SubjectContext subject = getSubjectContext(securityContext);
  if (subject.isAdmin()) return true;
  if (subject.isBot()) return true;
  // Everyone else must own the resource
  return subject.isOwner(owner);
}
public static SubjectContext getSubjectContext(SecurityContext securityContext) {
if (securityContext == null || securityContext.getUserPrincipal() == null) {
throw new AuthenticationException("No principal in security context");

View File

@ -20,6 +20,7 @@ import javax.ws.rs.core.SecurityContext;
import lombok.extern.slf4j.Slf4j;
import org.jdbi.v3.core.Jdbi;
import org.openmetadata.schema.entity.teams.User;
import org.openmetadata.schema.type.EntityReference;
import org.openmetadata.schema.type.Permission.Access;
import org.openmetadata.schema.type.ResourcePermission;
import org.openmetadata.service.Entity;
@ -108,4 +109,9 @@ public class NoopAuthorizer implements Authorizer {
public boolean shouldMaskPasswords(SecurityContext securityContext) {
return false; // Always show passwords
}
/** This authorizer never restricts PII access: the answer is {@code true} regardless of caller or owner. */
@Override
public boolean authorizePII(SecurityContext securityContext, EntityReference owner) {
return true; // Always show PII Sensitive data
}
}

View File

@ -0,0 +1,83 @@
package org.openmetadata.service.security.mask;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.openmetadata.schema.entity.data.Table;
import org.openmetadata.schema.tests.TestCase;
import org.openmetadata.schema.type.Column;
import org.openmetadata.schema.type.TableData;
import org.openmetadata.service.jdbi3.ColumnUtil;
/**
 * Hides PII Sensitive information from callers that are not authorized to see it.
 *
 * <p>A column is treated as sensitive when it, or any of its nested children, carries the {@link #SENSITIVE_PII_TAG}
 * tag. All methods return the entity they received: it is passed through untouched for authorized callers and
 * modified in place otherwise.
 */
public class PIIMasker {
  public static final String SENSITIVE_PII_TAG = "PII.Sensitive";
  public static final String MASKED_VALUE = "********";
  public static final String MASKED_NAME = "[MASKED]";

  /**
   * Masks the sample data values of the sensitive columns and flags their names in the sample data header.
   *
   * @param table table carrying the sample data and the tagged columns
   * @param authorized whether the caller may see PII Sensitive data
   * @return the same table instance, masked in place when the caller is not authorized
   */
  public static Table getSampleData(Table table, boolean authorized) {
    if (authorized) return table;

    TableData sampleData = table.getSampleData();
    // A table without stored sample data has nothing to mask
    if (sampleData == null || sampleData.getColumns() == null) return table;
    List<String> sampleDataColumns = sampleData.getColumns();

    // Positions, within the sample data header, of the sensitive columns. Columns missing from the sample
    // data (indexOf == -1) are skipped; duplicates are dropped so a header is never flagged twice.
    List<Integer> columnsPositionToBeMasked =
        table.getColumns().stream()
            .filter(PIIMasker::hasPiiSensitiveTag)
            .map(c -> sampleDataColumns.indexOf(c.getName()))
            .filter(position -> position >= 0)
            .distinct()
            .collect(Collectors.toList());

    // Mask rows
    if (sampleData.getRows() != null) {
      sampleData.setRows(
          sampleData.getRows().stream()
              .map(r -> maskSampleDataRow(r, columnsPositionToBeMasked))
              .collect(Collectors.toList()));
    }

    // Flag column names as masked
    columnsPositionToBeMasked.forEach(
        position -> sampleDataColumns.set(position, flagMaskedName(sampleDataColumns.get(position))));

    table.setSampleData(sampleData);
    return table;
  }

  /**
   * Removes the profile of the sensitive columns and flags their names as masked.
   *
   * @param table table whose columns carry the profiles and tags
   * @param authorized whether the caller may see PII Sensitive data
   * @return the same table instance, masked in place when the caller is not authorized
   */
  public static Table getTableProfile(Table table, boolean authorized) {
    if (authorized) return table;
    for (Column column : table.getColumns()) {
      if (hasPiiSensitiveTag(column)) {
        column.setProfile(null);
        column.setName(flagMaskedName(column.getName()));
      }
    }
    return table;
  }

  /**
   * Clears the result, parameter values and description of a test case that targets a sensitive column, and flags
   * its name as masked.
   *
   * @param column column referenced by the test case
   * @param testCase test case to mask
   * @param authorized whether the caller may see PII Sensitive data
   * @return the same test case instance, masked in place when needed
   */
  public static TestCase getTestCase(Column column, TestCase testCase, boolean authorized) {
    if (authorized || !hasPiiSensitiveTag(column)) return testCase;
    testCase.setTestCaseResult(null);
    testCase.setParameterValues(null);
    testCase.setDescription(null);
    testCase.setName(flagMaskedName(testCase.getName()));
    return testCase;
  }

  /** True when the column or any of its children is tagged with {@link #SENSITIVE_PII_TAG}. */
  private static boolean hasPiiSensitiveTag(Column column) {
    return ColumnUtil.getAllTags(column).contains(SENSITIVE_PII_TAG);
  }

  /** Overwrites, in place, the values at the given positions of a sample data row with {@link #MASKED_VALUE}. */
  private static List<Object> maskSampleDataRow(List<Object> row, List<Integer> columnsPositionToBeMasked) {
    for (int position : columnsPositionToBeMasked) {
      // A row shorter than the header has no value to hide at this position
      if (position < row.size()) row.set(position, MASKED_VALUE);
    }
    return row;
  }

  /** Appends the {@link #MASKED_NAME} marker to a name. */
  private static String flagMaskedName(String name) {
    return String.format("%s %s", name, MASKED_NAME);
  }
}

View File

@ -20,6 +20,7 @@ import static javax.ws.rs.core.Response.Status.FORBIDDEN;
import static javax.ws.rs.core.Response.Status.NOT_FOUND;
import static javax.ws.rs.core.Response.Status.OK;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
@ -43,6 +44,7 @@ import static org.openmetadata.service.exception.CatalogExceptionMessage.entityN
import static org.openmetadata.service.exception.CatalogExceptionMessage.invalidColumnFQN;
import static org.openmetadata.service.exception.CatalogExceptionMessage.permissionNotAllowed;
import static org.openmetadata.service.security.SecurityUtil.authHeaders;
import static org.openmetadata.service.security.mask.PIIMasker.MASKED_VALUE;
import static org.openmetadata.service.util.EntityUtil.fieldAdded;
import static org.openmetadata.service.util.EntityUtil.fieldDeleted;
import static org.openmetadata.service.util.EntityUtil.fieldUpdated;
@ -1759,6 +1761,59 @@ public class TableResourceTest extends EntityResourceTest<Table, CreateTable> {
assertEquals("P30D", table.getRetentionPeriod());
}
@Test
void test_sensitivePIISampleData(TestInfo test) throws IOException {
  // Table owned by USER_TEAM21, with a column tagged as PII.Sensitive
  Table table =
      createAndCheckEntity(createRequest(test).withOwner(USER_TEAM21.getEntityReference()), ADMIN_AUTH_HEADERS);

  // Three rows of sample data covering the three columns
  List<String> columns = Arrays.asList(C1, C2, C3);
  List<List<Object>> rows =
      Arrays.asList(
          Arrays.asList("c1Value1", 1, true),
          Arrays.asList("c1Value2", null, false),
          Arrays.asList("c1Value3", 3, true));
  putSampleData(table, columns, rows, ADMIN_AUTH_HEADERS);

  // The table owner reads the data as stored: no cell holds the mask
  table = getSampleData(table.getId(), authHeaders(USER_TEAM21.getName()));
  boolean ownerSeesMask =
      table.getSampleData().getRows().stream()
          .flatMap(List::stream)
          .anyMatch(cell -> cell != null && MASKED_VALUE.equals(cell.toString()));
  assertFalse(ownerSeesMask);

  // Anybody else gets one masked cell per row
  table = getSampleData(table.getId(), authHeaders(USER1_REF.getName()));
  long maskedCells =
      table.getSampleData().getRows().stream()
          .flatMap(List::stream)
          .filter(cell -> cell != null && MASKED_VALUE.equals(cell.toString()))
          .count();
  assertEquals(3, maskedCells);
}
@Test
void test_sensitivePIIColumnProfile(TestInfo test) throws IOException, ParseException {
  // Both tables are owned by USER_TEAM21; C3 has the PII.Sensitive tag
  Table profiledTable =
      createEntity(createRequest(test).withOwner(USER_TEAM21.getEntityReference()), ADMIN_AUTH_HEADERS);
  Table secondTable =
      createEntity(createRequest(test, 1).withOwner(USER_TEAM21.getEntityReference()), ADMIN_AUTH_HEADERS);
  putTableProfile(profiledTable, secondTable, ADMIN_AUTH_HEADERS);
  String profiledTableFqn = profiledTable.getFullyQualifiedName();

  // The owner gets the column profile of C3
  Table seenByOwner = getLatestTableProfile(profiledTableFqn, authHeaders(USER_TEAM21.getName()));
  assertNotNull(seenByOwner.getColumns().get(2).getProfile());

  // A non owner gets C3 without its profile
  Table seenByNonOwner = getLatestTableProfile(profiledTableFqn, authHeaders(USER1_REF.getName()));
  assertNull(seenByNonOwner.getColumns().get(2).getProfile());
}
/** Applies the single-table {@code assertFields} check to every table of the list, in order. */
void assertFields(List<Table> tableList, String fieldsParam) {
  for (Table table : tableList) {
    assertFields(table, fieldsParam);
  }
}

View File

@ -6,6 +6,8 @@ import static javax.ws.rs.core.Response.Status.FORBIDDEN;
import static javax.ws.rs.core.Response.Status.NOT_FOUND;
import static javax.ws.rs.core.Response.Status.OK;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.openmetadata.schema.type.MetadataOperation.EDIT_TESTS;
import static org.openmetadata.service.Entity.ADMIN_USER_NAME;
@ -16,6 +18,7 @@ import static org.openmetadata.service.util.EntityUtil.fieldUpdated;
import static org.openmetadata.service.util.TestUtils.ADMIN_AUTH_HEADERS;
import static org.openmetadata.service.util.TestUtils.TEST_AUTH_HEADERS;
import static org.openmetadata.service.util.TestUtils.TEST_USER_NAME;
import static org.openmetadata.service.util.TestUtils.assertListNotEmpty;
import static org.openmetadata.service.util.TestUtils.assertListNotNull;
import static org.openmetadata.service.util.TestUtils.assertListNull;
import static org.openmetadata.service.util.TestUtils.assertResponse;
@ -38,6 +41,7 @@ import org.junit.jupiter.api.TestMethodOrder;
import org.openmetadata.schema.api.data.CreateTable;
import org.openmetadata.schema.api.tests.CreateTestCase;
import org.openmetadata.schema.api.tests.CreateTestSuite;
import org.openmetadata.schema.entity.data.Table;
import org.openmetadata.schema.tests.TestCase;
import org.openmetadata.schema.tests.TestCaseParameterValue;
import org.openmetadata.schema.tests.TestSuite;
@ -333,6 +337,50 @@ public class TestCaseResourceTest extends EntityResourceTest<TestCase, CreateTes
verifyTestCaseResults(testCaseResults, testCase1ResultList, 4);
}
@Test
void test_sensitivePIITestCase(TestInfo test) throws IOException {
  // First, create a table owned by USER1 with the PII Sensitive tag in a column
  TableResourceTest tableResourceTest = new TableResourceTest();
  CreateTable tableReq =
      tableResourceTest
          .createRequest(test)
          .withName("sensitiveTableTest")
          .withDatabaseSchema(DATABASE_SCHEMA.getFullyQualifiedName())
          .withOwner(USER1_REF)
          .withColumns(
              List.of(
                  new Column()
                      .withName(C1)
                      .withDisplayName("c1")
                      .withDataType(ColumnDataType.VARCHAR)
                      .withDataLength(10)
                      .withTags(List.of(PII_SENSITIVE_TAG_LABEL))));
  Table sensitiveTable = tableResourceTest.createAndCheckEntity(tableReq, ADMIN_AUTH_HEADERS);
  String sensitiveColumnLink =
      String.format("<#E::table::%s::columns::%s>", sensitiveTable.getFullyQualifiedName(), C1);

  // Then attach a test case to the sensitive column
  CreateTestCase create = createRequest(test);
  create
      .withEntityLink(sensitiveColumnLink)
      .withTestSuite(TEST_SUITE1.getFullyQualifiedName())
      .withTestDefinition(TEST_DEFINITION3.getFullyQualifiedName())
      .withParameterValues(List.of(new TestCaseParameterValue().withValue("100").withName("missingCountValue")));
  createAndCheckEntity(create, ADMIN_AUTH_HEADERS);

  // Owner can see the test case details
  ResultList<TestCase> testCases =
      getTestCases(10, "*", sensitiveColumnLink, false, authHeaders(USER1_REF.getName()));
  TestCase visibleTestCase = testCases.getData().get(0);
  assertNotNull(visibleTestCase.getDescription());
  assertListNotEmpty(visibleTestCase.getParameterValues());

  // Non owner gets the test case masked: no description and no parameter values
  ResultList<TestCase> maskedTestCases =
      getTestCases(10, "*", sensitiveColumnLink, false, authHeaders(USER2_REF.getName()));
  TestCase maskedTestCase = maskedTestCases.getData().get(0);
  assertNull(maskedTestCase.getDescription());
  assertEquals(0, maskedTestCase.getParameterValues().size());
}
@Test
@Order(1)
void put_testCase_list_200(TestInfo test) throws IOException {