[GEN-1704] Remove Table Details from Import/Export at table level (#18093)

* [GEN-1704] Remove Table Details from Import/Export at table level, and only add columns

* Fix Tests
This commit is contained in:
Mohit Yadav 2024-10-04 10:07:45 +05:30 committed by sonikashah
parent b02545dad8
commit 93c444cfa2
4 changed files with 87 additions and 225 deletions

View File

@ -198,6 +198,14 @@ public final class CommonUtil {
return object == null || nullOrEmpty(object.toString());
}
/**
 * Returns {@code defaultValue} when {@code object} is null or its string form is null/empty;
 * otherwise returns {@code object} unchanged.
 */
public static <T> T nullOrDefault(T object, T defaultValue) {
  // Guard the toString() call: only reached when object is non-null.
  boolean absent = object == null || nullOrEmpty(object.toString());
  return absent ? defaultValue : object;
}
/**
 * Reads a classpath resource fully into a UTF-8 string.
 *
 * @param loader class loader used to locate the resource
 * @param file resource path relative to the classpath root
 * @return the resource contents decoded as UTF-8
 * @throws IOException if the resource cannot be read
 * @throws NullPointerException if the resource does not exist
 */
public static String getResourceAsStream(ClassLoader loader, String file) throws IOException {
  // try-with-resources: IOUtils.toString does not close the stream, so the original
  // implementation leaked the InputStream on every call.
  try (java.io.InputStream in =
      Objects.requireNonNull(loader.getResourceAsStream(file), "Resource not found: " + file)) {
    return IOUtils.toString(in, UTF_8);
  }
}

View File

@ -16,12 +16,11 @@ package org.openmetadata.service.jdbi3;
import static java.util.stream.Collectors.groupingBy;
import static org.openmetadata.common.utils.CommonUtil.listOf;
import static org.openmetadata.common.utils.CommonUtil.listOrEmpty;
import static org.openmetadata.common.utils.CommonUtil.nullOrDefault;
import static org.openmetadata.common.utils.CommonUtil.nullOrEmpty;
import static org.openmetadata.csv.CsvUtil.addField;
import static org.openmetadata.csv.CsvUtil.addGlossaryTerms;
import static org.openmetadata.csv.CsvUtil.addOwners;
import static org.openmetadata.csv.CsvUtil.addTagLabels;
import static org.openmetadata.csv.CsvUtil.addTagTiers;
import static org.openmetadata.schema.type.Include.ALL;
import static org.openmetadata.schema.type.Include.NON_DELETED;
import static org.openmetadata.service.Entity.DATABASE_SCHEMA;
@ -49,6 +48,7 @@ import java.util.UUID;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.json.JsonPatch;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;
@ -64,6 +64,7 @@ import org.openmetadata.schema.entity.data.DatabaseSchema;
import org.openmetadata.schema.entity.data.Table;
import org.openmetadata.schema.entity.feed.Suggestion;
import org.openmetadata.schema.tests.CustomMetric;
import org.openmetadata.schema.type.ApiStatus;
import org.openmetadata.schema.type.Column;
import org.openmetadata.schema.type.ColumnDataType;
import org.openmetadata.schema.type.ColumnJoin;
@ -72,6 +73,7 @@ import org.openmetadata.schema.type.ColumnProfilerConfig;
import org.openmetadata.schema.type.DailyCount;
import org.openmetadata.schema.type.DataModel;
import org.openmetadata.schema.type.EntityReference;
import org.openmetadata.schema.type.Include;
import org.openmetadata.schema.type.JoinedWith;
import org.openmetadata.schema.type.Relationship;
import org.openmetadata.schema.type.SuggestionType;
@ -104,6 +106,7 @@ import org.openmetadata.service.util.FullyQualifiedName;
import org.openmetadata.service.util.JsonUtils;
import org.openmetadata.service.util.RestUtil;
import org.openmetadata.service.util.ResultList;
import org.openmetadata.service.util.ValidatorUtil;
@Slf4j
public class TableRepository extends EntityRepository<Table> {
@ -1138,8 +1141,6 @@ public class TableRepository extends EntityRepository<Table> {
public static class TableCsv extends EntityCsv<Table> {
public static final CsvDocumentation DOCUMENTATION = getCsvDocumentation(TABLE);
public static final List<CsvHeader> HEADERS = DOCUMENTATION.getHeaders();
public static final List<CsvHeader> COLUMN_HEADERS =
resetRequiredColumns(DOCUMENTATION.getHeaders(), listOf("name"));
private final Table table;
@ -1150,73 +1151,57 @@ public class TableRepository extends EntityRepository<Table> {
@Override
protected void createEntity(CSVPrinter printer, List<CSVRecord> csvRecords) throws IOException {
CSVRecord csvRecord = getNextRecord(printer, csvRecords);
// Headers: name, displayName, description, owners, tags, glossaryTerms, tiers
// retentionPeriod,
// sourceUrl, domain, column.fullyQualifiedName, column.displayName, column.description,
// column.dataTypeDisplay,
// Headers: column.fullyQualifiedName, column.displayName, column.description,
// column.dataTypeDisplay,column.dataType, column.arrayDataType, column.dataLength,
// column.tags, column.glossaryTerms
Table originalEntity = JsonUtils.deepCopy(table, Table.class);
CSVRecord csvRecord = getNextRecord(printer, csvRecords);
if (csvRecord != null) {
if (processRecord) {
// fields tags(4), glossaryTerms(5), tiers(6)
List<TagLabel> tagLabels =
getTagLabels(
printer,
csvRecord,
List.of(
Pair.of(4, TagLabel.TagSource.CLASSIFICATION),
Pair.of(5, TagLabel.TagSource.GLOSSARY),
Pair.of(6, TagLabel.TagSource.CLASSIFICATION)));
table
.withName(csvRecord.get(0))
.withDisplayName(csvRecord.get(1))
.withDescription(csvRecord.get(2))
.withOwners(getOwners(printer, csvRecord, 3))
.withTags(tagLabels != null && tagLabels.isEmpty() ? null : tagLabels)
.withRetentionPeriod(csvRecord.get(7))
.withSourceUrl(csvRecord.get(8))
.withDomain(getEntityReference(printer, csvRecord, 9, Entity.DOMAIN));
ImportResult importResult = updateColumn(printer, csvRecord);
if (importResult.result().equals(IMPORT_FAILED)) {
importFailure(printer, importResult.details(), csvRecord);
}
}
List<ImportResult> importResults = new ArrayList<>();
updateColumns(printer, csvRecords, importResults);
if (processRecord) {
createEntity(printer, csvRecord, table);
}
for (ImportResult importResult : importResults) {
if (importResult.result().equals(IMPORT_SUCCESS)) {
importSuccess(printer, importResult.record(), importResult.details());
} else {
importFailure(printer, importResult.details(), importResult.record());
}
}
updateColumnsFromCsv(printer, csvRecord);
}
if (processRecord) {
patchColumns(originalEntity, printer, csvRecord, table);
}
}
public void updateColumns(
CSVPrinter printer, List<CSVRecord> csvRecords, List<ImportResult> results)
private void patchColumns(
Table originalTable, CSVPrinter resultsPrinter, CSVRecord csvRecord, Table entity)
throws IOException {
while (recordIndex < csvRecords.size() && csvRecords.get(0) != null) { // Column records
CSVRecord csvRecord = getNextRecord(printer, COLUMN_HEADERS, csvRecords);
if (csvRecord == null) {
// Since the processing failed for a particular csvRecord get the previous one
CSVRecord failedRecord = csvRecords.get(recordIndex - 1);
results.add(
new ImportResult(IMPORT_SKIPPED, failedRecord, super.importResult.getAbortReason()));
} else {
results.add(updateColumn(printer, csvRecord));
}
EntityRepository<Table> repository =
(EntityRepository<Table>) Entity.getEntityRepository(TABLE);
// Validate
String violations = ValidatorUtil.validate(entity);
if (violations != null) {
// JSON schema based validation failed for the entity
importFailure(resultsPrinter, violations, csvRecord);
return;
}
if (Boolean.FALSE.equals(importResult.getDryRun())) { // If not dry run, create the entity
try {
JsonPatch jsonPatch = JsonUtils.getJsonPatch(originalTable, table);
repository.patch(null, table.getId(), importedBy, jsonPatch);
} catch (Exception ex) {
importFailure(resultsPrinter, ex.getMessage(), csvRecord);
importResult.setStatus(ApiStatus.FAILURE);
return;
}
} else { // Dry run don't create the entity
repository.setFullyQualifiedName(entity);
repository.findByNameOrNull(entity.getFullyQualifiedName(), Include.NON_DELETED);
// Track the dryRun created entities, as they may be referred by other entities being
// created
// during import
dryRunCreatedEntities.put(entity.getFullyQualifiedName(), entity);
}
importSuccess(resultsPrinter, csvRecord, ENTITY_UPDATED);
}
public ImportResult updateColumn(CSVPrinter printer, CSVRecord csvRecord) throws IOException {
String columnFqn = csvRecord.get(10);
public void updateColumnsFromCsv(CSVPrinter printer, CSVRecord csvRecord) throws IOException {
String columnFqn = csvRecord.get(0);
Column column = findColumn(table.getColumns(), columnFqn);
boolean columnExists = column != null;
if (column == null) {
if (!columnExists) {
// Create Column, if not found
column =
new Column()
@ -1224,24 +1209,27 @@ public class TableRepository extends EntityRepository<Table> {
.withFullyQualifiedName(
table.getFullyQualifiedName() + Entity.SEPARATOR + columnFqn);
}
column.withDisplayName(csvRecord.get(11));
column.withDescription(csvRecord.get(12));
column.withDataTypeDisplay(csvRecord.get(13));
column.withDisplayName(csvRecord.get(1));
column.withDescription(csvRecord.get(2));
column.withDataTypeDisplay(csvRecord.get(3));
column.withDataType(
nullOrEmpty(csvRecord.get(14)) ? null : ColumnDataType.fromValue(csvRecord.get(14)));
nullOrEmpty(csvRecord.get(4)) ? null : ColumnDataType.fromValue(csvRecord.get(4)));
column.withArrayDataType(
nullOrEmpty(csvRecord.get(15)) ? null : ColumnDataType.fromValue(csvRecord.get(15)));
nullOrEmpty(csvRecord.get(5)) ? null : ColumnDataType.fromValue(csvRecord.get(5)));
column.withDataLength(
nullOrEmpty(csvRecord.get(16)) ? null : Integer.parseInt(csvRecord.get(16)));
nullOrEmpty(csvRecord.get(6)) ? null : Integer.parseInt(csvRecord.get(6)));
List<TagLabel> tagLabels =
getTagLabels(
printer,
csvRecord,
List.of(
Pair.of(17, TagLabel.TagSource.CLASSIFICATION),
Pair.of(18, TagLabel.TagSource.GLOSSARY)));
Pair.of(7, TagLabel.TagSource.CLASSIFICATION),
Pair.of(8, TagLabel.TagSource.GLOSSARY)));
column.withTags(nullOrEmpty(tagLabels) ? null : tagLabels);
column.withOrdinalPosition(nullOrEmpty(table.getColumns()) ? 0 : table.getColumns().size());
column.withOrdinalPosition(
nullOrDefault(
column.getOrdinalPosition(),
Long.valueOf((csvRecord.getRecordNumber() - 1)).intValue()));
// If Column Does not Exist add it to the table
if (!columnExists) {
@ -1258,10 +1246,10 @@ public class TableRepository extends EntityRepository<Table> {
Entity.SEPARATOR, Arrays.copyOf(splitColumnName, splitColumnName.length - 1));
Column parentColumn = findColumn(table.getColumns(), parentColumnFqn);
if (parentColumn == null) {
return new ImportResult(
IMPORT_FAILED,
csvRecord,
"Parent Column not found. Check the order of the columns in the CSV file.");
importFailure(
printer,
"Parent Column not found. Check the order of the columns in the CSV file.",
csvRecord);
}
// Update Name And Ordinal position in the parent column
@ -1277,52 +1265,19 @@ public class TableRepository extends EntityRepository<Table> {
parentColumn.withChildren(children);
}
}
return new ImportResult(IMPORT_SUCCESS, csvRecord, ENTITY_UPDATED);
}
@Override
protected void addRecord(CsvFile csvFile, Table entity) {
// Headers: name, displayName, description, owner, tags, retentionPeriod, sourceUrl, domain
// column.fullyQualifiedName, column.displayName, column.description, column.dataTypeDisplay,
// column.tags
List<String> recordList = new ArrayList<>();
addField(recordList, entity.getName());
addField(recordList, entity.getDisplayName());
addField(recordList, entity.getDescription());
addOwners(recordList, entity.getOwners());
addTagLabels(recordList, entity.getTags());
addGlossaryTerms(recordList, entity.getTags());
addTagTiers(recordList, entity.getTags());
addField(recordList, entity.getRetentionPeriod());
addField(recordList, entity.getSourceUrl());
String domain =
entity.getDomain() == null || Boolean.TRUE.equals(entity.getDomain().getInherited())
? ""
: entity.getDomain().getFullyQualifiedName();
addField(recordList, domain);
if (!nullOrEmpty(table.getColumns())) {
addRecord(csvFile, recordList, table.getColumns().get(0), false);
for (int i = 1; i < entity.getColumns().size(); i++) {
addRecord(csvFile, new ArrayList<>(), table.getColumns().get(i), true);
}
} else {
// Create a dummy Entry for the Column
for (int i = 0; i < 9; i++) {
addField(recordList, (String) null); // Add empty fields for table information
}
addRecord(csvFile, recordList);
// Headers: column.fullyQualifiedName, column.displayName, column.description,
// column.dataTypeDisplay,column.dataType, column.arrayDataType, column.dataLength,
// column.tags, column.glossaryTerms
for (int i = 0; i < listOrEmpty(entity.getColumns()).size(); i++) {
addRecord(csvFile, new ArrayList<>(), table.getColumns().get(i));
}
}
private void addRecord(
CsvFile csvFile, List<String> recordList, Column column, boolean emptyTableDetails) {
if (emptyTableDetails) {
for (int i = 0; i < 10; i++) {
addField(recordList, (String) null); // Add empty fields for table information
}
}
private void addRecord(CsvFile csvFile, List<String> recordList, Column column) {
addField(
recordList,
getLocalColumnName(table.getFullyQualifiedName(), column.getFullyQualifiedName()));
@ -1338,8 +1293,7 @@ public class TableRepository extends EntityRepository<Table> {
addTagLabels(recordList, column.getTags());
addGlossaryTerms(recordList, column.getTags());
addRecord(csvFile, recordList);
listOrEmpty(column.getChildren())
.forEach(c -> addRecord(csvFile, new ArrayList<>(), c, true));
listOrEmpty(column.getChildren()).forEach(c -> addRecord(csvFile, new ArrayList<>(), c));
}
private Column findColumn(List<Column> columns, String columnFqn) {

View File

@ -1,90 +1,6 @@
{
"summary": "Table CSV file is used for importing and exporting table metadata from and to an **existing** table.",
"headers": [
{
"name": "name",
"required": true,
"description": "The name of the table being updated.",
"examples": [
"`users`, `customers`"
]
},
{
"name": "displayName",
"required": false,
"description": "Display name for the table.",
"examples": [
"`fact_session`, `dim_user`"
]
},
{
"name": "description",
"required": false,
"description": "Description for the table in Markdown format.",
"examples": [
"`Customer Schema` that contains all the tables related to customer entity."
]
},
{
"name": "owner",
"required": false,
"description": "Owner names separated by ';'. For team owner, include prefix team. For user owner, include prefix user.",
"examples": [
"`team;marketing`",
"`user;john`"
]
},
{
"name": "tags",
"required": false,
"description": "Fully qualified classification tag names associated with the table separated by ';'. These tags are automatically applied along with the glossary term, when it is used to label an entity.",
"examples": [
"`PII.Sensitive`",
"`PII.Sensitive;PersonalData.Personal`"
]
},
{
"name": "glossaryTerms",
"required": false,
"description": "Fully qualified glossary term names associated with the table separated by ';'. These tags are automatically applied along with the glossary term, when it is used to label an entity.",
"examples": [
"`Glossary.GlossaryTerm1`",
"`Glossary.GlossaryTerm1.GlossaryTerm2`"
]
},
{
"name": "tiers",
"required": false,
"description": "Fully qualified tier tags names associated with the table separated by ';'.",
"examples": [
"`Tier.Tier1`",
"`Tier.Tier2`"
]
},
{
"name": "retentionPeriod",
"required": false,
"description": "Retention period of the data in the table. Period is expressed as duration in ISO 8601 format in UTC. Example - `P23DT23H`. When not set, the retention period is inherited from the parent database schema, if it exists.",
"examples": [
"`P23DT23H`"
]
},
{
"name": "sourceUrl",
"required": false,
"description": "Source URL for the table.",
"examples": [
"http://domain.com/system/customer_schema"
]
},
{
"name": "domain",
"required": false,
"description": "Domain to which the table belongs to.",
"examples": [
"Marketing", "Sales"
]
},
{
"name": "column.fullyQualifiedName",
"required": true,

View File

@ -19,7 +19,6 @@ import static javax.ws.rs.core.Response.Status.CREATED;
import static javax.ws.rs.core.Response.Status.FORBIDDEN;
import static javax.ws.rs.core.Response.Status.NOT_FOUND;
import static javax.ws.rs.core.Response.Status.OK;
import static org.apache.commons.lang.StringEscapeUtils.escapeCsv;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
@ -2686,41 +2685,29 @@ public class TableResourceTest extends EntityResourceTest<Table, CreateTable> {
// Headers: name, displayName, description, owner, tags, retentionPeriod, sourceUrl, domain
// Create table with invalid tags field
String resultsHeader = recordToString(EntityCsv.getResultHeaders(TableCsv.HEADERS));
String record = "s1,dsp1,dsc1,,Tag.invalidTag,,,,,,c1,c1,c1,,INT,,,,";
String csv = createCsv(TableCsv.HEADERS, listOf(record), null);
// Add an invalid column tag
String csvRecord = "c1,,,,INT,,,Tag.invalidTag,";
String csv = createCsv(TableCsv.HEADERS, listOf(csvRecord), null);
CsvImportResult result = importCsv(tableName, csv, false);
assertSummary(result, ApiStatus.PARTIAL_SUCCESS, 2, 1, 1);
String[] expectedRows =
new String[] {
resultsHeader,
getFailedRecord(record, EntityCsv.entityNotFound(4, "tag", "Tag.invalidTag"))
};
assertRows(result, expectedRows);
// Add an invalid column tag
record = "s1,dsp1,dsc1,,,,,,,,c1,,,,INT,,,Tag.invalidTag,";
csv = createCsv(TableCsv.HEADERS, listOf(record), null);
result = importCsv(tableName, csv, false);
assertSummary(result, ApiStatus.PARTIAL_SUCCESS, 2, 1, 1);
expectedRows =
new String[] {
resultsHeader,
getFailedRecord(record, EntityCsv.entityNotFound(17, "tag", "Tag.invalidTag"))
getFailedRecord(csvRecord, EntityCsv.entityNotFound(7, "tag", "Tag.invalidTag"))
};
assertRows(result, expectedRows);
// Update a non-existing column, this should create a new column with name "nonExistingColumn"
record = "s1,dsp1,dsc1,,,,,,,,nonExistingColumn,,,,INT,,,,";
csv = createCsv(TableCsv.HEADERS, listOf(record), null);
csvRecord = "nonExistingColumn,,,,INT,,,,";
csv = createCsv(TableCsv.HEADERS, listOf(csvRecord), null);
result = importCsv(tableName, csv, false);
assertSummary(result, ApiStatus.SUCCESS, 2, 2, 0);
expectedRows = new String[] {resultsHeader, getSuccessRecord(record, "Entity updated")};
expectedRows = new String[] {resultsHeader, getSuccessRecord(csvRecord, "Entity updated")};
assertRows(result, expectedRows);
}
@Test
void testImportExport() throws IOException {
String user1 = USER1.getName();
Column c1 = new Column().withName("c1").withDataType(STRUCT);
Column c11 = new Column().withName("c11").withDataType(INT);
Column c2 = new Column().withName("c2").withDataType(INT);
@ -2735,13 +2722,10 @@ public class TableResourceTest extends EntityResourceTest<Table, CreateTable> {
// Update terms with change in description
List<String> updateRecords =
listOf(
String.format(
"s1,dsp1,new-dsc1,user:%s,,,Tier.Tier1,P23DT23H,http://test.com,%s,c1,"
+ "dsp1-new,desc1,type,STRUCT,,,PII.Sensitive,",
user1, escapeCsv(DOMAIN.getFullyQualifiedName())),
",,,,,,,,,,c1.c11,dsp11-new,desc11,type1,INT,,,PII.Sensitive,",
",,,,,,,,,,c2,,,type1,INT,,,,",
",,,,,,,,,,c3,,,type1,INT,,,,");
"c1,dsp1-new,desc1,type,STRUCT,,,PII.Sensitive,",
"c1.c11,dsp11-new,desc11,type1,INT,,,PII.Sensitive,",
"c2,,,type1,INT,,,,",
"c3,,,type1,INT,,,,");
// Update created entity with changes
importCsvAndValidate(table.getFullyQualifiedName(), TableCsv.HEADERS, null, updateRecords);