Add MetadataInventoryEvent processor and API

This commit is contained in:
Yi Wang 2016-09-14 18:53:38 -07:00
parent f7878cdfe4
commit b136fc6c37
10 changed files with 466 additions and 8 deletions

View File

@ -630,7 +630,6 @@ public class DatasetInfoController extends Controller {
return ok(resultJson);
}
public static Result getDatasetSchema()
throws SQLException {
ObjectNode resultJson = Json.newObject();
@ -690,4 +689,54 @@ public class DatasetInfoController extends Controller {
}
return ok(resultJson);
}
/**
 * GET endpoint: look up dataset inventory items by data platform, native name and
 * data origin, most recent event first.
 *
 * Query parameters: dataPlatform, nativeName, dataOrigin (all required),
 * limit (optional; defaults to 1, malformed values also fall back to 1).
 *
 * Always responds HTTP 200; the JSON body carries "return_code" 200/404/400.
 */
public static Result getDatasetInventoryItems()
    throws SQLException {
  ObjectNode resultJson = Json.newObject();
  String dataPlatform = request().getQueryString("dataPlatform");
  String nativeName = request().getQueryString("nativeName");
  String dataOrigin = request().getQueryString("dataOrigin");

  // Parse the optional limit explicitly: check for absence first instead of
  // driving control flow through NumberFormatException, and record malformed
  // values rather than swallowing them silently.
  int limit = 1;
  final String limitParam = request().getQueryString("limit");
  if (limitParam != null) {
    try {
      limit = Integer.parseInt(limitParam);
    } catch (NumberFormatException e) {
      Logger.debug("Invalid limit parameter, defaulting to 1: " + limitParam);
    }
  }

  if (dataPlatform != null && nativeName != null && dataOrigin != null) {
    try {
      List<Map<String, Object>> items =
          DatasetInfoDao.getDatasetInventoryItems(dataPlatform, nativeName, dataOrigin, limit);
      resultJson.put("return_code", 200);
      resultJson.set("dataset_inventory_items", Json.toJson(items));
    } catch (EmptyResultDataAccessException e) {
      Logger.debug("DataAccessException nativeName: " + nativeName + " , dataOrigin: " + dataOrigin, e);
      resultJson.put("return_code", 404);
      resultJson.put("error_message",
          "dataset inventory for " + nativeName + " at " + dataOrigin + " cannot be found!");
    }
    return ok(resultJson);
  }
  // if no parameter, return an error message
  resultJson.put("return_code", 400);
  resultJson.put("error_message", "No parameter provided");
  return ok(resultJson);
}
@BodyParser.Of(BodyParser.Json.class)
public static Result updateDatesetInventory() {
JsonNode root = request().body().asJson();
ObjectNode resultJson = Json.newObject();
try {
DatasetInfoDao.updateDatasetInventory(root);
resultJson.put("return_code", 200);
resultJson.put("message", "Dataset inventory updated!");
} catch (Exception e) {
e.printStackTrace();
resultJson.put("return_code", 404);
resultJson.put("error_message", e.getMessage());
}
return ok(resultJson);
}
}

View File

@ -15,9 +15,10 @@ package models.daos;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.PropertyNamingStrategy;
import java.sql.SQLException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@ -34,6 +35,7 @@ import wherehows.common.schemas.DatasetDeploymentRecord;
import wherehows.common.schemas.DatasetFieldIndexRecord;
import wherehows.common.schemas.DatasetFieldSchemaRecord;
import wherehows.common.schemas.DatasetIndexRecord;
import wherehows.common.schemas.DatasetInventoryItemRecord;
import wherehows.common.schemas.DatasetOwnerRecord;
import wherehows.common.schemas.DatasetPartitionKeyRecord;
import wherehows.common.schemas.DatasetPartitionRecord;
@ -62,6 +64,7 @@ public class DatasetInfoDao {
private static final String DATASET_INDEX_TABLE = "dataset_index";
private static final String DATASET_SCHEMA_TABLE = "dataset_schema_info";
private static final String DATASET_FIELD_DETAIL_TABLE = "dict_field_detail";
private static final String DATASET_INVENTORY_TABLE = "dataset_inventory";
private static final String EXTERNAL_USER_TABLE = "dir_external_user_info";
private static final String EXTERNAL_GROUP_TABLE = "dir_external_group_user_map";
@ -93,6 +96,8 @@ public class DatasetInfoDao {
new DatabaseWriter(JdbcUtil.wherehowsJdbcTemplate, DATASET_SCHEMA_TABLE);
private static final DatabaseWriter FIELD_DETAIL_WRITER =
new DatabaseWriter(JdbcUtil.wherehowsJdbcTemplate, DATASET_FIELD_DETAIL_TABLE);
private static final DatabaseWriter INVENTORY_WRITER =
new DatabaseWriter(JdbcUtil.wherehowsJdbcTemplate, DATASET_INVENTORY_TABLE);
public static final String GET_DATASET_DEPLOYMENT_BY_DATASET_ID =
"SELECT * FROM " + DATASET_DEPLOYMENT_TABLE + " WHERE dataset_id = :dataset_id";
@ -200,6 +205,14 @@ public class DatasetInfoDao {
public static final String UPDATE_DATASET_FIELD_PARTITIONED_BY_FIELDNAME =
"UPDATE " + DATASET_FIELD_DETAIL_TABLE + " SET is_partitioned=? WHERE dataset_id=? AND field_name=?";
public static final String GET_DATASET_INVENTORY_ITEMS =
"SELECT * FROM " + DATASET_INVENTORY_TABLE + " WHERE data_platform = :data_platform AND native_name = :native_name "
+ "AND data_origin = :data_origin ORDER BY event_date DESC LIMIT :limit";
public static final String INSERT_DATASET_INVENTORY_ITEM =
PreparedStatementUtil.prepareInsertTemplateWithColumn("REPLACE", DATASET_INVENTORY_TABLE,
DatasetInventoryItemRecord.getInventoryItemColumns());
public static List<Map<String, Object>> getDatasetDeploymentByDatasetId(int datasetId)
throws DataAccessException {
Map<String, Object> params = new HashMap<>();
@ -899,4 +912,44 @@ public class DatasetInfoDao {
params.put("dataset_id", datasetId);
return JdbcUtil.wherehowsNamedJdbcTemplate.queryForList(GET_DATASET_FIELDS_BY_DATASET_ID, params);
}
/**
 * Fetch up to {@code limit} inventory rows for the given
 * (data_platform, native_name, data_origin) key, newest event_date first.
 *
 * @param dataPlatform data platform identifier
 * @param nativeName dataset's native name on that platform
 * @param dataOrigin origin/fabric of the dataset
 * @param limit maximum number of rows to return
 * @return matching rows from dataset_inventory
 * @throws DataAccessException on query failure
 */
public static List<Map<String, Object>> getDatasetInventoryItems(String dataPlatform,
    String nativeName, String dataOrigin, int limit)
    throws DataAccessException {
  final Map<String, Object> queryParams = new HashMap<>();
  queryParams.put("limit", limit);
  queryParams.put("data_platform", dataPlatform);
  queryParams.put("native_name", nativeName);
  queryParams.put("data_origin", dataOrigin);
  return JdbcUtil.wherehowsNamedJdbcTemplate.queryForList(GET_DATASET_INVENTORY_ITEMS, queryParams);
}
/**
 * Persist a MetadataInventoryEvent: one REPLACE into dataset_inventory per entry
 * of datasetList, stamped with the event's date and platform URN.
 *
 * @param root event JSON; must contain auditHeader (with a numeric "time"),
 *             dataPlatformUrn and datasetList
 * @throws IllegalArgumentException if any required field is missing
 */
public static void updateDatasetInventory(JsonNode root)
    throws Exception {
  final JsonNode auditHeader = root.path("auditHeader");
  final JsonNode dataPlatform = root.path("dataPlatformUrn");
  final JsonNode datasetList = root.path("datasetList");
  if (auditHeader.isMissingNode() || dataPlatform.isMissingNode() || datasetList.isMissingNode()) {
    throw new IllegalArgumentException(
        "Dataset inventory info update fail, " + "Json missing necessary fields: " + root.toString());
  }

  // Guard the timestamp explicitly: auditHeader.get("time") returns null when the
  // field is absent, which would otherwise surface as a NullPointerException.
  final JsonNode timeNode = auditHeader.get("time");
  if (timeNode == null || !timeNode.canConvertToLong()) {
    throw new IllegalArgumentException(
        "Dataset inventory info update fail, auditHeader.time missing or non-numeric: " + root.toString());
  }
  final Long eventTime = timeNode.asLong();
  // SimpleDateFormat is not thread-safe, so a fresh instance is created per call.
  final String eventDate = new SimpleDateFormat("yyyy-MM-dd").format(new Date(eventTime));
  final String dataPlatformUrn = dataPlatform.asText();
  final ObjectMapper om = new ObjectMapper();

  for (JsonNode datasetItem : datasetList) {
    try {
      DatasetInventoryItemRecord item = om.convertValue(datasetItem, DatasetInventoryItemRecord.class);
      item.setDataPlatformUrn(dataPlatformUrn);
      item.setEventDate(eventDate);
      INVENTORY_WRITER.execute(INSERT_DATASET_INVENTORY_ITEM, item.getInventoryItemValues());
    } catch (Exception ex) {
      // Best-effort per item: keep processing the remaining entries, but log at
      // error level so failed insertions are not silently hidden at debug.
      Logger.error("Dataset inventory item insertion error. ", ex);
    }
  }
}
}

View File

@ -0,0 +1,44 @@
/**
* Copyright 2015 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
package utils;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import models.daos.DatasetInfoDao;
import org.apache.avro.generic.GenericData;
import wherehows.common.schemas.Record;
/**
 * Processor for MetadataInventoryEvent records: converts the incoming Avro record
 * to a Jackson JSON tree and hands it to DatasetInfoDao for persistence.
 */
public class MetadataInventoryProcessor {

  /**
   * Process a MetadataInventoryEvent record.
   *
   * @param record Avro GenericData.Record for the event; null records are ignored
   * @param topic Kafka topic the record came from (currently unused)
   * @return always null — this processor produces no downstream record
   * @throws Exception if JSON parsing or the database update fails
   */
  public Record process(GenericData.Record record, String topic)
      throws Exception {
    if (record != null) {
      // Round-trip through the record's string form (Avro renders it as JSON)
      // so the DAO can consume a Jackson tree. The unused auditHeader extraction
      // and the commented-out log line were removed.
      final JsonNode rootNode = new ObjectMapper().readTree(record.toString());
      DatasetInfoDao.updateDatasetInventory(rootNode);
    }
    return null;
  }
}

View File

@ -186,3 +186,9 @@ GET /dataset/schema controllers.DatasetInfoController.getD
# Update schema of a dataset
POST /dataset/schema controllers.DatasetInfoController.updateDatesetSchema()
# Get inventory items by data platform + native name + origin + (optional) limit
GET /dataset/inventory controllers.DatasetInfoController.getDatasetInventoryItems()
# Update dataset inventory
POST /dataset/inventory controllers.DatasetInfoController.updateDatesetInventory()

View File

@ -74,12 +74,12 @@ CREATE TABLE dataset_case_sensitivity (
DEFAULT CHARSET = latin1;
CREATE TABLE dataset_reference (
`dataset_id` INT UNSIGNED NOT NULL,
`dataset_urn` VARCHAR(200) NOT NULL,
`reference_type` VARCHAR(20) NOT NULL,
`reference_format` VARCHAR(50) NOT NULL,
`dataset_id` INT UNSIGNED NOT NULL,
`dataset_urn` VARCHAR(200) NOT NULL,
`reference_type` VARCHAR(20) NOT NULL,
`reference_format` VARCHAR(50) NOT NULL,
`reference_list` TEXT CHAR SET utf8 DEFAULT NULL,
`modified_time` INT UNSIGNED DEFAULT NULL
`modified_time` INT UNSIGNED DEFAULT NULL
COMMENT 'the modified time in epoch',
PRIMARY KEY (`dataset_id`, `reference_type`, `reference_format`),
UNIQUE KEY (`dataset_urn`, `reference_type`, `reference_format`)
@ -180,3 +180,22 @@ CREATE TABLE dataset_schema_info (
)
ENGINE = InnoDB
DEFAULT CHARSET = latin1;
-- Inventory snapshots reported by MetadataInventoryEvent: one row per dataset
-- (data_platform + native_name + data_origin) per event_date. The loader uses
-- REPLACE INTO, so reprocessing the same day's event overwrites that day's row.
CREATE TABLE dataset_inventory (
`event_date` DATE NOT NULL,
`data_platform` VARCHAR(50) NOT NULL,
`native_name` VARCHAR(200) NOT NULL,
`data_origin` VARCHAR(20) NOT NULL,
-- Flattened from the event's changeAuditStamp (actor, type, epoch time, note).
`change_actor_urn` VARCHAR(200) DEFAULT NULL,
`change_type` VARCHAR(20) DEFAULT NULL,
`change_time` BIGINT UNSIGNED DEFAULT NULL,
`change_note` TEXT CHAR SET utf8 DEFAULT NULL,
`native_type` VARCHAR(20) DEFAULT NULL,
`uri` VARCHAR(200) DEFAULT NULL,
-- Flattened case-sensitivity flags from the event's caseSensitivity record.
`dataset_name_case_sensitivity` BOOLEAN DEFAULT NULL,
`field_name_case_sensitivity` BOOLEAN DEFAULT NULL,
`data_content_case_sensitivity` BOOLEAN DEFAULT NULL,
-- event_date is last in the PK so lookups by platform/name/origin can scan the
-- newest rows via ORDER BY event_date DESC (see GET_DATASET_INVENTORY_ITEMS).
PRIMARY KEY (`data_platform`, `native_name`, `data_origin`, `event_date`)
)
ENGINE = InnoDB
DEFAULT CHARSET = latin1;

View File

@ -13,7 +13,11 @@
*/
package wherehows.common.schemas;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class DatasetCaseSensitiveRecord extends AbstractRecord {
@ -38,6 +42,19 @@ public class DatasetCaseSensitiveRecord extends AbstractRecord {
public DatasetCaseSensitiveRecord() {
}
/**
 * JSON form of the three case-sensitivity flags, e.g.
 * {"datasetName":true,"fieldName":false,"dataContent":null};
 * returns null if serialization fails.
 */
@Override
public String toString() {
  final Map<String, Object> fields = new HashMap<>();
  fields.put("datasetName", datasetName);
  fields.put("fieldName", fieldName);
  fields.put("dataContent", dataContent);
  try {
    return new ObjectMapper().writeValueAsString(fields);
  } catch (Exception ex) {
    return null;
  }
}
public void setDataset(Integer datasetId, String datasetUrn) {
this.datasetId = datasetId;
this.datasetUrn = datasetUrn;

View File

@ -0,0 +1,70 @@
/**
* Copyright 2015 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
package wherehows.common.schemas;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
/**
 * Audit stamp attached to a dataset change: who made the change (actor URN),
 * what kind of change it was, when it happened (epoch millis), and an optional
 * free-form note. Plain mutable bean; Jackson serializes it via its accessors.
 */
public class DatasetChangeAuditStamp {

  String actorUrn;
  String type;
  Long time;
  String note;

  public DatasetChangeAuditStamp() {
  }

  /** JSON form of this stamp, or null when serialization fails. */
  @Override
  public String toString() {
    final ObjectMapper mapper = new ObjectMapper();
    try {
      return mapper.writeValueAsString(this);
    } catch (JsonProcessingException ex) {
      return null;
    }
  }

  // --- bean accessors -------------------------------------------------------

  public String getActorUrn() {
    return actorUrn;
  }

  public void setActorUrn(String actorUrn) {
    this.actorUrn = actorUrn;
  }

  public String getType() {
    return type;
  }

  public void setType(String type) {
    this.type = type;
  }

  public Long getTime() {
    return time;
  }

  public void setTime(Long time) {
    this.time = time;
  }

  public String getNote() {
    return note;
  }

  public void setNote(String note) {
    this.note = note;
  }
}

View File

@ -0,0 +1,98 @@
/**
* Copyright 2015 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
package wherehows.common.schemas;
import java.util.List;
/**
 * One dataset entry from a MetadataInventoryEvent, flattened into a row for the
 * dataset_inventory table. The event-level dataPlatformUrn and eventDate are set
 * by the caller after Jackson binds the per-dataset fields.
 */
public class DatasetInventoryItemRecord extends AbstractRecord {

  String nativeName;
  String dataOrigin;
  DatasetPropertiesRecord datasetProperties;
  String dataPlatformUrn;
  String eventDate;

  @Override
  public String[] getDbColumnNames() {
    return new String[]{"native_name", "data_origin", "dataset_properties", "data_platform", "event_date"};
  }

  @Override
  public List<Object> fillAllFields() {
    // Not used for this record; getInventoryItemValues() supplies the row values.
    return null;
  }

  /** Column list matching getInventoryItemValues(), used to build the REPLACE statement. */
  public static String[] getInventoryItemColumns() {
    return new String[]{"native_name", "data_origin", "data_platform", "event_date", "change_actor_urn", "change_type",
        "change_time", "change_note", "native_type", "uri", "dataset_name_case_sensitivity",
        "field_name_case_sensitivity", "data_content_case_sensitivity"};
  }

  /**
   * Row values in the order of getInventoryItemColumns(). Null-safe throughout:
   * a missing datasetProperties, changeAuditStamp or caseSensitivity yields null
   * columns instead of a NullPointerException (the original dereferenced the
   * nested records unconditionally once datasetProperties was non-null).
   */
  public Object[] getInventoryItemValues() {
    final DatasetChangeAuditStamp changes =
        datasetProperties != null ? datasetProperties.getChangeAuditStamp() : null;
    final DatasetCaseSensitiveRecord cases =
        datasetProperties != null ? datasetProperties.getCaseSensitivity() : null;
    return new Object[]{nativeName, dataOrigin, dataPlatformUrn, eventDate,
        changes != null ? changes.getActorUrn() : null,
        changes != null ? changes.getType() : null,
        changes != null ? changes.getTime() : null,
        changes != null ? changes.getNote() : null,
        datasetProperties != null ? datasetProperties.getNativeType() : null,
        datasetProperties != null ? datasetProperties.getUri() : null,
        cases != null ? cases.getDatasetName() : null,
        cases != null ? cases.getFieldName() : null,
        cases != null ? cases.getDataContent() : null};
  }

  public DatasetInventoryItemRecord() {
  }

  public String getNativeName() {
    return nativeName;
  }

  public void setNativeName(String nativeName) {
    this.nativeName = nativeName;
  }

  public String getDataOrigin() {
    return dataOrigin;
  }

  public void setDataOrigin(String dataOrigin) {
    this.dataOrigin = dataOrigin;
  }

  public DatasetPropertiesRecord getDatasetProperties() {
    return datasetProperties;
  }

  public void setDatasetProperties(DatasetPropertiesRecord datasetProperties) {
    this.datasetProperties = datasetProperties;
  }

  public String getDataPlatformUrn() {
    return dataPlatformUrn;
  }

  public void setDataPlatformUrn(String dataPlatformUrn) {
    this.dataPlatformUrn = dataPlatformUrn;
  }

  public String getEventDate() {
    return eventDate;
  }

  public void setEventDate(String eventDate) {
    this.eventDate = eventDate;
  }
}

View File

@ -0,0 +1,88 @@
/**
* Copyright 2015 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
package wherehows.common.schemas;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Dataset-level properties carried by a MetadataInventoryEvent entry: change
 * audit stamp, native type, URI, and case-sensitivity flags.
 */
public class DatasetPropertiesRecord extends AbstractRecord {

  DatasetChangeAuditStamp changeAuditStamp;
  String nativeType;
  String uri;
  DatasetCaseSensitiveRecord caseSensitivity;

  @Override
  public String[] getDbColumnNames() {
    return new String[]{"change_audit_stamp", "native_type", "uri", "case_sensitivity"};
  }

  @Override
  public List<Object> fillAllFields() {
    // Not used for this record.
    return null;
  }

  public DatasetPropertiesRecord() {
  }

  /**
   * JSON form of these properties, or null when serialization fails. Nested
   * records are null-guarded: the original called changeAuditStamp.toString()
   * and caseSensitivity.toString() unconditionally, and the resulting
   * NullPointerException escaped the catch (which handles only
   * JsonProcessingException).
   */
  @Override
  public String toString() {
    try {
      Map<String, String> valueMap = new HashMap<>();
      valueMap.put("changeAuditStamp", changeAuditStamp != null ? changeAuditStamp.toString() : null);
      valueMap.put("nativeType", nativeType);
      valueMap.put("uri", uri);
      valueMap.put("caseSensitivity", caseSensitivity != null ? caseSensitivity.toString() : null);
      return new ObjectMapper().writeValueAsString(valueMap);
    } catch (JsonProcessingException ex) {
      return null;
    }
  }

  public DatasetChangeAuditStamp getChangeAuditStamp() {
    return changeAuditStamp;
  }

  public void setChangeAuditStamp(DatasetChangeAuditStamp changeAuditStamp) {
    this.changeAuditStamp = changeAuditStamp;
  }

  public String getNativeType() {
    return nativeType;
  }

  public void setNativeType(String nativeType) {
    this.nativeType = nativeType;
  }

  public String getUri() {
    return uri;
  }

  public void setUri(String uri) {
    this.uri = uri;
  }

  public DatasetCaseSensitiveRecord getCaseSensitivity() {
    return caseSensitivity;
  }

  public void setCaseSensitivity(DatasetCaseSensitiveRecord caseSensitivity) {
    this.caseSensitivity = caseSensitivity;
  }
}

View File

@ -17,16 +17,29 @@ public class PreparedStatementUtil {
/**
* prepare SQL insert template with column names and placeholders, 'INSERT INTO table(`a`,`b`) VALUES (?,?)'
* @param tableName
* @param columnNames String[]
* @return SQL String
*/
public static String prepareInsertTemplateWithColumn(String tableName, String[] columnNames) {
return "INSERT INTO " + tableName + "(`" + String.join("`,`", columnNames) + "`) VALUES " + generatePlaceholder(
return prepareInsertTemplateWithColumn("INSERT", tableName, columnNames);
}
/**
* prepare SQL insert template with column names and placeholders, 'INSERT/REPLACE INTO table(`a`,`b`) VALUES (?,?)'
* @param action INSERT or REPLACE
* @param tableName
* @param columnNames
* @return
*/
/**
 * Build a SQL insert/replace template with column names and placeholders, e.g.
 * 'REPLACE INTO table(`a`,`b`) VALUES (?,?)'.
 *
 * @param action "INSERT" or "REPLACE"
 * @param tableName target table
 * @param columnNames columns to populate
 * @return the SQL template string
 */
public static String prepareInsertTemplateWithColumn(String action, String tableName, String[] columnNames) {
  final String columnList = "(`" + String.join("`,`", columnNames) + "`)";
  return action + " INTO " + tableName + columnList + " VALUES " + generatePlaceholder(columnNames.length);
}
/**
* prepare SQL insert template with placeholders, 'INSERT INTO table VALUES (?,?,?)'
* @param tableName
* @param columnNum int
* @return SQL String
*/
@ -36,6 +49,7 @@ public class PreparedStatementUtil {
/**
* prepare SQL update template with placeholders: "UPDATE table SET a=?, b=? WHERE c=? AND d=?"
* @param tableName
* @param columnNames String[] fields to be assigned/updated
* @param conditions String[] condition fields
* @return