/**
* Copyright 2015 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
package models.daos;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.PropertyNamingStrategy;
import com.fasterxml.jackson.databind.node.ObjectNode;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.springframework.dao.EmptyResultDataAccessException;
import play.libs.Json;
import utils.JdbcUtil;
import wherehows.common.schemas.DatasetDependencyRecord;
import wherehows.common.schemas.DatasetRecord;
import wherehows.common.utils.PartitionPatternMatcher;
import wherehows.common.writers.DatabaseWriter;
/**
* Data access object for dict_dataset and the related cfg_object_name_map and
* dict_dataset_instance tables.
*
* Created by zsun on 3/18/15.
* Modified by zechen on 10/12/15.
*/
public class DatasetDao {
public static final String GET_DATASET_BY_ID = "SELECT * FROM dict_dataset WHERE id = :id";
public static final String GET_DATASET_BY_URN = "SELECT * FROM dict_dataset WHERE urn = :urn";
public static final String DEFAULT_CLUSTER_NAME = "ltx1-holdem";
public static final String CLUSTER_NAME_KEY = "cluster_name";
public static final String DATASET_URI_KEY = "dataset_uri";
public static final String HIVE_PREFIX_WITH_3_SLASH = "hive:///";
public static final String DALIDS_PREFIX_WITH_3_SLASH = "dalids:///";
public static final String HIVE_PREFIX_WITH_2_SLASH = "hive://";
public static final String DALIDS_PREFIX_WITH_2_SLASH = "dalids://";
public static final String GET_DATASET_ID_IN_MAP_TABLE_WITH_TYPE_AND_CLUSTER = "SELECT " +
"c.object_dataset_id as dataset_id, d.urn, d.dataset_type, " +
"i.deployment_tier, i.data_center, i.server_cluster " +
"FROM cfg_object_name_map c JOIN dict_dataset d ON c.object_dataset_id = d.id " +
"LEFT JOIN dict_dataset_instance i ON c.object_dataset_id = i.dataset_id " +
"WHERE c.object_dataset_id is not null and lower(c.object_name) = ? " +
"and lower(c.object_type) = ? and lower(i.server_cluster) = ?";
public static final String GET_DATASET_ID_IN_MAP_TABLE_WITH_CLUSTER = "SELECT c.object_dataset_id as dataset_id, " +
"d.urn, d.dataset_type, i.deployment_tier, i.data_center, i.server_cluster " +
"FROM cfg_object_name_map c JOIN dict_dataset d ON c.object_dataset_id = d.id " +
"LEFT JOIN dict_dataset_instance i ON c.object_dataset_id = i.dataset_id " +
"WHERE c.object_dataset_id is not null and lower(c.object_name) = ? and lower(i.server_cluster) = ?";
private final static String GET_DATASET_DEPENDS_VIEW = "SELECT object_type, object_sub_type, " +
"object_name, map_phrase, is_identical_map, mapped_object_dataset_id, " +
"mapped_object_type, mapped_object_sub_type, mapped_object_name " +
"FROM cfg_object_name_map WHERE object_dataset_id = ?";
public final static String GET_DATASET_URN_PROPERTIES_LIKE_EXPR = "select urn from dict_dataset where properties like :properties";
public static final String GET_DATASET_DEPENDENTS_IN_OBJ_MAP_TABLE_BY_ID = "SELECT " +
"c.object_dataset_id as dataset_id, d.urn, d.dataset_type, c.object_sub_type " +
"FROM cfg_object_name_map c JOIN dict_dataset d ON c.object_dataset_id = d.id " +
"WHERE c.mapped_object_dataset_id = ?";
public static final String GET_DATASET_DEPENDENTS_IN_OBJ_MAP_TABLE_BY_NAME = "SELECT " +
"c.object_dataset_id as dataset_id, d.urn, d.dataset_type, c.object_sub_type " +
"FROM cfg_object_name_map c JOIN dict_dataset d ON c.object_dataset_id = d.id " +
"WHERE c.mapped_object_type = ? and " +
"(c.mapped_object_name = ? or c.mapped_object_name like ?)";
public static Map<String, Object> getDatasetById(int datasetId)
throws SQLException {
Map<String, Object> params = new HashMap<>();
params.put("id", datasetId);
return JdbcUtil.wherehowsNamedJdbcTemplate.queryForMap(GET_DATASET_BY_ID, params);
}
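/**
* Fetches the dict_dataset row with the given urn.
*/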
public static Map<String, Object> getDatasetByUrn(String urn)
throws SQLException {
Map<String, Object> params = new HashMap<>();
params.put("urn", urn);
return JdbcUtil.wherehowsNamedJdbcTemplate.queryForMap(GET_DATASET_BY_URN, params);
}
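/**
* Converts the JSON payload into a DatasetRecord, resolves the referenced dataset id and the
* partition layout pattern id when the corresponding fields are present, and appends the record
* to dict_dataset.
*/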
public static void insertDataset(JsonNode dataset)
throws Exception {
ObjectMapper om = new ObjectMapper();
om.setPropertyNamingStrategy(PropertyNamingStrategy.CAMEL_CASE_TO_LOWER_CASE_WITH_UNDERSCORES);
DatasetRecord record = om.convertValue(dataset, DatasetRecord.class);
if (record.getRefDatasetUrn() != null) {
Map<String, Object> refDataset = getDatasetByUrn(record.getRefDatasetUrn());
// Find ref dataset id
if (refDataset != null) {
record.setRefDatasetId(((Long) refDataset.get("id")).intValue());
}
}
// Find layout id
if (record.getSamplePartitionFullPath() != null) {
PartitionPatternMatcher ppm = new PartitionPatternMatcher(PartitionLayoutDao.getPartitionLayouts());
record.setPartitionLayoutPatternId(ppm.analyze(record.getSamplePartitionFullPath()));
}
DatabaseWriter dw = new DatabaseWriter(JdbcUtil.wherehowsJdbcTemplate, "dict_dataset");
dw.append(record);
dw.close();
}
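/**
* Inserts or updates a dataset: if dict_dataset already contains a row with the same urn the
* record is updated, otherwise a new row is inserted.
*/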
public static void setDatasetRecord(JsonNode dataset)
throws Exception {
ObjectMapper om = new ObjectMapper();
om.setPropertyNamingStrategy(PropertyNamingStrategy.CAMEL_CASE_TO_LOWER_CASE_WITH_UNDERSCORES);
DatasetRecord record = om.convertValue(dataset, DatasetRecord.class);
if (record != null) {
Map<String, Object> params = new HashMap<>();
params.put("urn", record.getUrn());
try {
// queryForMap throws EmptyResultDataAccessException when no row matches the urn,
// so the call serves purely as an existence check
JdbcUtil.wherehowsNamedJdbcTemplate.queryForMap(GET_DATASET_BY_URN, params);
updateDataset(dataset);
} catch (EmptyResultDataAccessException e) {
insertDataset(dataset);
}
}
}
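/**
* Converts the JSON payload into a DatasetRecord, resolves the referenced dataset id and the
* partition layout pattern id when the corresponding fields are present, and updates the existing
* dict_dataset row identified by urn.
*/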
public static void updateDataset(JsonNode dataset)
throws Exception {
ObjectMapper om = new ObjectMapper();
om.setPropertyNamingStrategy(PropertyNamingStrategy.CAMEL_CASE_TO_LOWER_CASE_WITH_UNDERSCORES);
DatasetRecord record = om.convertValue(dataset, DatasetRecord.class);
if (record.getRefDatasetUrn() != null) {
Map<String, Object> refDataset = getDatasetByUrn(record.getRefDatasetUrn());
// Find ref dataset id
if (refDataset != null) {
record.setRefDatasetId(((Long) refDataset.get("id")).intValue());
}
}
// Find layout id
if (record.getSamplePartitionFullPath() != null) {
PartitionPatternMatcher ppm = new PartitionPatternMatcher(PartitionLayoutDao.getPartitionLayouts());
record.setPartitionLayoutPatternId(ppm.analyze(record.getSamplePartitionFullPath()));
}
DatabaseWriter dw = new DatabaseWriter(JdbcUtil.wherehowsJdbcTemplate, "dict_dataset");
dw.update(record.toUpdateDatabaseValue(), record.getUrn());
dw.close();
}
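/**
* Recursively walks cfg_object_name_map starting from the given dataset id, adding one
* DatasetDependencyRecord per mapped object to the supplied list, and returns the number of
* direct (next-level) dependencies of the given dataset.
*/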
public static int getDatasetDependencies(Long datasetId, String topologySortId, int level,
List<DatasetDependencyRecord> depends) {
if (depends == null) {
depends = new ArrayList<>();
}
List<Map<String, Object>> rows =
JdbcUtil.wherehowsJdbcTemplate.queryForList(GET_DATASET_DEPENDS_VIEW, datasetId);
int index = 1;
if (rows != null) {
for (Map<String, Object> row : rows) {
DatasetDependencyRecord datasetDependencyRecord = new DatasetDependencyRecord();
datasetDependencyRecord.dataset_id = (Long) row.get("mapped_object_dataset_id");
String objectName = (String) row.get("mapped_object_name");
if (StringUtils.isNotBlank(objectName)) {
// mapped_object_name is expected in the form "/<database>/<table>"
String[] info = objectName.split("/");
if (info.length == 3) {
datasetDependencyRecord.database_name = info[1];
datasetDependencyRecord.table_name = info[2];
}
}
datasetDependencyRecord.level_from_root = level;
datasetDependencyRecord.type = (String) row.get("mapped_object_sub_type");
datasetDependencyRecord.ref_obj_location = objectName;
datasetDependencyRecord.ref_obj_type = (String) row.get("mapped_object_type");
datasetDependencyRecord.topology_sort_id = topologySortId + Integer.toString((index++) * 100);
// recurse one level deeper; the number of next-level dependencies is stored on this record
datasetDependencyRecord.next_level_dependency_count =
getDatasetDependencies(datasetDependencyRecord.dataset_id, datasetDependencyRecord.topology_sort_id,
level + 1, depends);
depends.add(datasetDependencyRecord);
}
return rows.size();
} else {
return 0;
}
}
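/**
* Resolves the dataset referenced by the "dataset_uri" (and optional "cluster_name") field of the
* input JSON and returns its instance information and full dependency tree as a JSON object.
*/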
public static ObjectNode getDatasetDependency(JsonNode input)
throws Exception {
ObjectNode resultJson = Json.newObject();
String cluster = DEFAULT_CLUSTER_NAME;
String datasetUri = null;
String dbName = null;
String tableName = null;
boolean isHive = false;
boolean isDalids = false;
if (input != null && input.isContainerNode())
{
if (input.has(CLUSTER_NAME_KEY))
{
cluster = input.get(CLUSTER_NAME_KEY).asText();
}
if (input.has(DATASET_URI_KEY))
{
datasetUri = input.get(DATASET_URI_KEY).asText();
}
}
if (StringUtils.isBlank(datasetUri))
{
resultJson.put("return_code", 404);
resultJson.put("message", "Wrong input format! Missing dataset uri");
return resultJson;
}
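// Supported dataset uri formats (separator between db and table may be '.' or '/'):
//   hive:///<db>.<table>            dalids:///<db>.<table>
//   hive://<cluster>/<db>/<table>   dalids://<cluster>/<db>/<table>
//   [<cluster>:]<db>.<table>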
int index = -1;
if ((index = datasetUri.indexOf(HIVE_PREFIX_WITH_3_SLASH)) != -1)
{
isHive = true;
String tmp = datasetUri.substring(index + HIVE_PREFIX_WITH_3_SLASH.length());
String[] info = tmp.split("\\.|/");
if (info != null && info.length == 2)
{
dbName = info[0];
tableName = info[1];
}
}
else if ((index = datasetUri.indexOf(DALIDS_PREFIX_WITH_3_SLASH)) != -1)
{
isDalids = true;
String tmp = datasetUri.substring(index + DALIDS_PREFIX_WITH_3_SLASH.length());
String[] info = tmp.split("\\.|/");
if (info != null && info.length == 2)
{
dbName = info[0];
tableName = info[1];
}
}
else if ((index = datasetUri.indexOf(HIVE_PREFIX_WITH_2_SLASH)) != -1)
{
isHive = true;
String tmp = datasetUri.substring(index + HIVE_PREFIX_WITH_2_SLASH.length());
String[] info = tmp.split("\\.|/");
if (info != null && info.length == 3)
{
cluster = info[0];
dbName = info[1];
tableName = info[2];
}
}
else if ((index = datasetUri.indexOf(DALIDS_PREFIX_WITH_2_SLASH)) != -1)
{
isDalids = true;
String tmp = datasetUri.substring(index + DALIDS_PREFIX_WITH_2_SLASH.length());
String[] info = tmp.split("\\.|/");
if (info != null && info.length == 3)
{
cluster = info[0];
dbName = info[1];
tableName = info[2];
}
}
else if (datasetUri.indexOf('.') != -1)
{
index = datasetUri.indexOf(':');
String tmp = datasetUri;
if (index != -1)
{
cluster = datasetUri.substring(0, index);
tmp = datasetUri.substring(index + 1);
}
String[] info = tmp.split("\\.|/");
if (info != null && info.length == 2)
{
dbName = info[0];
tableName = info[1];
}
}
if (StringUtils.isBlank(cluster) || StringUtils.isBlank(dbName) || StringUtils.isBlank(tableName))
{
resultJson.put("return_code", 404);
resultJson.put("message", "Wrong input format! Missing dataset uri");
return resultJson;
}
List<Map<String, Object>> rows = null;
if (isHive)
{
rows = JdbcUtil.wherehowsJdbcTemplate.queryForList(
GET_DATASET_ID_IN_MAP_TABLE_WITH_TYPE_AND_CLUSTER,
"/" + dbName + "/" + tableName,
"hive",
cluster);
}
else if (isDalids)
{
rows = JdbcUtil.wherehowsJdbcTemplate.queryForList(
GET_DATASET_ID_IN_MAP_TABLE_WITH_TYPE_AND_CLUSTER,
"/" + dbName + "/" + tableName,
"dalids",
cluster);
}
else
{
rows = JdbcUtil.wherehowsJdbcTemplate.queryForList(
GET_DATASET_ID_IN_MAP_TABLE_WITH_CLUSTER,
"/" + dbName + "/" + tableName, cluster);
}
Long datasetId = 0L;
String urn = null;
String datasetType = null;
String deploymentTier = null;
String dataCenter = null;
String serverCluster = null;
if (rows != null && rows.size() > 0) {
for (Map row : rows) {
datasetId = (Long) row.get("dataset_id");
urn = (String) row.get("urn");
datasetType = (String) row.get("dataset_type");
if (datasetType.equalsIgnoreCase("hive"))
{
isHive = true;
}
else if (datasetType.equalsIgnoreCase("dalids"))
{
isDalids = true;
}
2016-06-14 16:17:24 -07:00
deploymentTier = (String) row.get("deployment_tier");
dataCenter = (String) row.get("data_center");
serverCluster = (String) row.get("server_cluster");
break;
}
}
else {
resultJson.put("return_code", 200);
resultJson.put("message", "Dependency information is not available.");
return resultJson;
}
List<DatasetDependencyRecord> depends = new ArrayList<>();
getDatasetDependencies(datasetId, "", 1, depends);
// count leaf-level dependencies (those with no further downstream dependencies)
int leafLevelDependencyCount = 0;
for (DatasetDependencyRecord d : depends) {
if (d.next_level_dependency_count == 0) {
leafLevelDependencyCount++;
}
}
StringBuilder inputUri = new StringBuilder();
if (isHive) {
inputUri.append("hive://");
} else if (isDalids) {
inputUri.append("dalids://");
}
inputUri.append(cluster).append("/").append(dbName).append("/").append(tableName);
resultJson.put("return_code", 200);
resultJson.put("deployment_tier", deploymentTier);
resultJson.put("data_center", dataCenter);
resultJson.put("cluster", StringUtils.isNotBlank(serverCluster) ? serverCluster: cluster);
resultJson.put("dataset_type", datasetType);
resultJson.put("database_name", dbName);
resultJson.put("table_name", tableName);
resultJson.put("urn", urn);
resultJson.put("dataset_id", datasetId);
resultJson.put("input_uri", inputUri.toString());
resultJson.set("dependencies", Json.toJson(depends));
resultJson.put("leaf_level_dependency_count", leafLevelDependencyCount);
return resultJson;
}
//
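/**
* Returns the urns of all dict_dataset rows whose properties column matches the given
* SQL LIKE pattern, together with the match count.
*/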
public static ObjectNode getDatasetUrnForPropertiesLike(String properties) {
ObjectNode result = Json.newObject();
List<String> datasetUrns = new ArrayList<>();
if (StringUtils.isNotBlank(properties)) {
Map<String, Object> params = new HashMap<>();
params.put("properties", properties);
List<Map<String, Object>> rows =
JdbcUtil.wherehowsNamedJdbcTemplate.queryForList(GET_DATASET_URN_PROPERTIES_LIKE_EXPR, params);
for (Map<String, Object> row : rows) {
String datasetUrn = (String) row.get("urn");
datasetUrns.add(datasetUrn);
}
result.put("count", datasetUrns.size());
result.set("urns", Json.toJson(datasetUrns));
}
return result;
}
//
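/**
* Returns the datasets registered in cfg_object_name_map as depending on the given dataset id.
*/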
public static List<Map<String, Object>> getDatasetDependents(Long datasetId) {
if (datasetId > 0) {
return JdbcUtil.wherehowsJdbcTemplate.queryForList(GET_DATASET_DEPENDENTS_IN_OBJ_MAP_TABLE_BY_ID, datasetId);
}
return null;
}
//
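/**
* Returns the datasets registered in cfg_object_name_map as depending on the given data platform
* and object path, matching either the exact path or any child path under it.
*/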
public static List<Map<String, Object>> getDatasetDependents(String dataPlatform, String path) {
if (StringUtils.isNotEmpty(path) && StringUtils.isNotEmpty(dataPlatform)) {
// match the exact object name or any child object under it
String childPath = path + "/%";
return JdbcUtil.wherehowsJdbcTemplate.queryForList(GET_DATASET_DEPENDENTS_IN_OBJ_MAP_TABLE_BY_NAME,
dataPlatform, path, childPath);
}
return null;
}
}