feat(data-platforms): adding rest resource for /dataPlatforms and mid-tier support (#1817)

* feat(data-platforms): Adding rest resource for /dataPlatforms and mid-tier support

* Removed data platforms that are LinkedIn-internal
This commit is contained in:
Kerem Sahin 2020-08-20 12:55:30 -07:00 committed by GitHub
parent b35a1b329b
commit 57f81d488d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 483 additions and 0 deletions

View File

@ -1,5 +1,6 @@
package com.linkedin.datahub.dao;
import com.linkedin.datahub.dao.table.DataPlatformsDao;
import com.linkedin.datahub.dao.table.DatasetOwnerDao;
import com.linkedin.datahub.dao.table.DatasetsDao;
import com.linkedin.datahub.dao.table.GmsDao;
@ -26,6 +27,7 @@ public class DaoFactory {
private static DatasetOwnerDao datasetOwnerDao;
private static DatasetsDao datasetsDao;
private static LineageDao lineageDao;
private static DataPlatformsDao dataPlatformsDao;
// Private constructor: DaoFactory is a static-only holder and must never be instantiated.
private DaoFactory() {
}
@ -101,4 +103,11 @@ public class DaoFactory {
}
return lineageDao;
}
/**
 * Lazily creates and returns the shared {@link DataPlatformsDao} singleton.
 *
 * <p>NOTE(review): the original unsynchronized check-then-act could construct the DAO twice
 * (and publish a partially-visible instance) under concurrent first access; {@code synchronized}
 * makes the lazy initialization safe. The other getters in this factory share the same pattern
 * and could receive the same treatment.
 *
 * @return the process-wide DataPlatformsDao instance
 */
public static synchronized DataPlatformsDao getDataPlatformsDao() {
  if (dataPlatformsDao == null) {
    dataPlatformsDao = new DataPlatformsDao(getGmsDao().get_dataPlatforms());
  }
  return dataPlatformsDao;
}
}

View File

@ -0,0 +1,25 @@
package com.linkedin.datahub.dao.table;
import com.linkedin.dataplatform.DataPlatformInfo;
import com.linkedin.dataplatform.client.DataPlatforms;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
/**
 * Thin DAO that adapts the GMS {@code DataPlatforms} rest.li client for the mid-tier.
 */
public class DataPlatformsDao {

  private final DataPlatforms _platformsClient;

  public DataPlatformsDao(@Nonnull DataPlatforms dataPlatforms) {
    _platformsClient = dataPlatforms;
  }

  /**
   * Fetches every known data platform and exposes each one as its raw underlying data map.
   *
   * @return one data map per platform
   * @throws Exception if the remote GMS call fails
   */
  public List<Map<String, Object>> getAllPlatforms() throws Exception {
    return _platformsClient.getAllPlatforms()
        .stream()
        .map(platformInfo -> platformInfo.data())
        .collect(Collectors.toList());
  }
}

View File

@ -1,5 +1,6 @@
package com.linkedin.datahub.dao.table;
import com.linkedin.dataplatform.client.DataPlatforms;
import com.linkedin.dataset.client.Datasets;
import com.linkedin.dataset.client.Deprecations;
import com.linkedin.dataset.client.InstitutionalMemory;
@ -23,6 +24,7 @@ public class GmsDao {
private final Deprecations _deprecations;
private final Schemas _schemas;
private final Lineages _lineages;
private final DataPlatforms _dataPlatforms;
public GmsDao(@Nonnull Client restClient) {
_corpUsers = new CorpUsers(restClient);
@ -32,6 +34,7 @@ public class GmsDao {
_deprecations = new Deprecations(restClient);
_schemas = new Schemas(restClient);
_lineages = new Lineages(restClient);
_dataPlatforms = new DataPlatforms(restClient);
}
public GmsDao(@Nonnull String restliHostName, @Nonnull int restliHostPort) {

View File

@ -6,6 +6,7 @@ import com.linkedin.common.AuditStamp;
import com.linkedin.common.InstitutionalMemory;
import com.linkedin.common.urn.CorpuserUrn;
import com.linkedin.datahub.dao.DaoFactory;
import com.linkedin.datahub.dao.table.DataPlatformsDao;
import com.linkedin.datahub.dao.table.DatasetOwnerDao;
import com.linkedin.datahub.dao.table.LineageDao;
import com.linkedin.datahub.dao.view.DatasetViewDao;
@ -30,6 +31,7 @@ public class Dataset extends Controller {
private final OwnerViewDao _ownerViewDao;
private final DatasetOwnerDao _datasetOwnerDao;
private final LineageDao _lineageDao;
private final DataPlatformsDao _dataPlatformsDao;
private static final JsonNode EMPTY_RESPONSE = Json.newObject();
@ -38,6 +40,7 @@ public class Dataset extends Controller {
_ownerViewDao = DaoFactory.getOwnerViewDao();
_datasetOwnerDao = DaoFactory.getDatasetOwnerDao();
_lineageDao = DaoFactory.getLineageDao();
_dataPlatformsDao = DaoFactory.getDataPlatformsDao();
}
@Security.Authenticated(Secured.class)
@ -264,4 +267,15 @@ public class Dataset extends Controller {
}
return ok(Json.toJson(downstreams));
}
/**
 * GET endpoint returning all data platforms, serialized under a top-level "platforms" key.
 */
@Security.Authenticated(Secured.class)
@Nonnull
public Result getDataPlatforms() {
try {
// ControllerUtil.jsonNode wraps the list as {"platforms": [...]} for the UI.
return ok(ControllerUtil.jsonNode("platforms", _dataPlatformsDao.getAllPlatforms()));
} catch (final Exception e) {
Logger.error("Fail to get data platforms", e);
// NOTE(review): every failure maps to 404 here; an internal/remote error arguably warrants
// 500 — confirm against the error-handling convention of the other endpoints in this
// controller before changing.
return notFound(ControllerUtil.errorResponse(e));
}
}
}

View File

@ -1,8 +1,12 @@
package utils;
import com.fasterxml.jackson.databind.JsonNode;
import com.linkedin.data.template.RecordTemplate;
import com.linkedin.datahub.util.RestliUtil;
import com.linkedin.restli.client.RestLiResponseException;
import java.io.IOException;
import javax.annotation.Nonnull;
import play.Logger;
import play.libs.Json;
@ -29,4 +33,26 @@ public class ControllerUtil {
return (e instanceof RestLiResponseException) && (((RestLiResponseException) e).getStatus() == statusCode);
}
/**
 * Wraps an arbitrary value under the given key as an ObjectNode, converting the value to a
 * JsonNode first: JsonNode values pass through, RecordTemplate values go through RestliUtil,
 * and anything else is serialized via Play's Json.
 *
 * @param key the field name to nest the value under
 * @param value the value to convert and wrap
 * @return an ObjectNode of the shape {key: converted-value}
 */
@Nonnull
public static JsonNode jsonNode(@Nonnull final String key, @Nonnull final Object value) {
  JsonNode converted;
  if (value instanceof JsonNode) {
    converted = (JsonNode) value;
  } else if (value instanceof RecordTemplate) {
    try {
      converted = RestliUtil.toJsonNode((RecordTemplate) value);
    } catch (final IOException e) {
      // On conversion failure the key is still emitted, with a null value.
      Logger.error("Could not create a json", e);
      converted = null;
    }
  } else {
    converted = Json.toJson(value);
  }
  return Json.newObject().set(key, converted);
}
}

View File

@ -35,6 +35,7 @@ GET /api/v2/datasets/:urn/owners/suggestion co
GET /api/v2/datasets/:urn/schema controllers.api.v2.Dataset.getDatasetSchema(urn: String)
GET /api/v2/datasets/:urn/snapshot controllers.api.v2.Dataset.getDatasetSnapshot(urn: String)
GET /api/v2/datasets/:urn/upstreams controllers.api.v2.Dataset.getDatasetUpstreams(urn: String)
GET /api/v2/list/platforms controllers.api.v2.Dataset.getDataPlatforms
GET /api/v2/search controllers.api.v2.Search.search()
GET /api/*path controllers.Application.apiNotFound(path)

View File

@ -0,0 +1,23 @@
{
"name" : "dataPlatforms",
"namespace" : "com.linkedin.dataplatform",
"path" : "/dataPlatforms",
"schema" : "com.linkedin.dataplatform.DataPlatformInfo",
"doc" : "Resource provides information about various data platforms.\n\ngenerated from: com.linkedin.metadata.resources.dataplatform.DataPlatforms",
"collection" : {
"identifier" : {
"name" : "platformName",
"type" : "string"
},
"supports" : [ "get", "get_all" ],
"methods" : [ {
"method" : "get"
}, {
"method" : "get_all",
"pagingSupported" : true
} ],
"entity" : {
"path" : "/dataPlatforms/{platformName}"
}
}
}

View File

@ -0,0 +1,65 @@
{
"models" : [ {
"type" : "record",
"name" : "DataPlatformInfo",
"namespace" : "com.linkedin.dataplatform",
"doc" : "Information about a data platform",
"fields" : [ {
"name" : "name",
"type" : "string",
"doc" : "Name of the data platform",
"validate" : {
"strlen" : {
"max" : 15
}
}
}, {
"name" : "type",
"type" : {
"type" : "enum",
"name" : "PlatformType",
"doc" : "Platform types available at LinkedIn",
"symbols" : [ "FILE_SYSTEM", "KEY_VALUE_STORE", "MESSAGE_BROKER", "OBJECT_STORE", "OLAP_DATASTORE", "OTHERS", "QUERY_ENGINE", "RELATIONAL_DB", "SEARCH_ENGINE" ],
"symbolDocs" : {
"FILE_SYSTEM" : "Value for a file system, e.g. hdfs",
"KEY_VALUE_STORE" : "Value for a key value store, e.g. espresso, voldemort",
"MESSAGE_BROKER" : "Value for a message broker, e.g. kafka",
"OBJECT_STORE" : "Value for an object store, e.g. ambry",
"OLAP_DATASTORE" : "Value for an OLAP datastore, e.g. pinot",
"OTHERS" : "Value for other platforms, e.g salesforce, dovetail",
"QUERY_ENGINE" : "Value for a query engine, e.g. presto",
"RELATIONAL_DB" : "Value for a relational database, e.g. oracle, mysql",
"SEARCH_ENGINE" : "Value for a search engine, e.g seas"
}
},
"doc" : "Platform type this data platform describes"
}, {
"name" : "datasetNameDelimiter",
"type" : "string",
"doc" : "The delimiter in the dataset names on the data platform, e.g. '/' for HDFS and '.' for Oracle"
} ]
}, "com.linkedin.dataplatform.PlatformType" ],
"schema" : {
"name" : "dataPlatforms",
"namespace" : "com.linkedin.dataplatform",
"path" : "/dataPlatforms",
"schema" : "com.linkedin.dataplatform.DataPlatformInfo",
"doc" : "Resource provides information about various data platforms.\n\ngenerated from: com.linkedin.metadata.resources.dataplatform.DataPlatforms",
"collection" : {
"identifier" : {
"name" : "platformName",
"type" : "string"
},
"supports" : [ "get", "get_all" ],
"methods" : [ {
"method" : "get"
}, {
"method" : "get_all",
"pagingSupported" : true
} ],
"entity" : {
"path" : "/dataPlatforms/{platformName}"
}
}
}
}

View File

@ -0,0 +1,44 @@
package com.linkedin.dataplatform.client;
import com.linkedin.metadata.restli.BaseClient;
import com.linkedin.dataplatform.DataPlatformInfo;
import com.linkedin.dataplatform.DataPlatformsRequestBuilders;
import com.linkedin.r2.RemoteInvocationException;
import com.linkedin.restli.client.Client;
import com.linkedin.restli.client.GetAllRequest;
import com.linkedin.restli.client.Request;
import java.util.List;
import javax.annotation.Nonnull;
/**
 * Rest.li client for the /dataPlatforms GMS resource.
 */
public class DataPlatforms extends BaseClient {

  private static final DataPlatformsRequestBuilders PLATFORM_BUILDERS = new DataPlatformsRequestBuilders();

  public DataPlatforms(@Nonnull Client restliClient) {
    super(restliClient);
  }

  /**
   * Looks up a single data platform by its name.
   *
   * @param platformName name of the platform, e.g. "hdfs"
   * @return the matching DataPlatformInfo
   * @throws RemoteInvocationException on transport or server failure
   */
  @Nonnull
  public DataPlatformInfo getPlatformByName(@Nonnull String platformName) throws RemoteInvocationException {
    final Request<DataPlatformInfo> getRequest = PLATFORM_BUILDERS.get().id(platformName).build();
    return _client.sendRequest(getRequest).getResponse().getEntity();
  }

  /**
   * Retrieves every data platform registered in GMS.
   *
   * @return all platforms
   * @throws RemoteInvocationException on transport or server failure
   */
  @Nonnull
  public List<DataPlatformInfo> getAllPlatforms() throws RemoteInvocationException {
    final GetAllRequest<DataPlatformInfo> getAllRequest = PLATFORM_BUILDERS.getAll().build();
    return _client.sendRequest(getAllRequest).getResponse().getEntity().getElements();
  }
}

View File

@ -0,0 +1,21 @@
package com.linkedin.dataplatform.factory;
import com.linkedin.metadata.aspect.DataPlatformAspect;
import com.linkedin.metadata.dao.ImmutableLocalDAO;
import com.linkedin.metadata.resources.dataplatform.utils.DataPlatformsUtil;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Spring configuration that exposes the data-platform local DAO as a named bean.
 */
@Configuration
public class DataPlatformLocalDaoFactory {
// Injected for parity with the other DAO factories; not read in this class.
@Autowired
ApplicationContext applicationContext;
// NOTE(review): returns the raw ImmutableLocalDAO type; parameterizing it (presumably as
// ImmutableLocalDAO<DataPlatformAspect, DataPlatformUrn>, matching the injection site in the
// DataPlatforms resource) would avoid unchecked use — confirm the urn type before changing.
@Bean(name = "dataPlatformLocalDAO")
protected ImmutableLocalDAO createInstance() {
// Platforms are static reference data, so an immutable DAO seeded from the bundled
// DataPlatformInfo.json resource is sufficient.
return new ImmutableLocalDAO<>(DataPlatformAspect.class, DataPlatformsUtil.getDataPlatformInfoMap());
}
}

View File

@ -0,0 +1,40 @@
package com.linkedin.metadata.resources.dataplatform;
import com.linkedin.common.urn.DataPlatformUrn;
import com.linkedin.dataplatform.DataPlatformInfo;
import com.linkedin.metadata.aspect.DataPlatformAspect;
import com.linkedin.metadata.dao.ImmutableLocalDAO;
import com.linkedin.metadata.restli.RestliUtils;
import com.linkedin.parseq.Task;
import com.linkedin.restli.server.PagingContext;
import com.linkedin.restli.server.annotations.PagingContextParam;
import com.linkedin.restli.server.annotations.RestLiCollection;
import com.linkedin.restli.server.annotations.RestMethod;
import com.linkedin.restli.server.resources.CollectionResourceAsyncTemplate;
import java.util.List;
import javax.annotation.Nonnull;
import javax.inject.Inject;
import javax.inject.Named;
/**
 * Resource provides information about various data platforms.
 */
@RestLiCollection(name = "dataPlatforms", namespace = "com.linkedin.dataplatform", keyName = "platformName")
public class DataPlatforms extends CollectionResourceAsyncTemplate<String, DataPlatformInfo> {
// Immutable in-memory DAO seeded from DataPlatformInfo.json (see DataPlatformLocalDaoFactory).
@Inject
@Named("dataPlatformLocalDAO")
private ImmutableLocalDAO<DataPlatformAspect, DataPlatformUrn> _localDAO;
// GET /dataPlatforms/{platformName}: empty Optional from the DAO surfaces as a 404.
@RestMethod.Get
public Task<DataPlatformInfo> get(@Nonnull final String name) {
return RestliUtils.toTaskFromOptional(() -> _localDAO.get(DataPlatformInfo.class, new DataPlatformUrn(name)));
}
// GET_ALL with paging (default page size 100).
// NOTE(review): unlike get(), this wraps an eagerly-computed value in Task.value, so the DAO
// list call runs on the caller thread — fine for an in-memory DAO; revisit if the DAO changes.
@RestMethod.GetAll
public Task<List<DataPlatformInfo>> getAll(@Nonnull @PagingContextParam(defaultCount = 100) PagingContext pagingContext) {
return Task.value(_localDAO.list(DataPlatformInfo.class, pagingContext.getStart(), pagingContext.getCount()).getValues());
}
}

View File

@ -0,0 +1,53 @@
package com.linkedin.metadata.resources.dataplatform.utils;
import com.linkedin.common.urn.DataPlatformUrn;
import com.linkedin.dataplatform.DataPlatformInfo;
import com.linkedin.metadata.dao.ImmutableLocalDAO;
import java.io.IOException;
import java.io.InputStream;
import java.net.URISyntaxException;
import java.util.Map;
import java.util.Optional;
import javax.annotation.Nonnull;
import org.json.simple.parser.ParseException;
/**
 * Static lookup utilities over the data-platform reference data bundled as
 * DataPlatformInfo.json on the classpath.
 */
public class DataPlatformsUtil {

  private DataPlatformsUtil() {
  }

  // Loaded once at class initialization; platforms are static reference data shipped in the jar.
  private static final Map<DataPlatformUrn, DataPlatformInfo> DATA_PLATFORM_INFO_MAP =
      loadAspectsFromResource("DataPlatformInfo.json");

  /**
   * Returns the full platform-urn to platform-info map loaded from DataPlatformInfo.json.
   *
   * <p>NOTE(review): the internal map is returned directly; wrapping it unmodifiable would
   * protect against accidental mutation — confirm no caller relies on mutability.
   */
  public static Map<DataPlatformUrn, DataPlatformInfo> getDataPlatformInfoMap() {
    return DATA_PLATFORM_INFO_MAP;
  }

  /**
   * Loads the platform aspect map from a classpath resource.
   *
   * @param resource classpath-relative resource name
   * @return urn-to-info map parsed from the resource
   * @throws RuntimeException if the resource is missing or cannot be parsed
   */
  private static Map<DataPlatformUrn, DataPlatformInfo> loadAspectsFromResource(
      @Nonnull final String resource) {
    // Fail fast with a descriptive message instead of an opaque NPE deep inside loadAspects
    // when the resource is absent from the classpath (e.g. a packaging mistake).
    final InputStream stream = DataPlatformsUtil.class.getClassLoader().getResourceAsStream(resource);
    if (stream == null) {
      throw new RuntimeException("Resource not found on classpath: " + resource);
    }
    try {
      return ImmutableLocalDAO.loadAspects(DataPlatformInfo.class, stream);
    } catch (ParseException | IOException | URISyntaxException e) {
      // Preserve the cause and name the offending resource.
      throw new RuntimeException("Failed to load aspects from resource: " + resource, e);
    }
  }

  /**
   * Returns true if a platform with the given name exists in the reference data.
   */
  public static boolean isValidDataPlatform(@Nonnull String platformName) {
    return DATA_PLATFORM_INFO_MAP.containsKey(toDataPlatformUrn(platformName));
  }

  /**
   * Looks up a platform's info by name.
   *
   * @return the info, or empty if the platform is unknown
   */
  @Nonnull
  public static Optional<DataPlatformInfo> get(@Nonnull String platformName) {
    return Optional.ofNullable(DATA_PLATFORM_INFO_MAP.get(toDataPlatformUrn(platformName)));
  }

  /**
   * Looks up the dataset-name delimiter for a platform, e.g. "/" for hdfs and "." for oracle.
   *
   * @return the delimiter, or empty if the platform is unknown
   */
  @Nonnull
  public static Optional<String> getPlatformDelimiter(@Nonnull String platformName) {
    return get(platformName).map(DataPlatformInfo::getDatasetNameDelimiter);
  }

  private static DataPlatformUrn toDataPlatformUrn(@Nonnull String platformName) {
    return new DataPlatformUrn(platformName);
  }
}

View File

@ -0,0 +1,82 @@
{
"urn:li:dataPlatform:adlsGen1": {
"datasetNameDelimiter": "/",
"name": "adlsGen1",
"type": "FILE_SYSTEM"
},
"urn:li:dataPlatform:adlsGen2": {
"datasetNameDelimiter": "/",
"name": "adlsGen2",
"type": "FILE_SYSTEM"
},
"urn:li:dataPlatform:ambry": {
"datasetNameDelimiter": ".",
"name": "ambry",
"type": "OBJECT_STORE"
},
"urn:li:dataPlatform:couchbase": {
"datasetNameDelimiter": ".",
"name": "couchbase",
"type": "KEY_VALUE_STORE"
},
"urn:li:dataPlatform:external": {
"datasetNameDelimiter": ".",
"name": "external",
"type": "OTHERS"
},
"urn:li:dataPlatform:hdfs": {
"datasetNameDelimiter": "/",
"name": "hdfs",
"type": "FILE_SYSTEM"
},
"urn:li:dataPlatform:hive": {
"datasetNameDelimiter": ".",
"name": "hive",
"type": "FILE_SYSTEM"
},
"urn:li:dataPlatform:kafka": {
"datasetNameDelimiter": ".",
"name": "kafka",
"type": "MESSAGE_BROKER"
},
"urn:li:dataPlatform:kusto": {
"datasetNameDelimiter": ".",
"name": "kusto",
"type": "OLAP_DATASTORE"
},
"urn:li:dataPlatform:mongo": {
"datasetNameDelimiter": ".",
"name": "mongo",
"type": "KEY_VALUE_STORE"
},
"urn:li:dataPlatform:mysql": {
"datasetNameDelimiter": ".",
"name": "mysql",
"type": "RELATIONAL_DB"
},
"urn:li:dataPlatform:oracle": {
"datasetNameDelimiter": ".",
"name": "oracle",
"type": "RELATIONAL_DB"
},
"urn:li:dataPlatform:pinot": {
"datasetNameDelimiter": ".",
"name": "pinot",
"type": "OLAP_DATASTORE"
},
"urn:li:dataPlatform:presto": {
"datasetNameDelimiter": ".",
"name": "presto",
"type": "QUERY_ENGINE"
},
"urn:li:dataPlatform:teradata": {
"datasetNameDelimiter": ".",
"name": "teradata",
"type": "RELATIONAL_DB"
},
"urn:li:dataPlatform:voldemort": {
"datasetNameDelimiter": ".",
"name": "voldemort",
"type": "KEY_VALUE_STORE"
}
}

View File

@ -0,0 +1,77 @@
package com.linkedin.metadata.resources.dataplatform.utils;
import com.linkedin.dataplatform.DataPlatformInfo;
import com.linkedin.dataplatform.PlatformType;
import org.testng.annotations.Test;
import static org.testng.Assert.*;
public class DataPlatformsUtilTest {
@Test
public void testGet() {
String platformName = "hdfs";
DataPlatformInfo platform = DataPlatformsUtil.get(platformName).orElse(null);
assertNotNull(platform, platformName);
assertEquals(platform.getName(), platformName, platformName);
assertEquals(platform.getType(), PlatformType.FILE_SYSTEM, platformName);
platformName = "fake";
platform = DataPlatformsUtil.get(platformName).orElse(null);
assertNull(platform, platformName);
}
@Test
public void testGetPlatformType() {
assertPlatformType("ambry", PlatformType.OBJECT_STORE);
assertPlatformType("couchbase", PlatformType.KEY_VALUE_STORE);
assertPlatformType("external", PlatformType.OTHERS);
assertPlatformType("hdfs", PlatformType.FILE_SYSTEM);
assertPlatformType("hive", PlatformType.FILE_SYSTEM);
assertPlatformType("kafka", PlatformType.MESSAGE_BROKER);
assertPlatformType("mongo", PlatformType.KEY_VALUE_STORE);
assertPlatformType("mysql", PlatformType.RELATIONAL_DB);
assertPlatformType("oracle", PlatformType.RELATIONAL_DB);
assertPlatformType("pinot", PlatformType.OLAP_DATASTORE);
assertPlatformType("presto", PlatformType.QUERY_ENGINE);
assertPlatformType("teradata", PlatformType.RELATIONAL_DB);
assertPlatformType("voldemort", PlatformType.KEY_VALUE_STORE);
}
private void assertPlatformType(String name, PlatformType type) {
DataPlatformInfo platform = DataPlatformsUtil.get(name).orElse(null);
assertNotNull(platform, name);
assertEquals(platform.getName(), name, name);
assertEquals(platform.getType(), type, name);
}
@Test
public void testIsValidPlatform() {
String platformName = "hdfs";
boolean validDataPlatform = DataPlatformsUtil.isValidDataPlatform(platformName);
assertTrue(validDataPlatform);
platformName = "fake";
validDataPlatform = DataPlatformsUtil.isValidDataPlatform(platformName);
assertFalse(validDataPlatform, platformName);
}
@Test
public void testGetPlatformDelimiter() {
assertEquals(DataPlatformsUtil.getPlatformDelimiter("ambry").get(), ".");
assertEquals(DataPlatformsUtil.getPlatformDelimiter("couchbase").get(), ".");
assertEquals(DataPlatformsUtil.getPlatformDelimiter("external").get(), ".");
assertEquals(DataPlatformsUtil.getPlatformDelimiter("hdfs").get(), "/");
assertEquals(DataPlatformsUtil.getPlatformDelimiter("hive").get(), ".");
assertEquals(DataPlatformsUtil.getPlatformDelimiter("kafka").get(), ".");
assertEquals(DataPlatformsUtil.getPlatformDelimiter("mongo").get(), ".");
assertEquals(DataPlatformsUtil.getPlatformDelimiter("mysql").get(), ".");
assertEquals(DataPlatformsUtil.getPlatformDelimiter("oracle").get(), ".");
assertEquals(DataPlatformsUtil.getPlatformDelimiter("pinot").get(), ".");
assertEquals(DataPlatformsUtil.getPlatformDelimiter("presto").get(), ".");
assertEquals(DataPlatformsUtil.getPlatformDelimiter("teradata").get(), ".");
assertEquals(DataPlatformsUtil.getPlatformDelimiter("voldemort").get(), ".");
assertFalse(DataPlatformsUtil.getPlatformDelimiter("fake").isPresent());
}
}