mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-12 18:47:45 +00:00
switch from avro schema to espresso schema format
This commit is contained in:
parent
eee2fbf230
commit
aacb722e4e
@ -34,6 +34,7 @@ import org.codehaus.jackson.JsonParseException;
|
||||
import org.codehaus.jackson.map.JsonMappingException;
|
||||
import org.codehaus.jackson.map.ObjectMapper;
|
||||
import org.codehaus.jackson.node.ArrayNode;
|
||||
import org.codehaus.jackson.node.ObjectNode;
|
||||
import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate;
|
||||
import wherehows.common.Constant;
|
||||
import wherehows.common.utils.JdbcConnection;
|
||||
@ -73,7 +74,6 @@ public class EspressoPopulateSchemaMetadataEtl extends EtlJob {
|
||||
final Client r2Client = new TransportClientAdapter(http.getClient(Collections.<String, String>emptyMap()));
|
||||
RestClient _restClient = new RestClient(r2Client, restLiServerURL);
|
||||
logger.debug("restLiServerURL is: " + restLiServerURL);
|
||||
System.out.println("restLiServerURL is: " + restLiServerURL);
|
||||
|
||||
NamedParameterJdbcTemplate
|
||||
lNamedParameterJdbcTemplate = JdbcConnection.getNamedParameterJdbcTemplate(driverClassName,url,dbUserName,dbPassword);
|
||||
@ -93,45 +93,78 @@ public class EspressoPopulateSchemaMetadataEtl extends EtlJob {
|
||||
|
||||
String urn = (String) row.get("urn");
|
||||
String lDatasetName = urn.substring(12).replace('/','.');
|
||||
String lSchemaName = lDatasetName; // TO CHECK? IS THIS EXPECTED?
|
||||
String lSchemaName = lDatasetName;
|
||||
String lOwnerName = ownerQueryResult.get("namespace") + ":" + ownerQueryResult.get("owner_id");
|
||||
|
||||
String lSchemaJsonString = (String)row.get("schema");
|
||||
|
||||
logger.info("The populating schema is: " + lSchemaJsonString);
|
||||
String lTableSchemaData = " ";
|
||||
String lDocumentSchemaData = " ";
|
||||
String lTableSchemaData = null;
|
||||
String lDocumentSchemaData = null;
|
||||
|
||||
try {
|
||||
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
JsonNode rootNode = mapper.readTree(lSchemaJsonString);
|
||||
|
||||
ArrayNode keySchemaNode = (ArrayNode) rootNode.get("keySchema");
|
||||
if (keySchemaNode!= null) {
|
||||
ObjectNode tableSchemaNode = mapper.createObjectNode();
|
||||
ArrayNode resourceKeyPartsNode = mapper.createArrayNode();
|
||||
|
||||
if (keySchemaNode!= null)
|
||||
{
|
||||
Iterator<JsonNode> slaidsIterator = keySchemaNode.getElements();
|
||||
while (slaidsIterator.hasNext()) {
|
||||
while (slaidsIterator.hasNext())
|
||||
{
|
||||
JsonNode oneElementNode = slaidsIterator.next();
|
||||
lTableSchemaData = mapper.writeValueAsString(oneElementNode);
|
||||
resourceKeyPartsNode.add(oneElementNode);
|
||||
}
|
||||
}
|
||||
|
||||
JsonNode valueSchemaNode = rootNode.path("valueSchema");
|
||||
if (valueSchemaNode != null) {
|
||||
lDocumentSchemaData = mapper.writeValueAsString(valueSchemaNode);
|
||||
}
|
||||
String name = "";
|
||||
String doc = "";
|
||||
if (rootNode.has("name") && rootNode.has("doc"))
|
||||
{
|
||||
name = rootNode.get("name").toString();
|
||||
doc = rootNode.get("doc").toString();
|
||||
|
||||
tableSchemaNode.put("name",name);
|
||||
tableSchemaNode.put("doc",doc);
|
||||
tableSchemaNode.put("schemaType","TableSchema");
|
||||
tableSchemaNode.put("recordType","/schemata/document/DUMMY/DUMMY");
|
||||
tableSchemaNode.put("version",1);
|
||||
tableSchemaNode.put("resourceKeyParts",resourceKeyPartsNode);
|
||||
|
||||
lTableSchemaData = tableSchemaNode.toString();
|
||||
}
|
||||
else
|
||||
{
|
||||
logger.info("Missing name or doc field. Corrupted schema : " + lSchemaName);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
JsonNode valueSchemaNode = rootNode.get("valueSchema");
|
||||
if (valueSchemaNode != null && valueSchemaNode.has("name"))
|
||||
{
|
||||
lDocumentSchemaData = valueSchemaNode.toString();
|
||||
}
|
||||
else
|
||||
{
|
||||
logger.info("ValueSchema is missing name field in schema : " + lSchemaName);
|
||||
}
|
||||
}
|
||||
catch (JsonParseException e) { e.printStackTrace(); }
|
||||
catch (JsonMappingException e) { e.printStackTrace(); }
|
||||
catch (IOException e) { e.printStackTrace(); }
|
||||
|
||||
if (lTableSchemaData != null && lDocumentSchemaData != null) // not found then create, avoid committing transaction error. Is this necessary?
|
||||
if (lTableSchemaData != null && lDocumentSchemaData != null)
|
||||
{
|
||||
try {
|
||||
logger.info("table schema: " + lTableSchemaData);
|
||||
logger.info("valueSchema: " + lDocumentSchemaData);
|
||||
System.out.println("lSchemaName is " + lSchemaName);
|
||||
|
||||
lRestClient.create(_restClient, lSchemaName,lTableSchemaData,lDocumentSchemaData,lDatasetName,lOwnerName); //"urn:li:corpuser:gmohanas"
|
||||
lRestClient.create(_restClient, lSchemaName,lTableSchemaData,lDocumentSchemaData,lDatasetName,lOwnerName);
|
||||
} catch (Exception e) {
|
||||
logger.error(e.toString());
|
||||
}
|
||||
|
||||
@ -46,7 +46,7 @@ public class OraclePopulateSchemaMetadataEtl extends EtlJob {
|
||||
|
||||
public static final String GET_OWNER_BY_ID = "SELECT * FROM dataset_owner WHERE dataset_urn like :platform limit 1";
|
||||
public final static String GET_DATASETS_NAME_LIKE_PLAT =
|
||||
"SELECT * FROM dict_dataset WHERE urn like :platform limit 1";
|
||||
"SELECT * FROM dict_dataset WHERE urn like :platform";
|
||||
|
||||
/** The property_name field in wh_property table for WhereHows database connection information */
|
||||
public String driverClassName = prop.getProperty(Constant.WH_DB_DRIVER_KEY);
|
||||
|
||||
@ -163,3 +163,7 @@ base.url.key=
|
||||
# dali
|
||||
dali.git.urn=
|
||||
git.committer.blacklist=
|
||||
|
||||
# wherehows metadata store restli server
|
||||
wherehows.restli.server.url=
|
||||
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
Please get the extra library files, which may not be available in Maven/TypeSafe repository or Artifactory, and put them here. For example:
|
||||
Please get the extra library files from linkedin artifactory:
|
||||
|
||||
* https://downloads.teradata.com/download/connectivity/jdbc-driver
|
||||
* http://download.oracle.com/otn/utilities_drivers/jdbc/121010/ojdbc7.jar
|
||||
* http://download.oracle.com/otn/utilities_drivers/jdbc/121010/orai18n.jar
|
||||
-metadata-store-api-rest-client-0.1.26.jar
|
||||
-metadata-store-api-data-template-0.1.26.jar
|
||||
-models-data-template-16.0.4.jar
|
||||
-resource-identity-urn-3.0.5.jar
|
||||
@ -34,6 +34,10 @@ import com.linkedin.restli.client.RestClient;
|
||||
import com.linkedin.restli.common.ComplexResourceKey;
|
||||
import com.linkedin.restli.common.EmptyRecord;
|
||||
import com.linkedin.restli.common.IdResponse;
|
||||
import com.linkedin.restli.client.Response;
|
||||
import com.linkedin.restli.client.RestLiResponseException;
|
||||
import com.linkedin.r2.RemoteInvocationException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
@ -68,15 +72,17 @@ public class EspressoCreateSchemaMetadata {
|
||||
.setForeignKeysSpecs(new ForeignKeySpecMap());
|
||||
|
||||
try {
|
||||
|
||||
IdResponse<ComplexResourceKey<SchemaMetadataKey, EmptyRecord>> response =
|
||||
_restClient.sendRequest(_schemaMetadataBuilders.create().input(newSchemaMetadata).actorParam(new Urn(iUrn)).build()).getResponseEntity();
|
||||
Thread.sleep(10000);
|
||||
|
||||
System.out.println(response.getId().getKey());
|
||||
} catch (Exception e) {
|
||||
System.out.println(_schemaMetadataBuilders.create().input(newSchemaMetadata).build());
|
||||
System.out.println(e.fillInStackTrace());
|
||||
} catch (RemoteInvocationException ex) {
|
||||
RestLiResponseException ex2 = (RestLiResponseException)ex;
|
||||
System.out.println(ex2.getDecodedResponse().getHeader("X-LI-R2-W-IC-1-serviceCallTraceData"));
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
private static final SchemaMetadataRequestBuilders _schemaMetadataBuilders = new SchemaMetadataRequestBuilders();
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user