/**
 * Copyright 2015 LinkedIn Corp. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
package utils;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import play.Logger;

/**
 * Urn class used for urn conversion: parses a urn string into dataset type,
 * schema name and abstract object name, and assembles a urn string from those parts.
 * (An illustrative usage sketch follows the class body.)
 *
 * Created by zsun on 1/15/15.
 */
public class Urn {

  public String urnString;
  public String datasetType;
  public String schemaName;
  public String abstractObjectName;

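  // dataset types recognized by validateUrn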
  static final String[] stoList = new String[] {"teradata", "hdfs", "hive", "dalids", "oracle", "mysql", "pinot"};
  static final Set<String> datasetTypes = new HashSet<String>(Arrays.asList(stoList));

  /**
   * A urn can contain 3 parts:
   *
   *   (1)            (2)          (3)
   *   dataset_type://cluster:port/parent/name
   *
   * The 2nd part is only used to identify a deployed dataset instance;
   * for a dataset definition we only use parts (1) + (3).
   */
  public Urn(String urnString) {
    this.urnString = urnString;
    String[] splitResult = urnString.split(":///");
    datasetType = splitResult[0].toLowerCase();
    Logger.debug(urnString);

    switch (datasetType) {
      /* example: hdfs:///data/tracking/PageViewEvent -> 'hdfs', '', '/data/tracking/PageViewEvent' */
      case "hdfs":
        abstractObjectName = "/" + splitResult[1];
        schemaName = "";
        break;
      /* example: teradata:///dwh_dim/dim_table_name -> 'teradata', 'dwh_dim', 'dim_table_name'
       *          hive:///db_name/table_name -> 'hive', 'db_name', 'table_name'
       */
      case "teradata":
      case "oracle":
      case "mysql":
      case "espresso":
      case "pinot":
      case "hive":
      case "dalids":
        String[] split2 = splitResult[1].split("/");
        abstractObjectName = split2[split2.length - 1];
        StringBuffer sb = new StringBuffer();
        if (split2.length > 1) {
          for (int i = 0; i < split2.length - 1; i++) {
            sb.append(split2[i]);
            sb.append("/");
          }
          sb.deleteCharAt(sb.length() - 1);
        }
        schemaName = sb.toString();
        break;
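      // any other dataset type: keep the remainder as the object name (schemaName stays null)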
      default:
        abstractObjectName = splitResult[1];
    }
  }
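
  /**
   * Assembles the urn string from its parts. For the known database/warehouse
   * types the result is dataset_type:///schemaName/abstractObjectName; for any
   * other type it is dataset_type:///abstractObjectName (leading '/' trimmed).
   */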
  public Urn(String datasetType, String schemaName, String abstractObjectName) {
    this.datasetType = datasetType.toLowerCase();
    if (schemaName != null) {
      this.schemaName = schemaName.toLowerCase();
    }
    this.abstractObjectName = abstractObjectName;

    switch (this.datasetType) {
      case "teradata":
      case "oracle":
      case "mysql":
      case "espresso":
      case "pinot":
      case "hive":
      case "dalids":
        urnString = this.datasetType + ":///" + schemaName + "/" + abstractObjectName;
        break;
      default:
        String trimName = abstractObjectName.startsWith("/") ? abstractObjectName.substring(1) : abstractObjectName;
        urnString = this.datasetType + ":///" + trimName;
    }
  }
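
  /**
   * Returns true when the urn string has the form dataset_type:///path, where the
   * dataset type is either one of the known types or a single word ([A-Za-z0-9_]+).
   */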
  public static boolean validateUrn(String urnString) {
    String[] splitResult = urnString.split(":///");
    return (datasetTypes.contains(splitResult[0]) || splitResult[0].matches("\\w+"))
        && splitResult.length > 1;
  }
}
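
/*
 * Minimal usage sketch (illustrative only; the UrnExample class below is an added
 * example, not part of the original file). It shows what the two constructors and
 * validateUrn produce for a few representative urn strings.
 */
class UrnExample {

  public static void main(String[] args) {
    // Parse a full urn string into its parts.
    Urn hiveUrn = new Urn("hive:///db_name/table_name");
    System.out.println(hiveUrn.datasetType);        // hive
    System.out.println(hiveUrn.schemaName);         // db_name
    System.out.println(hiveUrn.abstractObjectName); // table_name

    // Assemble a urn string from its parts.
    Urn hdfsUrn = new Urn("hdfs", "", "/data/tracking/PageViewEvent");
    System.out.println(hdfsUrn.urnString);          // hdfs:///data/tracking/PageViewEvent

    // Validate before parsing.
    System.out.println(Urn.validateUrn("oracle:///dwh_dim/dim_table_name")); // true
    System.out.println(Urn.validateUrn("not a urn"));                        // false
  }
}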