package com.linkedin.common.urn; import com.linkedin.data.template.Custom; import com.linkedin.util.ArgumentUtil; import java.net.URISyntaxException; import java.util.Collection; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import javax.annotation.Nullable; /** * Represents a URN (Uniform Resource Name) for a Linkedin entity, in the spirit of RFC 2141. Our * default URN format uses the non-standard namespace identifier "li", and hence default URNs begin * with "urn:li:". Note that the namespace according to RFC 2141 [Section 2.1] is case-insensitive and * for safety we only allow lower-case letters in our implementation. * *

Our URNs all consist of an "entity type", which denotes an internal namespace for the * resource, as well as an entity key, formatted as a tuple of parts. The full format of a URN is: * *

<URN> ::= urn:<namespace>:<entityType>:<entityKey> * *

The entity key is represented as a tuple of strings. If the tuple is of length 1, the key is * encoded directly. If the tuple has multiple parts, the parts are enclosed in parenthesizes and * comma-delimited, e.g., a URN whose key is the tuple [1, 2, 3] would be encoded as: * *

urn:li:example:(1,2,3) */ public class Urn { /** * @deprecated Don't create the Urn string manually, use Typed Urns or {@link #create(String * entityType, Object... tupleParts)} */ @Deprecated public static final String URN_PREFIX = "urn:li:"; private static final String URN_START = "urn:"; private static final String DEFAULT_NAMESPACE = "li"; private final String _entityType; private final TupleKey _entityKey; private final String _namespace; // Used to speed up toString() in the common case where the Urn is built up // from parsing an input string. @Nullable private String _cachedStringUrn; static { Custom.registerCoercer(new UrnCoercer(), Urn.class); } /** * Customized interner for all strings that may be used for _entityType. Urn._entityType is by * nature a pretty small set of values, such as "member", "company" etc. Due to this fact, when an * app creates and keeps in memory a large number of Urn's, it may end up with a very big number * of identical strings. Thus it's worth saving memory by interning _entityType when an Urn is * instantiated. String.intern() would be a natural choice, but it takes a few microseconds, and * thus may become too expensive when many (temporary) Urns are generated in very quick * succession. Thus we use a faster CHM below. Compared to the internal table used by * String.intern() it has a bigger memory overhead per each interned string, but for a small set * of canonical strings it doesn't matter. */ private static final Map ENTITY_TYPE_INTERNER = new ConcurrentHashMap<>(); /** * Create a Urn given its raw String representation. * * @param rawUrn - the String representation of a Urn. * @throws URISyntaxException - if the String is not a valid Urn. */ public Urn(String rawUrn) throws URISyntaxException { ArgumentUtil.notNull(rawUrn, "rawUrn"); _cachedStringUrn = rawUrn; if (!rawUrn.startsWith(URN_START)) { throw new URISyntaxException(rawUrn, "Urn doesn't start with 'urn:'. Urn: " + rawUrn, 0); } int secondColonIndex = rawUrn.indexOf(':', URN_START.length() + 1); _namespace = validateAndExtractNamespace(rawUrn, secondColonIndex); // First char of entityType must be [a-z] if (!charIsLowerCaseAlphabet(rawUrn, secondColonIndex + 1)) { throw new URISyntaxException( rawUrn, "First char of entityType must be [a-z]! Urn: " + rawUrn, secondColonIndex + 1); } int thirdColonIndex = rawUrn.indexOf(':', secondColonIndex + 2); // Case: urn:li:foo if (thirdColonIndex == -1) { _entityType = rawUrn.substring(secondColonIndex + 1); if (!charsAreWordClass(_entityType)) { throw new URISyntaxException( rawUrn, "entityType must have only [a-zA-Z0-9] chars. Urn: " + rawUrn); } _entityKey = new TupleKey(); return; } String entityType = rawUrn.substring(secondColonIndex + 1, thirdColonIndex); if (!charsAreWordClass(entityType)) { throw new URISyntaxException( rawUrn, "entityType must have only [a-zA-Z_0-9] chars. Urn: " + rawUrn); } int numEntityKeyChars = rawUrn.length() - (thirdColonIndex + 1); if (numEntityKeyChars <= 0) { throw new URISyntaxException( rawUrn, "Urns with empty entityKey are not allowed. Urn: " + rawUrn); } _entityType = internEntityType(entityType); _entityKey = TupleKey.fromString(rawUrn, thirdColonIndex + 1); // For the sake of backwards compatibility, we must ensure that // new Urn("urn:li:y:(urn:li:z:1)").toString() == "urn:li:y:urn:li:z:1" // Thus, if we detect a TupleKey with 1 part AND we had a paren in the // input, we abort our optimization of storing the original URN. if (_entityKey.size() == 1 && rawUrn.charAt(thirdColonIndex + 1) == '(') { _cachedStringUrn = null; } } /** * Create a Urn from an entity type and an encoded String key. The key is converted to a Tuple by * parsing using @see TupleKey#fromString * * @param entityType - the entity type for the Urn * @param typeSpecificString - the encoded string representation of a TupleKey * @throws URISyntaxException if the typeSpecificString is not a valid encoding of a TupleKey */ public Urn(String entityType, String typeSpecificString) throws URISyntaxException { this(DEFAULT_NAMESPACE, entityType, TupleKey.fromString(typeSpecificString)); } public Urn(String entityType, TupleKey entityKey) { this(DEFAULT_NAMESPACE, entityType, entityKey); } public Urn(String namespace, String entityType, TupleKey entityKey) { _namespace = namespace; _entityType = entityType; _entityKey = entityKey; _cachedStringUrn = null; } /** * DEPRECATED - use {@link #createFromTuple(String, Object...)} Create a Urn from an entity type * and a sequence of key parts. The key parts are converted to a tuple using @see TupleKey#create * * @param entityType - the entity type for the Urn * @param tupleParts - a sequence of objects representing the key of the Urn * @return - a new Urn object */ @Deprecated public static Urn create(String entityType, Object... tupleParts) { return new Urn(entityType, TupleKey.create(tupleParts)); } /** * DEPRECATED - use {@link #createFromTuple(String, java.util.Collection)} Create a Urn from an * entity type and a sequence of key parts. The key parts are converted to a tuple using @see * TupleKey#create * * @param entityType - the entity type for the Urn * @param tupleParts - a sequence of objects representing the key of the Urn * @return - a new Urn object */ @Deprecated public static Urn create(String entityType, Collection tupleParts) { return new Urn(entityType, TupleKey.create(tupleParts)); } /** * Create a Urn from an entity type and a sequence of key parts. The key parts are converted to a * tuple using @see TupleKey#create * * @param entityType - the entity type for the Urn * @param tupleParts - a sequence of objects representing the key of the Urn * @return - a new Urn object */ public static Urn createFromTuple(String entityType, Object... tupleParts) { return new Urn(entityType, TupleKey.create(tupleParts)); } /** * Create a Urn from an namespace, entity type and a sequence of key parts. The key parts are * converted to a tuple using @see TupleKey#create * * @param namespace - The namespace of this urn. * @param entityType - the entity type for the Urn * @param tupleParts - a sequence of objects representing the key of the Urn * @return - a new Urn object */ public static Urn createFromTupleWithNamespace( String namespace, String entityType, Object... tupleParts) { return new Urn(namespace, entityType, TupleKey.create(tupleParts)); } /** * Create a Urn from an entity type and a sequence of key parts. The key parts are converted to a * tuple using @see TupleKey#create * * @param entityType - the entity type for the Urn * @param tupleParts - a sequence of objects representing the key of the Urn * @return - a new Urn object */ public static Urn createFromTuple(String entityType, Collection tupleParts) { return new Urn(entityType, TupleKey.create(tupleParts)); } /** * Create a Urn given its raw String representation. * * @param rawUrn - the String representation of a Urn. * @throws URISyntaxException - if the String is not a valid Urn. */ public static Urn createFromString(String rawUrn) throws URISyntaxException { return new Urn(rawUrn); } /** * Create a Urn given its raw CharSequence representation. * * @param rawUrn - the Char Sequence representation of a Urn. * @throws URISyntaxException - if the String is not a valid Urn. */ public static Urn createFromCharSequence(CharSequence rawUrn) throws URISyntaxException { ArgumentUtil.notNull(rawUrn, "rawUrn"); return new Urn(rawUrn.toString()); } /** * Create a Urn from an entity type and an encoded String key. The key is converted to a Tuple by * parsing using @see TupleKey#fromString * * @param entityType - the entity type for the Urn * @param typeSpecificString - the encoded string representation of a TupleKey * @throws URISyntaxException if the typeSpecificString is not a valid encoding of a TupleKey */ public static Urn createFromTypeSpecificString(String entityType, String typeSpecificString) throws URISyntaxException { return new Urn(entityType, typeSpecificString); } public String getEntityType() { return _entityType; } public String getNamespace() { return _namespace; } public TupleKey getEntityKey() { return _entityKey; } /** * Convenience method to get the key's first tuple element as a String * * @return key's first tuple element */ public String getId() { return _entityKey.getAs(0, String.class); } /** * Convenience method to get the key's first tuple element as an Integer * * @return key's first tuple element, coerced to Integer */ public Integer getIdAsInt() { return _entityKey.getAs(0, Integer.class); } /** * Convenience method to get the key's first tuple element as a Long * * @return key's first tuple element, coerced to Long */ public Long getIdAsLong() { return _entityKey.getAs(0, Long.class); } public Urn getIdAsUrn() { return _entityKey.getAs(0, Urn.class); } /** * Return the namespace-specific string portion of this URN, i.e., everything following the * "urn:<namespace>:" prefix. * * @return The namespace-specific string portion of this URN */ public String getNSS() { return _entityType + (_entityKey.size() > 0 ? ':' + _entityKey.toString() : ""); } @Override public String toString() { if (_cachedStringUrn != null) { return _cachedStringUrn; } // This can be written to by multiple threads, but that's actually safe // because Urn is immutable and all the threads will compute the same // logical String (even though they may produce different String objects). // So whichever thread "wins" the write race, the result is the same. // This field also doesn't need to be volatile for memory visibility // because it's just a cache, so if one thread sees a null here while // another sees non-null, it's still fine: the thread seeing non-null // uses the cache and the other thread computes a "new" value for the // field which is again the same logical String. _cachedStringUrn = URN_START + _namespace + ':' + getNSS(); return _cachedStringUrn; } @Override public boolean equals(Object obj) { if (obj == null || !Urn.class.isAssignableFrom(obj.getClass())) { return false; } Urn other = (Urn) obj; return _entityType.equals(other._entityType) && _entityKey.equals(other._entityKey) && _namespace.equals(other._namespace); } @Override public int hashCode() { final int prime = 31; int result = _entityType.hashCode(); result = prime * result + _entityKey.hashCode(); return result; } private static String validateAndExtractNamespace(String rawUrn, int secondColonIndex) throws URISyntaxException { if (!charIsLowerCaseAlphabet(rawUrn, URN_START.length())) { throw new URISyntaxException( rawUrn, "First char of Urn namespace must be [a-z]! Urn: " + rawUrn, URN_START.length()); } if (secondColonIndex == -1) { throw new URISyntaxException(rawUrn, "Missing second ':' char. Urn: " + rawUrn); } int namespaceLen = secondColonIndex - URN_START.length(); if (namespaceLen > 32) { throw new URISyntaxException( rawUrn, "Namespace length > 32 chars. Urn: " + rawUrn, secondColonIndex); } if (namespaceLen == 2 && rawUrn.charAt(URN_START.length()) == 'l' && rawUrn.charAt(URN_START.length() + 1) == 'i') { // We want to avoid an allocation for the ultra-common "li" namespace! return DEFAULT_NAMESPACE; } String namespace = rawUrn.substring(URN_START.length(), secondColonIndex); if (!charsAreValidNamespace(namespace)) { throw new URISyntaxException(rawUrn, "Chars in namespace must be [a-z0-9-]!. Urn: " + rawUrn); } return namespace; } // Not using Character.isLowerCase on purpose because that is unicode-aware // and we only need ASCII. Handling only ASCII is faster. private static boolean charIsLowerCaseAlphabet(String input, int index) { if (index >= input.length()) { return false; } char c = input.charAt(index); return c >= 'a' && c <= 'z'; } // These are [a-z0-9-] private static boolean charsAreValidNamespace(String input) { for (int index = 0; index < input.length(); index++) { char c = input.charAt(index); // Not using Character.isLowerCase etc on purpose because that is // unicode-aware and we only need ASCII. Handling only ASCII is faster. if (!((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '-')) { return false; } } return true; } // Regex word class (\w) is defined as: [a-zA-Z_0-9] // Source: https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html private static boolean charsAreWordClass(String input) { for (int index = 0; index < input.length(); index++) { char c = input.charAt(index); // Not using Character.isLowerCase etc on purpose because that is // unicode-aware and we only need ASCII. Handling only ASCII is faster. if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_')) { return false; } } return true; } /** Intern a string to be assigned to the _entityType field. */ private static String internEntityType(String et) { // Most of the times this method is called, the canonical string is already // in the table, so let's do a quick get() first. String canonicalET = ENTITY_TYPE_INTERNER.get(et); if (canonicalET != null) { return canonicalET; } canonicalET = ENTITY_TYPE_INTERNER.putIfAbsent(et, et); return canonicalET != null ? canonicalET : et; } }