From e56b6a28715a47c390d4e0b595711d527c3d8e91 Mon Sep 17 00:00:00 2001 From: Kerem Sahin Date: Wed, 5 Feb 2020 19:05:49 -0800 Subject: [PATCH] Add forward slash escape for Elasticsearch queries --- .../app/controllers/api/v2/Search.java | 15 ++++- datahub-frontend/app/utils/SearchUtil.java | 30 ++++++++++ .../test/utils/SearchUtilTest.java | 17 ++++++ .../elasticsearch/corpuser-index-config.json | 56 +++++++------------ runMidTier.sh | 2 - 5 files changed, 78 insertions(+), 42 deletions(-) create mode 100644 datahub-frontend/app/utils/SearchUtil.java create mode 100644 datahub-frontend/test/utils/SearchUtilTest.java delete mode 100755 runMidTier.sh diff --git a/datahub-frontend/app/controllers/api/v2/Search.java b/datahub-frontend/app/controllers/api/v2/Search.java index fcfd1c2d1d..853f55a9f8 100644 --- a/datahub-frontend/app/controllers/api/v2/Search.java +++ b/datahub-frontend/app/controllers/api/v2/Search.java @@ -12,6 +12,7 @@ import play.mvc.Controller; import play.mvc.Result; import play.mvc.Security; import utils.ControllerUtil; +import utils.SearchUtil; import javax.annotation.Nonnull; import java.util.Collections; @@ -69,7 +70,11 @@ public class Search extends Controller { return badRequest("Bad Request. type parameter can not be null or empty"); } - String input = request().getQueryString(REQUEST_INPUT); + // escape forward slash since it is a reserved character in Elasticsearch + // TODO: Once apis for exact or advanced(with support for field specific/regex) search are exposed, + // update to call appropriate api based on indication from user. + final String input = SearchUtil + .escapeForwardSlash(Strings.nullToEmpty(request().getQueryString(REQUEST_INPUT))); if (isBlank(input)) { return badRequest("Bad Request. input parameter can not be null or empty"); } @@ -101,13 +106,17 @@ public class Search extends Controller { return badRequest("Bad Request. type parameter can not be null or empty"); } - String input = Strings.nullToEmpty(request().getQueryString(REQUEST_INPUT)); - String field = request().getQueryString(REQUEST_FIELD); if (isBlank(field)) { return badRequest("Bad Request. field parameter can not be null or empty"); } + // escape forward slash since it is a reserved character in Elasticsearch + // TODO: Once apis for exact or advanced(with support for field specific/regex) search are exposed, + // update to call appropriate api based on indication from user. + final String input = SearchUtil + .escapeForwardSlash(Strings.nullToEmpty(request().getQueryString(REQUEST_INPUT))); + int limit = NumberUtils.toInt(request().getQueryString(REQUEST_LIMIT), _DEFAULT_LIMIT_VALUE); Map requestMap = buildRequestMap(type); diff --git a/datahub-frontend/app/utils/SearchUtil.java b/datahub-frontend/app/utils/SearchUtil.java new file mode 100644 index 0000000000..2c52ff5b40 --- /dev/null +++ b/datahub-frontend/app/utils/SearchUtil.java @@ -0,0 +1,30 @@ +package utils; + +import javax.annotation.Nonnull; + + +/** + * Utility functions for Search + */ +public class SearchUtil { + + private SearchUtil() { + //utility class + } + + /** + * Returns the string with the forward slash escaped + * More details on reserved characters in Elasticsearch can be found at, + * https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#_reserved_characters + * + * @param input + * @return + */ + @Nonnull + public static String escapeForwardSlash(@Nonnull String input) { + if (input.contains("/")) { + input = input.replace("/", "\\\\/"); + } + return input; + } +} diff --git a/datahub-frontend/test/utils/SearchUtilTest.java b/datahub-frontend/test/utils/SearchUtilTest.java new file mode 100644 index 0000000000..f6c4683e98 --- /dev/null +++ b/datahub-frontend/test/utils/SearchUtilTest.java @@ -0,0 +1,17 @@ +package utils; + +import org.junit.Test; + +import static org.junit.Assert.*; + +public class SearchUtilTest { + @Test + public void testEscapeForwardSlash() { + // escape "/" + assertEquals("\\\\/foo\\\\/bar", SearchUtil.escapeForwardSlash("/foo/bar")); + // "/" is escaped but "*" is not escaped and is treated as regex. Since currently we want to retain the regex behaviour with "*" + assertEquals("\\\\/foo\\\\/bar\\\\/*", SearchUtil.escapeForwardSlash("/foo/bar/*")); + assertEquals("", ""); + assertEquals("foo", "foo"); + } +} diff --git a/docker/elasticsearch/corpuser-index-config.json b/docker/elasticsearch/corpuser-index-config.json index 3514e56997..6a6c3a3b1b 100644 --- a/docker/elasticsearch/corpuser-index-config.json +++ b/docker/elasticsearch/corpuser-index-config.json @@ -16,10 +16,20 @@ "catenate_words": "false" } }, + "normalizer": { + "custom_normalizer": { + "filter": [ + "lowercase", + "asciifolding" + ], + "type": "custom" + } + }, "analyzer": { "delimit_edgengram": { "filter": [ "lowercase", + "custom_delimiter", "autocomplete_filter" ], "tokenizer": "whitespace" @@ -27,8 +37,7 @@ "delimit": { "filter": [ "lowercase", - "custom_delimiter", - "autocomplete_filter" + "custom_delimiter" ], "tokenizer": "whitespace" }, @@ -48,13 +57,7 @@ "doc": { "properties": { "aboutMe": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } + "type": "text" }, "active": { "type": "boolean" @@ -66,8 +69,7 @@ "type": "text", "analyzer": "delimit_edgengram" } - }, - "analyzer": "delimit" + } }, "ldap": { "type": "text", @@ -90,38 +92,18 @@ "analyzer": "lowercase_keyword" }, "skills": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } + "type": "text" }, "teams": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } + "type": "text" }, "title": { - "type": "text", - "fields": { - "ngram": { - "type": "text", - "analyzer": "delimit_edgengram" - } - }, - "analyzer": "delimit", - "fielddata": true + "type": "keyword", + "normalizer": "custom_normalizer" }, "urn": { - "type": "text", - "analyzer": "lowercase_keyword", - "fielddata": true + "type": "keyword", + "normalizer": "custom_normalizer" } } } diff --git a/runMidTier.sh b/runMidTier.sh deleted file mode 100755 index 666976588b..0000000000 --- a/runMidTier.sh +++ /dev/null @@ -1,2 +0,0 @@ -./gradlew build -x datahub-web:emberBuild -x datahub-web:emberWorkspaceTest -x datahub-frontend:unzipAssets -cd datahub-frontend/run && ./run-local-frontend \ No newline at end of file