diff --git a/ingestion/src/metadata/ingestion/ometa/mixins/user_mixin.py b/ingestion/src/metadata/ingestion/ometa/mixins/user_mixin.py index 1aaa7b67cf9..36eeb692ba6 100644 --- a/ingestion/src/metadata/ingestion/ometa/mixins/user_mixin.py +++ b/ingestion/src/metadata/ingestion/ometa/mixins/user_mixin.py @@ -35,7 +35,7 @@ class OMetaUserMixin: client: REST email_search = ( - "/search/query?q=email:{email}&from={from_}&size={size}&index=" + "/search/query?q=email.keyword:{email}&from={from_}&size={size}&index=" + ES_INDEX_MAP[User.__name__] ) @@ -44,7 +44,7 @@ class OMetaUserMixin: self, email: Optional[str], from_count: int = 0, - size: int = 10, + size: int = 1, fields: Optional[list] = None, ) -> Optional[User]: """ diff --git a/ingestion/src/metadata/ingestion/sink/elasticsearch_mapping/user_search_index_mapping.py b/ingestion/src/metadata/ingestion/sink/elasticsearch_mapping/user_search_index_mapping.py index afa34a59e2e..a0231d597ac 100644 --- a/ingestion/src/metadata/ingestion/sink/elasticsearch_mapping/user_search_index_mapping.py +++ b/ingestion/src/metadata/ingestion/sink/elasticsearch_mapping/user_search_index_mapping.py @@ -56,7 +56,13 @@ USER_ELASTICSEARCH_INDEX_MAPPING = textwrap.dedent( "type": "text" }, "email": { - "type": "text" + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } }, "isAdmin": { "type": "text" diff --git a/ingestion/tests/integration/ometa/test_ometa_user_api.py b/ingestion/tests/integration/ometa/test_ometa_user_api.py index fa14f40b0bf..7a3600fb032 100644 --- a/ingestion/tests/integration/ometa/test_ometa_user_api.py +++ b/ingestion/tests/integration/ometa/test_ometa_user_api.py @@ -78,6 +78,10 @@ class OMetaUserTest(TestCase): data=CreateUserRequest(name="Levy", email="user2.1234@getcollate.io"), ) + cls.user_3: User = cls.metadata.create_or_update( + data=CreateUserRequest(name="Lima", email="random.lima@getcollate.io"), + ) + # Leave some time for indexes to get updated, otherwise 
this happens too fast cls.check_es_index() @@ -112,6 +116,13 @@ class OMetaUserTest(TestCase): self.metadata.get_user_by_email(email="idonotexist@random.com") ) + # A non-existing email returns None, even if its domain matches real users. + # To get this fixed, we had to add a `keyword` sub-field to the `email` + # field in the index and search by `email.keyword` in ES. + self.assertIsNone( + self.metadata.get_user_by_email(email="idonotexist@getcollate.io") + ) + # I can get User 1, who has the name equal to its email self.assertEqual( self.user_1.id, diff --git a/openmetadata-service/src/main/resources/elasticsearch/en/user_index_mapping.json b/openmetadata-service/src/main/resources/elasticsearch/en/user_index_mapping.json index 773cc973a51..7a95c41f9c2 100644 --- a/openmetadata-service/src/main/resources/elasticsearch/en/user_index_mapping.json +++ b/openmetadata-service/src/main/resources/elasticsearch/en/user_index_mapping.json @@ -39,7 +39,13 @@ "type": "text" }, "email": { - "type": "text" + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } }, "isAdmin": { "type": "boolean"