refactor(recommendations): Filtering for specific entity types in recommendations (#6538)

This commit is contained in:
John Joyce 2022-11-23 13:33:35 -08:00 committed by GitHub
parent a400eb0d52
commit 73671de7e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 108 additions and 4 deletions

View File

@ -2,8 +2,10 @@ package com.linkedin.metadata.recommendation.candidatesource;
import com.codahale.metrics.Timer;
import com.datahub.util.exception.ESQueryException;
import com.google.common.collect.ImmutableSet;
import com.linkedin.common.urn.Urn;
import com.linkedin.common.urn.UrnUtils;
import com.linkedin.metadata.Constants;
import com.linkedin.metadata.datahubusage.DataHubUsageEventConstants;
import com.linkedin.metadata.datahubusage.DataHubUsageEventType;
import com.linkedin.metadata.entity.EntityService;
@ -21,6 +23,7 @@ import io.opentelemetry.extension.annotations.WithSpan;
import java.io.IOException;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import lombok.RequiredArgsConstructor;
@ -41,6 +44,20 @@ import org.elasticsearch.search.builder.SearchSourceBuilder;
@Slf4j
@RequiredArgsConstructor
public class MostPopularSource implements RecommendationSource {
/**
* Entity Types that should be in scope for this type of recommendation.
*/
private static final Set<String> SUPPORTED_ENTITY_TYPES = ImmutableSet.of(Constants.DATASET_ENTITY_NAME,
Constants.DATA_FLOW_ENTITY_NAME,
Constants.DATA_JOB_ENTITY_NAME,
Constants.CONTAINER_ENTITY_NAME,
Constants.DASHBOARD_ENTITY_NAME,
Constants.CHART_ENTITY_NAME,
Constants.ML_MODEL_ENTITY_NAME,
Constants.ML_FEATURE_ENTITY_NAME,
Constants.ML_MODEL_GROUP_ENTITY_NAME,
Constants.ML_FEATURE_TABLE_ENTITY_NAME
);
private final RestHighLevelClient _searchClient;
private final IndexConvention _indexConvention;
private final EntityService _entityService;
@ -99,6 +116,7 @@ public class MostPopularSource implements RecommendationSource {
}
private SearchRequest buildSearchRequest(@Nonnull Urn userUrn) {
// TODO: Proactively filter for entity types in the supported set.
SearchRequest request = new SearchRequest();
SearchSourceBuilder source = new SearchSourceBuilder();
BoolQueryBuilder query = QueryBuilders.boolQuery();
@ -121,7 +139,7 @@ public class MostPopularSource implements RecommendationSource {
private Optional<RecommendationContent> buildContent(@Nonnull String entityUrn) {
Urn entity = UrnUtils.getUrn(entityUrn);
if (EntityUtils.checkIfRemoved(_entityService, entity)) {
if (EntityUtils.checkIfRemoved(_entityService, entity) || !RecommendationUtils.isSupportedEntityType(entity, SUPPORTED_ENTITY_TYPES)) {
return Optional.empty();
}

View File

@ -2,8 +2,10 @@ package com.linkedin.metadata.recommendation.candidatesource;
import com.codahale.metrics.Timer;
import com.datahub.util.exception.ESQueryException;
import com.google.common.collect.ImmutableSet;
import com.linkedin.common.urn.Urn;
import com.linkedin.common.urn.UrnUtils;
import com.linkedin.metadata.Constants;
import com.linkedin.metadata.datahubusage.DataHubUsageEventConstants;
import com.linkedin.metadata.datahubusage.DataHubUsageEventType;
import com.linkedin.metadata.entity.EntityService;
@ -21,6 +23,7 @@ import io.opentelemetry.extension.annotations.WithSpan;
import java.io.IOException;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import lombok.RequiredArgsConstructor;
@ -42,6 +45,20 @@ import org.elasticsearch.search.builder.SearchSourceBuilder;
@Slf4j
@RequiredArgsConstructor
public class RecentlyEditedSource implements RecommendationSource {
/**
* Entity Types that should be in scope for this type of recommendation.
*/
private static final Set<String> SUPPORTED_ENTITY_TYPES = ImmutableSet.of(Constants.DATASET_ENTITY_NAME,
Constants.DATA_FLOW_ENTITY_NAME,
Constants.DATA_JOB_ENTITY_NAME,
Constants.CONTAINER_ENTITY_NAME,
Constants.DASHBOARD_ENTITY_NAME,
Constants.CHART_ENTITY_NAME,
Constants.ML_MODEL_ENTITY_NAME,
Constants.ML_FEATURE_ENTITY_NAME,
Constants.ML_MODEL_GROUP_ENTITY_NAME,
Constants.ML_FEATURE_TABLE_ENTITY_NAME
);
private final RestHighLevelClient _searchClient;
private final IndexConvention _indexConvention;
private final EntityService _entityService;
@ -99,6 +116,7 @@ public class RecentlyEditedSource implements RecommendationSource {
}
private SearchRequest buildSearchRequest(@Nonnull Urn userUrn) {
// TODO: Proactively filter for entity types in the supported set.
SearchRequest request = new SearchRequest();
SearchSourceBuilder source = new SearchSourceBuilder();
BoolQueryBuilder query = QueryBuilders.boolQuery();
@ -124,11 +142,12 @@ public class RecentlyEditedSource implements RecommendationSource {
private Optional<RecommendationContent> buildContent(@Nonnull String entityUrn) {
Urn entity = UrnUtils.getUrn(entityUrn);
if (EntityUtils.checkIfRemoved(_entityService, entity)) {
if (EntityUtils.checkIfRemoved(_entityService, entity) || !RecommendationUtils.isSupportedEntityType(entity, SUPPORTED_ENTITY_TYPES)) {
return Optional.empty();
}
return Optional.of(new RecommendationContent().setEntity(entity)
return Optional.of(new RecommendationContent()
.setEntity(entity)
.setValue(entityUrn)
.setParams(new RecommendationParams().setEntityProfileParams(new EntityProfileParams().setUrn(entity))));
}

View File

@ -2,8 +2,10 @@ package com.linkedin.metadata.recommendation.candidatesource;
import com.codahale.metrics.Timer;
import com.datahub.util.exception.ESQueryException;
import com.google.common.collect.ImmutableSet;
import com.linkedin.common.urn.Urn;
import com.linkedin.common.urn.UrnUtils;
import com.linkedin.metadata.Constants;
import com.linkedin.metadata.datahubusage.DataHubUsageEventConstants;
import com.linkedin.metadata.datahubusage.DataHubUsageEventType;
import com.linkedin.metadata.entity.EntityService;
@ -21,6 +23,7 @@ import io.opentelemetry.extension.annotations.WithSpan;
import java.io.IOException;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import lombok.RequiredArgsConstructor;
@ -42,6 +45,20 @@ import org.elasticsearch.search.builder.SearchSourceBuilder;
@Slf4j
@RequiredArgsConstructor
public class RecentlyViewedSource implements RecommendationSource {
/**
* Entity Types that should be in scope for this type of recommendation.
*/
private static final Set<String> SUPPORTED_ENTITY_TYPES = ImmutableSet.of(Constants.DATASET_ENTITY_NAME,
Constants.DATA_FLOW_ENTITY_NAME,
Constants.DATA_JOB_ENTITY_NAME,
Constants.CONTAINER_ENTITY_NAME,
Constants.DASHBOARD_ENTITY_NAME,
Constants.CHART_ENTITY_NAME,
Constants.ML_MODEL_ENTITY_NAME,
Constants.ML_FEATURE_ENTITY_NAME,
Constants.ML_MODEL_GROUP_ENTITY_NAME,
Constants.ML_FEATURE_TABLE_ENTITY_NAME
);
private final RestHighLevelClient _searchClient;
private final IndexConvention _indexConvention;
private final EntityService _entityService;
@ -99,6 +116,7 @@ public class RecentlyViewedSource implements RecommendationSource {
}
private SearchRequest buildSearchRequest(@Nonnull Urn userUrn) {
// TODO: Proactively filter for entity types in the supported set.
SearchRequest request = new SearchRequest();
SearchSourceBuilder source = new SearchSourceBuilder();
BoolQueryBuilder query = QueryBuilders.boolQuery();
@ -126,7 +144,7 @@ public class RecentlyViewedSource implements RecommendationSource {
private Optional<RecommendationContent> buildContent(@Nonnull String entityUrn) {
Urn entity = UrnUtils.getUrn(entityUrn);
if (EntityUtils.checkIfRemoved(_entityService, entity)) {
if (EntityUtils.checkIfRemoved(_entityService, entity) || !RecommendationUtils.isSupportedEntityType(entity, SUPPORTED_ENTITY_TYPES)) {
return Optional.empty();
}

View File

@ -0,0 +1,23 @@
package com.linkedin.metadata.recommendation.candidatesource;
import com.linkedin.common.urn.Urn;
import java.util.Set;
import javax.annotation.Nonnull;
public class RecommendationUtils {

  /** Static helpers only; this class is not meant to be instantiated. */
  private RecommendationUtils() { }

  /**
   * Checks whether the entity type of a URN belongs to an allowed set of entity types.
   *
   * @param urn the urn whose entity type is inspected
   * @param entityTypes the entity type names that are considered supported
   * @return true when {@code entityTypes} contains the entity type of {@code urn}, false otherwise
   */
  public static boolean isSupportedEntityType(@Nonnull final Urn urn, @Nonnull final Set<String> entityTypes) {
    return entityTypes.contains(urn.getEntityType());
  }
}

View File

@ -0,0 +1,26 @@
package com.linkedin.metadata.recommendation.candidatesource;
import com.google.common.collect.ImmutableSet;
import com.linkedin.common.urn.Urn;
import com.linkedin.common.urn.UrnUtils;
import com.linkedin.metadata.Constants;
import java.util.Collections;
import org.junit.Assert;
import org.testng.annotations.Test;
/**
 * Unit tests for {@link RecommendationUtils}.
 */
public class RecommendationUtilsTest {

  /**
   * Verifies that {@link RecommendationUtils#isSupportedEntityType} reports membership of a URN's
   * entity type in the supplied set of entity types.
   *
   * <p>The method is public so that test runners and build tooling reliably discover it; test
   * methods are conventionally public.
   */
  @Test
  public void testIsSupportedEntityType() {
    Urn testUrn = UrnUtils.getUrn("urn:li:corpuser:john");

    // The urn's entity type is one of the allowed types.
    Assert.assertTrue(
        RecommendationUtils.isSupportedEntityType(
            testUrn, ImmutableSet.of(Constants.DATASET_ENTITY_NAME, Constants.CORP_USER_ENTITY_NAME))
    );

    // The allowed set is non-empty but does not include the urn's entity type.
    Assert.assertFalse(
        RecommendationUtils.isSupportedEntityType(testUrn, ImmutableSet.of(Constants.DATASET_ENTITY_NAME))
    );

    // An empty allowed set supports nothing.
    Assert.assertFalse(
        RecommendationUtils.isSupportedEntityType(testUrn, Collections.emptySet())
    );
  }
}