Fix #2704: Add suggestions API to query users and teams like autocomplete (#2742)

This commit is contained in:
Sriharsha Chintalapani 2022-02-14 07:47:07 -08:00 committed by GitHub
parent d58fb32dd0
commit 562d6b39ef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 591 additions and 3 deletions

View File

@ -37,6 +37,8 @@ import org.openmetadata.catalog.entity.data.Dashboard;
import org.openmetadata.catalog.entity.data.Pipeline; import org.openmetadata.catalog.entity.data.Pipeline;
import org.openmetadata.catalog.entity.data.Table; import org.openmetadata.catalog.entity.data.Table;
import org.openmetadata.catalog.entity.data.Topic; import org.openmetadata.catalog.entity.data.Topic;
import org.openmetadata.catalog.entity.teams.Team;
import org.openmetadata.catalog.entity.teams.User;
import org.openmetadata.catalog.events.AbstractEventPublisher; import org.openmetadata.catalog.events.AbstractEventPublisher;
import org.openmetadata.catalog.events.errors.EventPublisherException; import org.openmetadata.catalog.events.errors.EventPublisherException;
import org.openmetadata.catalog.resources.events.EventResource.ChangeEventList; import org.openmetadata.catalog.resources.events.EventResource.ChangeEventList;
@ -84,6 +86,12 @@ public class ElasticSearchEventPublisher extends AbstractEventPublisher {
case Entity.PIPELINE: case Entity.PIPELINE:
updateRequest = updatePipeline(event); updateRequest = updatePipeline(event);
break; break;
case Entity.USER:
updateRequest = updateUser(event);
break;
case Entity.TEAM:
updateRequest = updateTeam(event);
break;
default: default:
LOG.warn("Ignoring Entity Type {}", entityType); LOG.warn("Ignoring Entity Type {}", entityType);
} }
@ -291,6 +299,60 @@ public class ElasticSearchEventPublisher extends AbstractEventPublisher {
return updateRequest; return updateRequest;
} }
/**
 * Translates a user change event into an update request against the user search index.
 *
 * <p>Created events attach the serialized index document as an upsert, updated events are delegated to
 * {@code scriptedUserUpsert}, and soft-deleted events are delegated to {@code softDeleteEntity}. For
 * {@code ENTITY_DELETED} the request is returned with nothing attached to it.
 *
 * @param event change event; its entity, when present, is cast to {@link User}
 * @return an update request addressed to the document whose id is the event's entity id
 * @throws IOException declared by this method; presumably raised by the JSON conversion (confirm against
 *     JsonUtils)
 */
private UpdateRequest updateUser(ChangeEvent event) throws IOException {
  String documentId = event.getEntityId().toString();
  UpdateRequest request = new UpdateRequest(ElasticSearchIndexType.USER_SEARCH_INDEX.indexName, documentId);

  // Soft deletes only run the delete script, so no index document is built for them.
  boolean needsDocument = event.getEntity() != null && event.getEventType() != EventType.ENTITY_SOFT_DELETED;
  UserESIndex document =
      needsDocument ? UserESIndex.builder((User) event.getEntity(), event.getEventType()).build() : null;

  switch (event.getEventType()) {
    case ENTITY_CREATED:
      request.doc(JsonUtils.pojoToJson(document), XContentType.JSON);
      request.docAsUpsert(true);
      break;
    case ENTITY_UPDATED:
      scriptedUserUpsert(document, request);
      break;
    case ENTITY_SOFT_DELETED:
      softDeleteEntity(request);
      break;
    case ENTITY_DELETED:
      // Nothing is attached to the request for hard deletes.
      break;
  }
  return request;
}
/**
 * Translates a team change event into an update request against the team search index.
 *
 * <p>Created events attach the serialized index document as an upsert, updated events are delegated to
 * {@code scriptedTeamUpsert}, and soft-deleted events are delegated to {@code softDeleteEntity}. For
 * {@code ENTITY_DELETED} the request is returned with nothing attached to it.
 *
 * @param event change event; its entity, when present, is cast to {@link Team}
 * @return an update request addressed to the document whose id is the event's entity id
 * @throws IOException declared by this method; presumably raised by the JSON conversion (confirm against
 *     JsonUtils)
 */
private UpdateRequest updateTeam(ChangeEvent event) throws IOException {
  String documentId = event.getEntityId().toString();
  UpdateRequest request = new UpdateRequest(ElasticSearchIndexType.TEAM_SEARCH_INDEX.indexName, documentId);

  // Soft deletes only run the delete script, so no index document is built for them.
  boolean needsDocument = event.getEntity() != null && event.getEventType() != EventType.ENTITY_SOFT_DELETED;
  TeamESIndex document =
      needsDocument ? TeamESIndex.builder((Team) event.getEntity(), event.getEventType()).build() : null;

  switch (event.getEventType()) {
    case ENTITY_CREATED:
      request.doc(JsonUtils.pojoToJson(document), XContentType.JSON);
      request.docAsUpsert(true);
      break;
    case ENTITY_UPDATED:
      scriptedTeamUpsert(document, request);
      break;
    case ENTITY_SOFT_DELETED:
      softDeleteEntity(request);
      break;
    case ENTITY_DELETED:
      // Nothing is attached to the request for hard deletes.
      break;
  }
  return request;
}
private void scriptedUpsert(Object index, UpdateRequest updateRequest) { private void scriptedUpsert(Object index, UpdateRequest updateRequest) {
String scriptTxt = String scriptTxt =
"for (k in params.keySet()) {if (k == 'change_descriptions') " "for (k in params.keySet()) {if (k == 'change_descriptions') "
@ -302,6 +364,32 @@ public class ElasticSearchEventPublisher extends AbstractEventPublisher {
updateRequest.scriptedUpsert(true); updateRequest.scriptedUpsert(true);
} }
/**
 * Attaches a Painless script to {@code updateRequest} that copies every field of the given user index
 * document into the stored document.
 *
 * <p>Every key, including {@code teams} and {@code roles}, replaces the stored value. Appending the incoming
 * lists to the stored ones instead would repeat the same ids on every update and would never drop a team or
 * role the user no longer has.
 *
 * <p>NOTE(review): this relies on the update event carrying the user's complete team and role lists rather
 * than a delta; confirm against the event producer.
 *
 * @param index index document whose fields become the script parameters
 * @param updateRequest request that receives the script and is flagged as a scripted upsert
 */
private void scriptedUserUpsert(Object index, UpdateRequest updateRequest) {
  String scriptTxt = "for (k in params.keySet()) { ctx._source.put(k, params.get(k)) }";
  Map<String, Object> doc = JsonUtils.getMap(index);
  Script script = new Script(ScriptType.INLINE, "painless", scriptTxt, doc);
  updateRequest.script(script);
  updateRequest.scriptedUpsert(true);
}
/**
 * Attaches a Painless script to {@code updateRequest} that copies every field of the given team index
 * document into the stored document.
 *
 * <p>Every key, including {@code users} and {@code owns}, replaces the stored value. Appending the incoming
 * lists to the stored ones instead would repeat the same ids on every update and would never drop a user or
 * owned entity that is no longer associated with the team.
 *
 * <p>NOTE(review): this relies on the update event carrying the team's complete user and ownership lists
 * rather than a delta; confirm against the event producer.
 *
 * @param index index document whose fields become the script parameters
 * @param updateRequest request that receives the script and is flagged as a scripted upsert
 */
private void scriptedTeamUpsert(Object index, UpdateRequest updateRequest) {
  String scriptTxt = "for (k in params.keySet()) { ctx._source.put(k, params.get(k)) }";
  Map<String, Object> doc = JsonUtils.getMap(index);
  Script script = new Script(ScriptType.INLINE, "painless", scriptTxt, doc);
  updateRequest.script(script);
  updateRequest.scriptedUpsert(true);
}
private void softDeleteEntity(UpdateRequest updateRequest) { private void softDeleteEntity(UpdateRequest updateRequest) {
String scriptTxt = "ctx._source.deleted=true"; String scriptTxt = "ctx._source.deleted=true";
Script script = new Script(ScriptType.INLINE, "painless", scriptTxt, new HashMap<>()); Script script = new Script(ScriptType.INLINE, "painless", scriptTxt, new HashMap<>());

View File

@ -34,6 +34,8 @@ import org.openmetadata.catalog.entity.data.Dashboard;
import org.openmetadata.catalog.entity.data.Pipeline; import org.openmetadata.catalog.entity.data.Pipeline;
import org.openmetadata.catalog.entity.data.Table; import org.openmetadata.catalog.entity.data.Table;
import org.openmetadata.catalog.entity.data.Topic; import org.openmetadata.catalog.entity.data.Topic;
import org.openmetadata.catalog.entity.teams.Team;
import org.openmetadata.catalog.entity.teams.User;
import org.openmetadata.catalog.type.Column; import org.openmetadata.catalog.type.Column;
import org.openmetadata.catalog.type.EntityReference; import org.openmetadata.catalog.type.EntityReference;
import org.openmetadata.catalog.type.EventType; import org.openmetadata.catalog.type.EventType;
@ -63,7 +65,9 @@ public class ElasticSearchIndexDefinition {
TABLE_SEARCH_INDEX("table_search_index", "/elasticsearch/table_index_mapping.json"), TABLE_SEARCH_INDEX("table_search_index", "/elasticsearch/table_index_mapping.json"),
TOPIC_SEARCH_INDEX("topic_search_index", "/elasticsearch/topic_index_mapping.json"), TOPIC_SEARCH_INDEX("topic_search_index", "/elasticsearch/topic_index_mapping.json"),
DASHBOARD_SEARCH_INDEX("dashboard_search_index", "/elasticsearch/dashboard_index_mapping.json"), DASHBOARD_SEARCH_INDEX("dashboard_search_index", "/elasticsearch/dashboard_index_mapping.json"),
PIPELINE_SEARCH_INDEX("pipeline_search_index", "/elasticsearch/pipeline_index_mapping.json"); PIPELINE_SEARCH_INDEX("pipeline_search_index", "/elasticsearch/pipeline_index_mapping.json"),
USER_SEARCH_INDEX("user_search_index", "/elasticsearch/user_index_mapping.json"),
TEAM_SEARCH_INDEX("team_search_index", "/elasticsearch/team_index_mapping.json");
public final String indexName; public final String indexName;
public final String indexMappingFile; public final String indexMappingFile;
@ -189,6 +193,10 @@ public class ElasticSearchIndexDefinition {
return ElasticSearchIndexType.PIPELINE_SEARCH_INDEX; return ElasticSearchIndexType.PIPELINE_SEARCH_INDEX;
} else if (type.equalsIgnoreCase(Entity.TOPIC)) { } else if (type.equalsIgnoreCase(Entity.TOPIC)) {
return ElasticSearchIndexType.TOPIC_SEARCH_INDEX; return ElasticSearchIndexType.TOPIC_SEARCH_INDEX;
} else if (type.equalsIgnoreCase(Entity.USER)) {
return ElasticSearchIndexType.USER_SEARCH_INDEX;
} else if (type.equalsIgnoreCase(Entity.TEAM)) {
return ElasticSearchIndexType.TEAM_SEARCH_INDEX;
} }
throw new RuntimeException("Failed to find index doc for type " + type); throw new RuntimeException("Failed to find index doc for type " + type);
} }
@ -708,3 +716,139 @@ class PipelineESIndex extends ElasticSearchIndex {
return pipelineESIndexBuilder; return pipelineESIndexBuilder;
} }
} }
/**
 * Search document for a {@link User}; its JSON property names are the field names used in the user index.
 *
 * <p>Null fields are omitted from the serialized JSON.
 */
@Getter
@SuperBuilder(builderMethodName = "internalBuilder")
@Value
@JsonInclude(JsonInclude.Include.NON_NULL)
class UserESIndex {
  @JsonProperty("user_id")
  String userId;

  @JsonProperty("name")
  String name;

  @JsonProperty("display_name")
  String displayName;

  @JsonProperty("email")
  String email;

  @JsonProperty("entity_type")
  String entityType;

  /** Ids of the teams the user references, as strings. */
  @JsonProperty("teams")
  List<String> teams;

  /** Ids of the roles the user references, as strings. */
  @JsonProperty("roles")
  List<String> roles;

  @JsonProperty("last_updated_timestamp")
  @Builder.Default
  Long lastUpdatedTimestamp = System.currentTimeMillis();

  /** Inputs for the completion suggester. */
  List<ElasticSearchSuggest> suggest;

  Boolean deleted;

  /**
   * Creates a builder pre-populated from the given user.
   *
   * <p>A missing display name is stored as an empty string and contributes no suggestion entry, so the
   * suggester never receives a null or empty input. Missing team or role lists become empty lists.
   *
   * @param user user entity to index
   * @param eventType type of the triggering event; currently unused
   * @return builder with every field set; call {@code build()} to obtain the document
   */
  public static UserESIndexBuilder builder(User user, EventType eventType) {
    String displayName = user.getDisplayName() != null ? user.getDisplayName() : "";

    List<ElasticSearchSuggest> suggest = new ArrayList<>();
    suggest.add(ElasticSearchSuggest.builder().input(user.getName()).weight(5).build());
    if (!displayName.isEmpty()) {
      suggest.add(ElasticSearchSuggest.builder().input(displayName).weight(10).build());
    }

    return internalBuilder()
        .userId(user.getId().toString())
        .deleted(user.getDeleted())
        .name(user.getName())
        .email(user.getEmail())
        .displayName(displayName)
        .lastUpdatedTimestamp(user.getUpdatedAt())
        .entityType("user")
        .suggest(suggest)
        .teams(idsOf(user.getTeams()))
        .roles(idsOf(user.getRoles()));
  }

  /** Returns the ids of the given references as strings; a null collection yields an empty list. */
  private static List<String> idsOf(Iterable<? extends EntityReference> references) {
    List<String> ids = new ArrayList<>();
    if (references != null) {
      for (EntityReference reference : references) {
        ids.add(reference.getId().toString());
      }
    }
    return ids;
  }
}
/**
 * Search document for a {@link Team}; its JSON property names are the field names used in the team index.
 *
 * <p>Null fields are omitted from the serialized JSON.
 */
@Getter
@SuperBuilder(builderMethodName = "internalBuilder")
@Value
@JsonInclude(JsonInclude.Include.NON_NULL)
class TeamESIndex {
  @JsonProperty("team_id")
  String teamId;

  @JsonProperty("name")
  String name;

  @JsonProperty("display_name")
  String displayName;

  @JsonProperty("entity_type")
  String entityType;

  /** Ids of the users the team references, as strings. */
  @JsonProperty("users")
  List<String> users;

  /** Ids of the entities the team owns, as strings. */
  @JsonProperty("owns")
  List<String> owns;

  @JsonProperty("last_updated_timestamp")
  @Builder.Default
  Long lastUpdatedTimestamp = System.currentTimeMillis();

  /** Inputs for the completion suggester. */
  List<ElasticSearchSuggest> suggest;

  Boolean deleted;

  /**
   * Creates a builder pre-populated from the given team.
   *
   * <p>A missing display name is stored as an empty string and contributes no suggestion entry, so the
   * suggester never receives a null or empty input. Missing user or ownership lists become empty lists.
   *
   * @param team team entity to index
   * @param eventType type of the triggering event; currently unused
   * @return builder with every field set; call {@code build()} to obtain the document
   */
  public static TeamESIndexBuilder builder(Team team, EventType eventType) {
    String displayName = team.getDisplayName() != null ? team.getDisplayName() : "";

    List<ElasticSearchSuggest> suggest = new ArrayList<>();
    suggest.add(ElasticSearchSuggest.builder().input(team.getName()).weight(5).build());
    if (!displayName.isEmpty()) {
      suggest.add(ElasticSearchSuggest.builder().input(displayName).weight(10).build());
    }

    return internalBuilder()
        .teamId(team.getId().toString())
        .deleted(team.getDeleted())
        .name(team.getName())
        .displayName(displayName)
        .lastUpdatedTimestamp(team.getUpdatedAt())
        .entityType("team")
        .suggest(suggest)
        .owns(idsOf(team.getOwns()))
        .users(idsOf(team.getUsers()));
  }

  /** Returns the ids of the given references as strings; a null collection yields an empty list. */
  private static List<String> idsOf(Iterable<? extends EntityReference> references) {
    List<String> ids = new ArrayList<>();
    if (references != null) {
      for (EntityReference reference : references) {
        ids.add(reference.getId().toString());
      }
    }
    return ids;
  }
}

View File

@ -147,6 +147,12 @@ public class SearchResource {
case "table_search_index": case "table_search_index":
searchSourceBuilder = buildTableSearchBuilder(query, from, size); searchSourceBuilder = buildTableSearchBuilder(query, from, size);
break; break;
case "user_search_index":
searchSourceBuilder = buildUserSearchBuilder(query, from, size);
break;
case "team_search_index":
searchSourceBuilder = buildTeamSearchBuilder(query, from, size);
break;
default: default:
searchSourceBuilder = buildAggregateSearchBuilder(query, from, size); searchSourceBuilder = buildAggregateSearchBuilder(query, from, size);
break; break;
@ -354,4 +360,24 @@ public class SearchResource {
return searchSourceBuilder; return searchSourceBuilder;
} }
/**
 * Builds the search body for the user index: a lenient query-string query over {@code name} (boost 5.0) and
 * {@code display_name} (boost 1.0), paged with the given offset and page size.
 *
 * @param query query-string expression to run
 * @param from offset of the first hit to return
 * @param size maximum number of hits to return
 * @return the configured search source
 */
private SearchSourceBuilder buildUserSearchBuilder(String query, int from, int size) {
  return new SearchSourceBuilder()
      .query(QueryBuilders.queryStringQuery(query).field("name", 5.0f).field("display_name", 1.0f).lenient(true))
      .from(from)
      .size(size);
}
/**
 * Builds the search body for the team index: a lenient query-string query over {@code name} (boost 5.0) and
 * {@code display_name} (boost 3.0), paged with the given offset and page size.
 *
 * @param query query-string expression to run
 * @param from offset of the first hit to return
 * @param size maximum number of hits to return
 * @return the configured search source
 */
private SearchSourceBuilder buildTeamSearchBuilder(String query, int from, int size) {
  return new SearchSourceBuilder()
      .query(QueryBuilders.queryStringQuery(query).field("name", 5.0f).field("display_name", 3.0f).lenient(true))
      .from(from)
      .size(size);
}
} }

View File

@ -0,0 +1,29 @@
{
"properties": {
"name": {
"type":"text"
},
"display_name": {
"type": "text"
},
"last_updated_timestamp": {
"type": "date",
"format": "epoch_second"
},
"entity_type": {
"type": "keyword"
},
"deleted": {
"type": "boolean"
},
"users": {
"type": "keyword"
},
"owns": {
"type": "keyword"
},
"suggest": {
"type": "completion"
}
}
}

View File

@ -0,0 +1,32 @@
{
"properties": {
"name": {
"type":"text"
},
"display_name": {
"type": "text"
},
"email": {
"type": "text"
},
"last_updated_timestamp": {
"type": "date",
"format": "epoch_second"
},
"entity_type": {
"type": "keyword"
},
"teams": {
"type": "keyword"
},
"roles": {
"type": "keyword"
},
"deleted": {
"type": "boolean"
},
"suggest": {
"type": "completion"
}
}
}

View File

@ -158,6 +158,37 @@ class PipelineESDocument(BaseModel):
doc_as_upsert: bool = True doc_as_upsert: bool = True
class UserESDocument(BaseModel):
    """Elastic Search Mapping doc for Users

    The field names are the keys of the JSON document written to the user
    search index.
    """

    # String id of the user.
    user_id: str
    deleted: bool
    # Entity kind marker; "user" unless a caller overrides it.
    entity_type: str = "user"
    name: str
    # Declared as a required string, so None is rejected at validation time.
    display_name: str
    email: str
    # Entries for the completion suggester, as plain dicts.
    suggest: List[dict]
    last_updated_timestamp: Optional[int]
    # Ids of the user's teams and roles, as strings.
    teams: List[str]
    roles: List[str]
    # NOTE(review): serialized together with the other fields; confirm it is
    # meant to be part of the stored document.
    doc_as_upsert: bool = True
class TeamESDocument(BaseModel):
    """Elastic Search Mapping doc for Teams

    The field names are the keys of the JSON document written to the team
    search index.
    """

    # String id of the team.
    team_id: str
    deleted: bool
    # Entity kind marker; "team" unless a caller overrides it.
    entity_type: str = "team"
    name: str
    # Declared as a required string, so None is rejected at validation time.
    display_name: str
    # Entries for the completion suggester, as plain dicts.
    suggest: List[dict]
    last_updated_timestamp: Optional[int]
    # Ids of the team's users and of the entities it owns, as strings.
    users: List[str]
    owns: List[str]
    # NOTE(review): serialized together with the other fields; confirm it is
    # meant to be part of the stored document.
    doc_as_upsert: bool = True
class DashboardOwner(BaseModel): class DashboardOwner(BaseModel):
"""Dashboard owner""" """Dashboard owner"""

View File

@ -31,6 +31,8 @@ from metadata.generated.schema.entity.services.dashboardService import Dashboard
from metadata.generated.schema.entity.services.databaseService import DatabaseService from metadata.generated.schema.entity.services.databaseService import DatabaseService
from metadata.generated.schema.entity.services.messagingService import MessagingService from metadata.generated.schema.entity.services.messagingService import MessagingService
from metadata.generated.schema.entity.services.pipelineService import PipelineService from metadata.generated.schema.entity.services.pipelineService import PipelineService
from metadata.generated.schema.entity.teams.team import Team
from metadata.generated.schema.entity.teams.user import User
from metadata.generated.schema.type import entityReference from metadata.generated.schema.type import entityReference
from metadata.ingestion.api.common import Entity, WorkflowContext from metadata.ingestion.api.common import Entity, WorkflowContext
from metadata.ingestion.api.sink import Sink, SinkStatus from metadata.ingestion.api.sink import Sink, SinkStatus
@ -39,7 +41,9 @@ from metadata.ingestion.models.table_metadata import (
DashboardESDocument, DashboardESDocument,
PipelineESDocument, PipelineESDocument,
TableESDocument, TableESDocument,
TeamESDocument,
TopicESDocument, TopicESDocument,
UserESDocument,
) )
from metadata.ingestion.ometa.ometa_api import OpenMetadata from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.ometa.openmetadata_rest import MetadataServerConfig from metadata.ingestion.ometa.openmetadata_rest import MetadataServerConfig
@ -47,7 +51,9 @@ from metadata.ingestion.sink.elasticsearch_constants import (
DASHBOARD_ELASTICSEARCH_INDEX_MAPPING, DASHBOARD_ELASTICSEARCH_INDEX_MAPPING,
PIPELINE_ELASTICSEARCH_INDEX_MAPPING, PIPELINE_ELASTICSEARCH_INDEX_MAPPING,
TABLE_ELASTICSEARCH_INDEX_MAPPING, TABLE_ELASTICSEARCH_INDEX_MAPPING,
TEAM_ELASTICSEARCH_INDEX_MAPPING,
TOPIC_ELASTICSEARCH_INDEX_MAPPING, TOPIC_ELASTICSEARCH_INDEX_MAPPING,
USER_ELASTICSEARCH_INDEX_MAPPING,
) )
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -66,12 +72,14 @@ class ElasticSearchConfig(ConfigModel):
index_topics: Optional[bool] = True index_topics: Optional[bool] = True
index_dashboards: Optional[bool] = True index_dashboards: Optional[bool] = True
index_pipelines: Optional[bool] = True index_pipelines: Optional[bool] = True
index_dbt_models: Optional[bool] = True index_users: Optional[bool] = True
index_teams: Optional[bool] = True
table_index_name: str = "table_search_index" table_index_name: str = "table_search_index"
topic_index_name: str = "topic_search_index" topic_index_name: str = "topic_search_index"
dashboard_index_name: str = "dashboard_search_index" dashboard_index_name: str = "dashboard_search_index"
pipeline_index_name: str = "pipeline_search_index" pipeline_index_name: str = "pipeline_search_index"
dbt_index_name: str = "dbt_model_search_index" user_index_name: str = "user_search_index"
team_index_name: str = "team_search_index"
scheme: str = "http" scheme: str = "http"
use_ssl: bool = False use_ssl: bool = False
verify_certs: bool = False verify_certs: bool = False
@ -144,6 +152,16 @@ class ElasticsearchSink(Sink[Entity]):
self.config.pipeline_index_name, PIPELINE_ELASTICSEARCH_INDEX_MAPPING self.config.pipeline_index_name, PIPELINE_ELASTICSEARCH_INDEX_MAPPING
) )
if self.config.index_users:
self._check_or_create_index(
self.config.user_index_name, USER_ELASTICSEARCH_INDEX_MAPPING
)
if self.config.index_teams:
self._check_or_create_index(
self.config.team_index_name, TEAM_ELASTICSEARCH_INDEX_MAPPING
)
def _check_or_create_index(self, index_name: str, es_mapping: str): def _check_or_create_index(self, index_name: str, es_mapping: str):
""" """
Retrieve all indices that currently have {elasticsearch_alias} alias Retrieve all indices that currently have {elasticsearch_alias} alias
@ -208,6 +226,24 @@ class ElasticsearchSink(Sink[Entity]):
request_timeout=self.config.timeout, request_timeout=self.config.timeout,
) )
if isinstance(record, User):
user_doc = self._create_user_es_doc(record)
self.elasticsearch_client.index(
index=self.config.user_index_name,
id=str(user_doc.user_id),
body=user_doc.json(),
request_timeout=self.config.timeout,
)
if isinstance(record, Team):
team_doc = self._create_team_es_doc(record)
self.elasticsearch_client.index(
index=self.config.team_index_name,
id=str(team_doc.team_id),
body=team_doc.json(),
request_timeout=self.config.timeout,
)
if hasattr(record.name, "__root__"): if hasattr(record.name, "__root__"):
self.status.records_written(record.name.__root__) self.status.records_written(record.name.__root__)
else: else:
@ -443,6 +479,65 @@ class ElasticsearchSink(Sink[Entity]):
return pipeline_doc return pipeline_doc
def _create_user_es_doc(self, user: User):
    """Build the search document for a user.

    Args:
        user: user entity to convert.

    Returns:
        UserESDocument holding the user's id, name, display name, email,
        suggester entries, update timestamp, and the string ids of its
        teams and roles.
    """
    name = user.name.__root__
    # The document declares display_name as a required str, so a user without
    # a display name would fail validation; fall back to the name instead.
    display_name = user.displayName if user.displayName else name
    # NOTE(review): the server-side user index weights name 5 and display
    # name 10, the reverse of these weights; confirm which is intended.
    suggest = [
        {"input": [display_name], "weight": 5},
        {"input": [name], "weight": 10},
    ]
    teams = [str(team.id.__root__) for team in user.teams.__root__] if user.teams else []
    roles = [str(role.id.__root__) for role in user.roles.__root__] if user.roles else []

    return UserESDocument(
        user_id=str(user.id.__root__),
        deleted=user.deleted,
        name=name,
        display_name=display_name,
        email=user.email.__root__,
        suggest=suggest,
        last_updated_timestamp=user.updatedAt.__root__,
        teams=teams,
        roles=roles,
    )
def _create_team_es_doc(self, team: Team):
    """Build the search document for a team.

    Args:
        team: team entity to convert.

    Returns:
        TeamESDocument holding the team's id, name, display name, suggester
        entries, update timestamp, and the string ids of its users and of
        the entities it owns.
    """
    name = team.name.__root__
    # The document declares display_name as a required str, so a team without
    # a display name would fail validation; fall back to the name instead.
    display_name = team.displayName if team.displayName else name
    # NOTE(review): the server-side team index weights name 5 and display
    # name 10, the reverse of these weights; confirm which is intended.
    suggest = [
        {"input": [display_name], "weight": 5},
        {"input": [name], "weight": 10},
    ]
    # Collect each member's own id (not the team's id once per member).
    users = [str(user.id.__root__) for user in team.users.__root__] if team.users else []
    owns = [str(own.id.__root__) for own in team.owns.__root__] if team.owns else []

    return TeamESDocument(
        team_id=str(team.id.__root__),
        deleted=team.deleted,
        name=name,
        display_name=display_name,
        suggest=suggest,
        last_updated_timestamp=team.updatedAt.__root__,
        users=users,
        owns=owns,
    )
def _get_charts(self, chart_refs: Optional[List[entityReference.EntityReference]]): def _get_charts(self, chart_refs: Optional[List[entityReference.EntityReference]]):
charts = [] charts = []
if chart_refs: if chart_refs:

View File

@ -390,3 +390,78 @@ PIPELINE_ELASTICSEARCH_INDEX_MAPPING = textwrap.dedent(
} }
""" """
) )
# Index-creation body for the user search index. Must be strict JSON: no
# trailing commas.
# NOTE(review): last_updated_timestamp is declared as epoch_second; confirm the
# writers send seconds rather than milliseconds.
USER_ELASTICSEARCH_INDEX_MAPPING = textwrap.dedent(
    """
    {
      "mappings": {
        "properties": {
          "name": {
            "type": "text"
          },
          "display_name": {
            "type": "text"
          },
          "email": {
            "type": "text"
          },
          "last_updated_timestamp": {
            "type": "date",
            "format": "epoch_second"
          },
          "entity_type": {
            "type": "keyword"
          },
          "teams": {
            "type": "keyword"
          },
          "roles": {
            "type": "keyword"
          },
          "deleted": {
            "type": "boolean"
          },
          "suggest": {
            "type": "completion"
          }
        }
      }
    }
    """
)
# Index-creation body for the team search index. Must be strict JSON: no
# trailing commas.
# NOTE(review): last_updated_timestamp is declared as epoch_second; confirm the
# writers send seconds rather than milliseconds.
TEAM_ELASTICSEARCH_INDEX_MAPPING = textwrap.dedent(
    """
    {
      "mappings": {
        "properties": {
          "name": {
            "type": "text"
          },
          "display_name": {
            "type": "text"
          },
          "last_updated_timestamp": {
            "type": "date",
            "format": "epoch_second"
          },
          "entity_type": {
            "type": "keyword"
          },
          "deleted": {
            "type": "boolean"
          },
          "users": {
            "type": "keyword"
          },
          "owns": {
            "type": "keyword"
          },
          "suggest": {
            "type": "completion"
          }
        }
      }
    }
    """
)

View File

@ -19,6 +19,8 @@ from metadata.generated.schema.entity.data.dashboard import Dashboard
from metadata.generated.schema.entity.data.pipeline import Pipeline from metadata.generated.schema.entity.data.pipeline import Pipeline
from metadata.generated.schema.entity.data.table import Table from metadata.generated.schema.entity.data.table import Table
from metadata.generated.schema.entity.data.topic import Topic from metadata.generated.schema.entity.data.topic import Topic
from metadata.generated.schema.entity.teams.team import Team
from metadata.generated.schema.entity.teams.user import User
from metadata.ingestion.api.common import Entity, WorkflowContext from metadata.ingestion.api.common import Entity, WorkflowContext
from metadata.ingestion.api.source import Source, SourceStatus from metadata.ingestion.api.source import Source, SourceStatus
from metadata.ingestion.ometa.ometa_api import OpenMetadata from metadata.ingestion.ometa.ometa_api import OpenMetadata
@ -34,6 +36,8 @@ class MetadataTablesRestSourceConfig(ConfigModel):
include_topics: Optional[bool] = True include_topics: Optional[bool] = True
include_dashboards: Optional[bool] = True include_dashboards: Optional[bool] = True
include_pipelines: Optional[bool] = True include_pipelines: Optional[bool] = True
include_users: Optional[bool] = True
include_teams: Optional[bool] = True
limit_records: int = 1000 limit_records: int = 1000
@ -78,6 +82,24 @@ class MetadataSourceStatus(SourceStatus):
self.success.append(dashboard_name) self.success.append(dashboard_name)
logger.info("Dashboard Scanned: %s", dashboard_name) logger.info("Dashboard Scanned: %s", dashboard_name)
def scanned_team(self, team_name: str) -> None:
    """Record a team as successfully scanned and log it.

    Args:
        team_name (str): name of the team that was scanned
    """
    self.success.append(team_name)
    logger.info("Team Scanned: %s", team_name)
def scanned_user(self, user_name: str) -> None:
    """Record a user as successfully scanned and log it.

    Args:
        user_name (str): name of the user that was scanned
    """
    self.success.append(user_name)
    logger.info("User Scanned: %s", user_name)
# pylint: disable=unused-argument # pylint: disable=unused-argument
def filtered( def filtered(
self, table_name: str, err: str, dataset_name: str = None, col_type: str = None self, table_name: str, err: str, dataset_name: str = None, col_type: str = None
@ -145,6 +167,8 @@ class MetadataSource(Source[Entity]):
yield from self.fetch_topic() yield from self.fetch_topic()
yield from self.fetch_dashboard() yield from self.fetch_dashboard()
yield from self.fetch_pipeline() yield from self.fetch_pipeline()
yield from self.fetch_users()
yield from self.fetch_teams()
def fetch_table(self) -> Table: def fetch_table(self) -> Table:
"""Fetch table method """Fetch table method
@ -247,6 +271,50 @@ class MetadataSource(Source[Entity]):
break break
after = pipeline_entities.after after = pipeline_entities.after
def fetch_users(self) -> User:
    """Yield users page by page, requesting their teams and roles.

    Nothing is yielded when ``include_users`` is disabled in the config.
    Each yielded user is also reported to the status via ``scanned_user``.

    Returns:
        User:
    """
    if not self.config.include_users:
        return

    cursor = None
    while True:
        page = self.metadata.list_entities(
            entity=User,
            fields=["teams", "roles"],
            after=cursor,
            limit=self.config.limit_records,
        )
        for user in page.entities:
            self.status.scanned_user(user.name)
            yield user
        # A missing "after" cursor marks the last page.
        cursor = page.after
        if cursor is None:
            break
def fetch_teams(self) -> Team:
    """Yield teams page by page, requesting their users and owned entities.

    Nothing is yielded when ``include_teams`` is disabled in the config.
    Each yielded team is also reported to the status via ``scanned_team``.

    Returns:
        Team:
    """
    if not self.config.include_teams:
        return

    cursor = None
    while True:
        page = self.metadata.list_entities(
            entity=Team,
            fields=["users", "owns"],
            after=cursor,
            limit=self.config.limit_records,
        )
        for team in page.entities:
            self.status.scanned_team(team.name)
            yield team
        # A missing "after" cursor marks the last page.
        cursor = page.after
        if cursor is None:
            break
def get_status(self) -> SourceStatus: def get_status(self) -> SourceStatus:
return self.status return self.status