mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-09-11 01:52:05 +00:00
* Fix airflow owner and add tasks * Add pipeline tasks ownership * MINOR - Fix py CI * Add pipeline tasks ownership * Add pipeline tasks ownership * MINOR - Fix py CI * MINOR - Fix py CI * Add pipeline tasks ownership * patch team * patch team * Format
This commit is contained in:
parent
95b90bc510
commit
b84ce33b80
@ -79,6 +79,23 @@ class ESMixin(Generic[T]):
|
|||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def _get_entity_from_es(
|
||||||
|
self, entity: Type[T], query_string: str, fields: Optional[list] = None
|
||||||
|
) -> Optional[T]:
|
||||||
|
"""Fetch an entity instance from ES"""
|
||||||
|
|
||||||
|
try:
|
||||||
|
entity_list = self._search_es_entity(
|
||||||
|
entity_type=entity, query_string=query_string, fields=fields
|
||||||
|
)
|
||||||
|
for instance in entity_list or []:
|
||||||
|
return instance
|
||||||
|
except Exception as err:
|
||||||
|
logger.debug(traceback.format_exc())
|
||||||
|
logger.warning(f"Could not get {entity.__name__} info from ES due to {err}")
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
def es_search_from_fqn(
|
def es_search_from_fqn(
|
||||||
self,
|
self,
|
||||||
entity_type: Type[T],
|
entity_type: Type[T],
|
||||||
|
@ -13,12 +13,15 @@ Mixin class containing User specific methods
|
|||||||
|
|
||||||
To be used by OpenMetadata class
|
To be used by OpenMetadata class
|
||||||
"""
|
"""
|
||||||
import traceback
|
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from typing import Optional
|
from typing import Optional, Type
|
||||||
|
|
||||||
|
from metadata.generated.schema.entity.teams.team import Team
|
||||||
from metadata.generated.schema.entity.teams.user import User
|
from metadata.generated.schema.entity.teams.user import User
|
||||||
|
from metadata.generated.schema.type.entityReference import EntityReference
|
||||||
|
from metadata.ingestion.api.common import T
|
||||||
from metadata.ingestion.ometa.client import REST
|
from metadata.ingestion.ometa.client import REST
|
||||||
|
from metadata.utils.constants import ENTITY_REFERENCE_TYPE_MAP
|
||||||
from metadata.utils.elasticsearch import ES_INDEX_MAP
|
from metadata.utils.elasticsearch import ES_INDEX_MAP
|
||||||
from metadata.utils.logger import ometa_logger
|
from metadata.utils.logger import ometa_logger
|
||||||
|
|
||||||
@ -34,42 +37,134 @@ class OMetaUserMixin:
|
|||||||
|
|
||||||
client: REST
|
client: REST
|
||||||
|
|
||||||
email_search = (
|
@staticmethod
|
||||||
|
def email_search_query_es(entity: Type[T]) -> str:
|
||||||
|
return (
|
||||||
"/search/query?q=email.keyword:{email}&from={from_}&size={size}&index="
|
"/search/query?q=email.keyword:{email}&from={from_}&size={size}&index="
|
||||||
+ ES_INDEX_MAP[User.__name__]
|
+ ES_INDEX_MAP[entity.__name__]
|
||||||
)
|
)
|
||||||
|
|
||||||
@lru_cache(maxsize=None)
|
@staticmethod
|
||||||
def get_user_by_email(
|
def name_search_query_es(entity: Type[T]) -> str:
|
||||||
|
"""
|
||||||
|
Allow for more flexible lookup following what the UI is doing when searching users.
|
||||||
|
|
||||||
|
We don't want to stick to `q=name:{name}` since in case a user is named `random.user`
|
||||||
|
but looked up as `Random User`, we want to find this match.
|
||||||
|
"""
|
||||||
|
return (
|
||||||
|
"/search/query?q={name}&from={from_}&size={size}&index="
|
||||||
|
+ ES_INDEX_MAP[entity.__name__]
|
||||||
|
)
|
||||||
|
|
||||||
|
def _search_by_email(
|
||||||
self,
|
self,
|
||||||
|
entity: Type[T],
|
||||||
email: Optional[str],
|
email: Optional[str],
|
||||||
from_count: int = 0,
|
from_count: int = 0,
|
||||||
size: int = 1,
|
size: int = 1,
|
||||||
fields: Optional[list] = None,
|
fields: Optional[list] = None,
|
||||||
) -> Optional[User]:
|
) -> Optional[T]:
|
||||||
"""
|
"""
|
||||||
GET user entity by name
|
GET user or team entity by mail
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
email: user email to search
|
email: user email to search
|
||||||
from_count: records to expect
|
from_count: records to expect
|
||||||
size: number of records
|
size: number of records
|
||||||
|
fields: Optional field list to pass to ES request
|
||||||
"""
|
"""
|
||||||
if email:
|
if email:
|
||||||
query_string = self.email_search.format(
|
query_string = self.email_search_query_es(entity=entity).format(
|
||||||
email=email, from_=from_count, size=size
|
email=email, from_=from_count, size=size
|
||||||
)
|
)
|
||||||
|
return self._get_entity_from_es(
|
||||||
try:
|
entity=entity, query_string=query_string, fields=fields
|
||||||
entity_list = self._search_es_entity(
|
)
|
||||||
entity_type=User, query_string=query_string, fields=fields
|
|
||||||
)
|
return None
|
||||||
for user in entity_list or []:
|
|
||||||
return user
|
def _search_by_name(
|
||||||
except Exception as err:
|
self,
|
||||||
logger.debug(traceback.format_exc())
|
entity: Type[T],
|
||||||
logger.warning(
|
name: Optional[str],
|
||||||
f"Could not get user info from ES for user email {email} due to {err}"
|
from_count: int = 0,
|
||||||
|
size: int = 1,
|
||||||
|
fields: Optional[list] = None,
|
||||||
|
) -> Optional[T]:
|
||||||
|
"""
|
||||||
|
GET entity by name
|
||||||
|
|
||||||
|
Args:
|
||||||
|
name: user name to search
|
||||||
|
from_count: records to expect
|
||||||
|
size: number of records
|
||||||
|
fields: Optional field list to pass to ES request
|
||||||
|
"""
|
||||||
|
if name:
|
||||||
|
query_string = self.name_search_query_es(entity=entity).format(
|
||||||
|
name=name, from_=from_count, size=size
|
||||||
|
)
|
||||||
|
return self._get_entity_from_es(
|
||||||
|
entity=entity, query_string=query_string, fields=fields
|
||||||
|
)
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
@lru_cache(maxsize=None)
|
||||||
|
def get_reference_by_email(
|
||||||
|
self,
|
||||||
|
email: Optional[str],
|
||||||
|
from_count: int = 0,
|
||||||
|
size: int = 1,
|
||||||
|
fields: Optional[list] = None,
|
||||||
|
) -> Optional[EntityReference]:
|
||||||
|
"""
|
||||||
|
Get a User or Team Entity Reference by searching by its mail
|
||||||
|
"""
|
||||||
|
maybe_user = self._search_by_email(
|
||||||
|
entity=User, email=email, from_count=from_count, size=size, fields=fields
|
||||||
|
)
|
||||||
|
if maybe_user:
|
||||||
|
return EntityReference(
|
||||||
|
id=maybe_user.id.__root__, type=ENTITY_REFERENCE_TYPE_MAP[User.__name__]
|
||||||
|
)
|
||||||
|
|
||||||
|
maybe_team = self._search_by_email(
|
||||||
|
entity=Team, email=email, from_count=from_count, size=size, fields=fields
|
||||||
|
)
|
||||||
|
if maybe_team:
|
||||||
|
return EntityReference(
|
||||||
|
id=maybe_team.id.__root__, type=ENTITY_REFERENCE_TYPE_MAP[Team.__name__]
|
||||||
|
)
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
@lru_cache(maxsize=None)
|
||||||
|
def get_reference_by_name(
|
||||||
|
self,
|
||||||
|
name: Optional[str],
|
||||||
|
from_count: int = 0,
|
||||||
|
size: int = 1,
|
||||||
|
fields: Optional[list] = None,
|
||||||
|
) -> Optional[EntityReference]:
|
||||||
|
"""
|
||||||
|
Get a User or Team Entity Reference by searching by its name
|
||||||
|
"""
|
||||||
|
maybe_user = self._search_by_name(
|
||||||
|
entity=User, name=name, from_count=from_count, size=size, fields=fields
|
||||||
|
)
|
||||||
|
if maybe_user:
|
||||||
|
return EntityReference(
|
||||||
|
id=maybe_user.id.__root__, type=ENTITY_REFERENCE_TYPE_MAP[User.__name__]
|
||||||
|
)
|
||||||
|
|
||||||
|
maybe_team = self._search_by_name(
|
||||||
|
entity=Team, name=name, from_count=from_count, size=size, fields=fields
|
||||||
|
)
|
||||||
|
if maybe_team:
|
||||||
|
return EntityReference(
|
||||||
|
id=maybe_team.id.__root__, type=ENTITY_REFERENCE_TYPE_MAP[Team.__name__]
|
||||||
)
|
)
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
@ -100,12 +100,7 @@ class DomodashboardSource(DashboardServiceSource):
|
|||||||
try:
|
try:
|
||||||
owner_details = self.client.domo.users_get(owner.id)
|
owner_details = self.client.domo.users_get(owner.id)
|
||||||
if owner_details.get("email"):
|
if owner_details.get("email"):
|
||||||
user = self.metadata.get_user_by_email(owner_details["email"])
|
return self.metadata.get_reference_by_email(owner_details["email"])
|
||||||
if user:
|
|
||||||
return EntityReference(id=user.id.__root__, type="user")
|
|
||||||
logger.warning(
|
|
||||||
f"No user found with email [{owner_details['email']}] in OMD"
|
|
||||||
)
|
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Error while getting details of user {owner.displayName} - {exc}"
|
f"Error while getting details of user {owner.displayName} - {exc}"
|
||||||
|
@ -634,9 +634,7 @@ class LookerSource(DashboardServiceSource):
|
|||||||
try:
|
try:
|
||||||
if dashboard_details.user_id is not None:
|
if dashboard_details.user_id is not None:
|
||||||
dashboard_owner = self.client.user(dashboard_details.user_id)
|
dashboard_owner = self.client.user(dashboard_details.user_id)
|
||||||
user = self.metadata.get_user_by_email(dashboard_owner.email)
|
return self.metadata.get_reference_by_email(dashboard_owner.email)
|
||||||
if user:
|
|
||||||
return EntityReference(id=user.id.__root__, type="user")
|
|
||||||
|
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
logger.debug(traceback.format_exc())
|
logger.debug(traceback.format_exc())
|
||||||
|
@ -109,11 +109,9 @@ class RedashSource(DashboardServiceSource):
|
|||||||
def get_owner_details(self, dashboard_details) -> Optional[EntityReference]:
|
def get_owner_details(self, dashboard_details) -> Optional[EntityReference]:
|
||||||
"""Get owner from mail"""
|
"""Get owner from mail"""
|
||||||
if dashboard_details.get("user") and dashboard_details["user"].get("email"):
|
if dashboard_details.get("user") and dashboard_details["user"].get("email"):
|
||||||
user = self.metadata.get_user_by_email(
|
return self.metadata.get_reference_by_email(
|
||||||
dashboard_details["user"].get("email")
|
dashboard_details["user"].get("email")
|
||||||
)
|
)
|
||||||
if user:
|
|
||||||
return EntityReference(id=user.id.__root__, type="user")
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def get_dashboard_url(self, dashboard_details: dict) -> str:
|
def get_dashboard_url(self, dashboard_details: dict) -> str:
|
||||||
|
@ -97,10 +97,7 @@ class SupersetSourceMixin(DashboardServiceSource):
|
|||||||
|
|
||||||
def _get_user_by_email(self, email: Optional[str]) -> Optional[EntityReference]:
|
def _get_user_by_email(self, email: Optional[str]) -> Optional[EntityReference]:
|
||||||
if email:
|
if email:
|
||||||
user = self.metadata.get_user_by_email(email)
|
return self.metadata.get_reference_by_email(email)
|
||||||
if user:
|
|
||||||
return EntityReference(id=user.id.__root__, type="user")
|
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def get_owner_details(
|
def get_owner_details(
|
||||||
|
@ -158,9 +158,7 @@ class TableauSource(DashboardServiceSource):
|
|||||||
) -> Optional[EntityReference]:
|
) -> Optional[EntityReference]:
|
||||||
"""Get dashboard owner from email"""
|
"""Get dashboard owner from email"""
|
||||||
if dashboard_details.owner and dashboard_details.owner.email:
|
if dashboard_details.owner and dashboard_details.owner.email:
|
||||||
user = self.metadata.get_user_by_email(dashboard_details.owner.email)
|
return self.metadata.get_reference_by_email(dashboard_details.owner.email)
|
||||||
if user:
|
|
||||||
return EntityReference(id=user.id.__root__, type="user")
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def yield_tag(self, *_, **__) -> Iterable[Either[OMetaTagAndClassification]]:
|
def yield_tag(self, *_, **__) -> Iterable[Either[OMetaTagAndClassification]]:
|
||||||
|
@ -166,9 +166,7 @@ class DomodatabaseSource(DatabaseServiceSource):
|
|||||||
try:
|
try:
|
||||||
owner_details = User(**self.domo_client.users_get(owner.id))
|
owner_details = User(**self.domo_client.users_get(owner.id))
|
||||||
if owner_details.email:
|
if owner_details.email:
|
||||||
user = self.metadata.get_user_by_email(owner_details.email)
|
return self.metadata.get_reference_by_email(owner_details.email)
|
||||||
if user:
|
|
||||||
return EntityReference(id=user.id.__root__, type="user")
|
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning(f"Error while getting details of user {owner.name} - {exc}")
|
logger.warning(f"Error while getting details of user {owner.name} - {exc}")
|
||||||
return None
|
return None
|
||||||
|
@ -1054,9 +1054,9 @@ class SampleDataSource(
|
|||||||
for pipeline in self.pipelines["pipelines"]:
|
for pipeline in self.pipelines["pipelines"]:
|
||||||
owner = None
|
owner = None
|
||||||
if pipeline.get("owner"):
|
if pipeline.get("owner"):
|
||||||
user = self.metadata.get_user_by_email(email=pipeline.get("owner"))
|
owner = self.metadata.get_reference_by_email(
|
||||||
if user:
|
email=pipeline.get("owner")
|
||||||
owner = EntityReference(id=user.id.__root__, type="user")
|
)
|
||||||
pipeline_ev = CreatePipelineRequest(
|
pipeline_ev = CreatePipelineRequest(
|
||||||
name=pipeline["name"],
|
name=pipeline["name"],
|
||||||
displayName=pipeline["displayName"],
|
displayName=pipeline["displayName"],
|
||||||
|
@ -12,6 +12,7 @@
|
|||||||
Airflow source to extract metadata from OM UI
|
Airflow source to extract metadata from OM UI
|
||||||
"""
|
"""
|
||||||
import traceback
|
import traceback
|
||||||
|
from collections import Counter
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Iterable, List, Optional, cast
|
from typing import Iterable, List, Optional, cast
|
||||||
|
|
||||||
@ -98,7 +99,7 @@ class AirflowSource(PipelineServiceSource):
|
|||||||
self._session = None
|
self._session = None
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def create(cls, config_dict, metadata: OpenMetadata):
|
def create(cls, config_dict, metadata: OpenMetadata) -> "AirflowSource":
|
||||||
config: WorkflowSource = WorkflowSource.parse_obj(config_dict)
|
config: WorkflowSource = WorkflowSource.parse_obj(config_dict)
|
||||||
connection: AirflowConnection = config.serviceConnection.__root__.config
|
connection: AirflowConnection = config.serviceConnection.__root__.config
|
||||||
if not isinstance(connection, AirflowConnection):
|
if not isinstance(connection, AirflowConnection):
|
||||||
@ -283,7 +284,7 @@ class AirflowSource(PipelineServiceSource):
|
|||||||
start_date=data.get("start_date", None),
|
start_date=data.get("start_date", None),
|
||||||
tasks=data.get("tasks", []),
|
tasks=data.get("tasks", []),
|
||||||
schedule_interval=get_schedule_interval(data),
|
schedule_interval=get_schedule_interval(data),
|
||||||
owners=self.fetch_owners(data),
|
owner=self.fetch_dag_owners(data),
|
||||||
)
|
)
|
||||||
|
|
||||||
yield dag
|
yield dag
|
||||||
@ -296,12 +297,29 @@ class AirflowSource(PipelineServiceSource):
|
|||||||
logger.debug(traceback.format_exc())
|
logger.debug(traceback.format_exc())
|
||||||
logger.warning(f"Wild error yielding dag {serialized_dag} - {err}")
|
logger.warning(f"Wild error yielding dag {serialized_dag} - {err}")
|
||||||
|
|
||||||
def fetch_owners(self, data) -> Optional[str]:
|
def fetch_dag_owners(self, data) -> Optional[str]:
|
||||||
|
"""
|
||||||
|
In Airflow, ownership is defined as:
|
||||||
|
- `default_args`: Applied to all tasks and available on the DAG payload
|
||||||
|
- `owners`: Applied at the tasks. In Airflow's source code, DAG ownership is then a
|
||||||
|
list joined with the owners of all the tasks.
|
||||||
|
|
||||||
|
We will pick the owner that appears in the most tasks.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
if self.source_config.includeOwners and data.get("default_args"):
|
if self.source_config.includeOwners:
|
||||||
return data.get("default_args", [])["__var"].get("email", [])
|
task_owners = [
|
||||||
except TypeError:
|
task.get("owner")
|
||||||
pass
|
for task in data.get("tasks", [])
|
||||||
|
if task.get("owner") is not None
|
||||||
|
]
|
||||||
|
if task_owners:
|
||||||
|
most_common_owner, _ = Counter(task_owners).most_common(1)[0]
|
||||||
|
return most_common_owner
|
||||||
|
except Exception as exc:
|
||||||
|
self.status.warning(
|
||||||
|
data.get("dag_id"), f"Could not extract owner information due to {exc}"
|
||||||
|
)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def get_pipeline_name(self, pipeline_details: SerializedDAG) -> str:
|
def get_pipeline_name(self, pipeline_details: SerializedDAG) -> str:
|
||||||
@ -310,8 +328,7 @@ class AirflowSource(PipelineServiceSource):
|
|||||||
"""
|
"""
|
||||||
return pipeline_details.dag_id
|
return pipeline_details.dag_id
|
||||||
|
|
||||||
@staticmethod
|
def get_tasks_from_dag(self, dag: AirflowDagDetails, host_port: str) -> List[Task]:
|
||||||
def get_tasks_from_dag(dag: AirflowDagDetails, host_port: str) -> List[Task]:
|
|
||||||
"""
|
"""
|
||||||
Obtain the tasks from a SerializedDAG
|
Obtain the tasks from a SerializedDAG
|
||||||
:param dag: AirflowDagDetails
|
:param dag: AirflowDagDetails
|
||||||
@ -332,28 +349,26 @@ class AirflowSource(PipelineServiceSource):
|
|||||||
startDate=task.start_date.isoformat() if task.start_date else None,
|
startDate=task.start_date.isoformat() if task.start_date else None,
|
||||||
endDate=task.end_date.isoformat() if task.end_date else None,
|
endDate=task.end_date.isoformat() if task.end_date else None,
|
||||||
taskType=task.task_type,
|
taskType=task.task_type,
|
||||||
|
owner=self.get_owner(task.owner),
|
||||||
)
|
)
|
||||||
for task in cast(Iterable[BaseOperator], dag.tasks)
|
for task in cast(Iterable[BaseOperator], dag.tasks)
|
||||||
]
|
]
|
||||||
|
|
||||||
def get_user_details(self, email) -> Optional[EntityReference]:
|
def get_owner(self, owner) -> Optional[EntityReference]:
|
||||||
user = self.metadata.get_user_by_email(email=email)
|
"""
|
||||||
if user:
|
Fetching users by name via ES to keep things as fast as possible.
|
||||||
return EntityReference(id=user.id.__root__, type="user")
|
|
||||||
return None
|
|
||||||
|
|
||||||
def get_owner(self, owners) -> Optional[EntityReference]:
|
We use the `owner` field since it's the one used by Airflow to showcase
|
||||||
|
the info in its UI. In other connectors we might use the mail (e.g., in Looker),
|
||||||
|
but we use name here to be consistent with Airflow itself.
|
||||||
|
|
||||||
|
If data is not indexed, we can live without this information
|
||||||
|
until the next run.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
if isinstance(owners, str) and owners:
|
return self.metadata.get_reference_by_name(name=owner)
|
||||||
return self.get_user_details(email=owners)
|
|
||||||
|
|
||||||
if isinstance(owners, List) and owners:
|
|
||||||
for owner in owners or []:
|
|
||||||
return self.get_user_details(email=owner)
|
|
||||||
|
|
||||||
logger.debug(f"No user found with email [{owners}] in OMD")
|
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning(f"Error while getting details of user {owners} - {exc}")
|
logger.warning(f"Error while getting details of user {owner} - {exc}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def yield_pipeline(
|
def yield_pipeline(
|
||||||
@ -380,7 +395,7 @@ class AirflowSource(PipelineServiceSource):
|
|||||||
pipeline_details, self.service_connection.hostPort
|
pipeline_details, self.service_connection.hostPort
|
||||||
),
|
),
|
||||||
service=self.context.pipeline_service,
|
service=self.context.pipeline_service,
|
||||||
owner=self.get_owner(pipeline_details.owners),
|
owner=self.get_owner(pipeline_details.owner),
|
||||||
scheduleInterval=pipeline_details.schedule_interval,
|
scheduleInterval=pipeline_details.schedule_interval,
|
||||||
)
|
)
|
||||||
yield Either(right=pipeline_request)
|
yield Either(right=pipeline_request)
|
||||||
|
@ -31,7 +31,7 @@ class AirflowBaseModel(BaseModel):
|
|||||||
dag_id: str
|
dag_id: str
|
||||||
|
|
||||||
|
|
||||||
class Task(BaseModel):
|
class AirflowTask(BaseModel):
|
||||||
pool: Optional[str]
|
pool: Optional[str]
|
||||||
doc_md: Optional[str]
|
doc_md: Optional[str]
|
||||||
inlets: Optional[List[Any]] = Field(alias="_inlets")
|
inlets: Optional[List[Any]] = Field(alias="_inlets")
|
||||||
@ -41,6 +41,7 @@ class Task(BaseModel):
|
|||||||
downstream_task_ids: Optional[List[str]]
|
downstream_task_ids: Optional[List[str]]
|
||||||
start_date: Optional[datetime]
|
start_date: Optional[datetime]
|
||||||
end_date: Optional[datetime]
|
end_date: Optional[datetime]
|
||||||
|
owner: Optional[str]
|
||||||
|
|
||||||
# Allow picking up data from key `inlets` and `_inlets`
|
# Allow picking up data from key `inlets` and `_inlets`
|
||||||
class Config:
|
class Config:
|
||||||
@ -48,7 +49,7 @@ class Task(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
class TaskList(BaseModel):
|
class TaskList(BaseModel):
|
||||||
__root__: List[Task]
|
__root__: List[AirflowTask]
|
||||||
|
|
||||||
|
|
||||||
class Dag(BaseModel):
|
class Dag(BaseModel):
|
||||||
@ -68,6 +69,6 @@ class AirflowDagDetails(AirflowBaseModel):
|
|||||||
max_active_runs: Optional[int]
|
max_active_runs: Optional[int]
|
||||||
description: Optional[str]
|
description: Optional[str]
|
||||||
start_date: Optional[datetime]
|
start_date: Optional[datetime]
|
||||||
tasks: List[Task]
|
tasks: List[AirflowTask]
|
||||||
owners: Optional[Any]
|
owner: Optional[str]
|
||||||
schedule_interval: Optional[str]
|
schedule_interval: Optional[str]
|
||||||
|
@ -32,6 +32,8 @@ from metadata.generated.schema.entity.services.mlmodelService import MlModelServ
|
|||||||
from metadata.generated.schema.entity.services.pipelineService import PipelineService
|
from metadata.generated.schema.entity.services.pipelineService import PipelineService
|
||||||
from metadata.generated.schema.entity.services.searchService import SearchService
|
from metadata.generated.schema.entity.services.searchService import SearchService
|
||||||
from metadata.generated.schema.entity.services.storageService import StorageService
|
from metadata.generated.schema.entity.services.storageService import StorageService
|
||||||
|
from metadata.generated.schema.entity.teams.team import Team
|
||||||
|
from metadata.generated.schema.entity.teams.user import User
|
||||||
|
|
||||||
DOT = "_DOT_"
|
DOT = "_DOT_"
|
||||||
TEN_MIN = 10 * 60
|
TEN_MIN = 10 * 60
|
||||||
@ -92,4 +94,7 @@ ENTITY_REFERENCE_TYPE_MAP = {
|
|||||||
SearchIndex.__name__: "searchIndex",
|
SearchIndex.__name__: "searchIndex",
|
||||||
MlModel.__name__: "mlmodel",
|
MlModel.__name__: "mlmodel",
|
||||||
Container.__name__: "container",
|
Container.__name__: "container",
|
||||||
|
# User Entities
|
||||||
|
User.__name__: "user",
|
||||||
|
Team.__name__: "team",
|
||||||
}
|
}
|
||||||
|
@ -15,15 +15,12 @@ import logging
|
|||||||
import time
|
import time
|
||||||
from unittest import TestCase
|
from unittest import TestCase
|
||||||
|
|
||||||
|
from metadata.generated.schema.api.teams.createTeam import CreateTeamRequest
|
||||||
from metadata.generated.schema.api.teams.createUser import CreateUserRequest
|
from metadata.generated.schema.api.teams.createUser import CreateUserRequest
|
||||||
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
|
from metadata.generated.schema.entity.teams.team import Team, TeamType
|
||||||
OpenMetadataConnection,
|
|
||||||
)
|
|
||||||
from metadata.generated.schema.entity.teams.user import User
|
from metadata.generated.schema.entity.teams.user import User
|
||||||
from metadata.generated.schema.security.client.openMetadataJWTClientConfig import (
|
|
||||||
OpenMetadataJWTClientConfig,
|
from ..integration_base import int_admin_ometa
|
||||||
)
|
|
||||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
|
||||||
|
|
||||||
|
|
||||||
class OMetaUserTest(TestCase):
|
class OMetaUserTest(TestCase):
|
||||||
@ -32,16 +29,7 @@ class OMetaUserTest(TestCase):
|
|||||||
Install the ingestion package before running the tests
|
Install the ingestion package before running the tests
|
||||||
"""
|
"""
|
||||||
|
|
||||||
server_config = OpenMetadataConnection(
|
metadata = int_admin_ometa()
|
||||||
hostPort="http://localhost:8585/api",
|
|
||||||
authProvider="openmetadata",
|
|
||||||
securityConfig=OpenMetadataJWTClientConfig(
|
|
||||||
jwtToken="eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
|
|
||||||
),
|
|
||||||
)
|
|
||||||
metadata = OpenMetadata(server_config)
|
|
||||||
|
|
||||||
assert metadata.health_check()
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def check_es_index(cls) -> None:
|
def check_es_index(cls) -> None:
|
||||||
@ -67,9 +55,15 @@ class OMetaUserTest(TestCase):
|
|||||||
Prepare ingredients
|
Prepare ingredients
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
cls.team: Team = cls.metadata.create_or_update(
|
||||||
|
data=CreateTeamRequest(
|
||||||
|
teamType=TeamType.Group, name="ops.team", email="ops.team@getcollate.io"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
cls.user_1: User = cls.metadata.create_or_update(
|
cls.user_1: User = cls.metadata.create_or_update(
|
||||||
data=CreateUserRequest(
|
data=CreateUserRequest(
|
||||||
name="random.user", email="random.user@getcollate.io"
|
name="random.user.es", email="random.user.es@getcollate.io"
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -102,34 +96,92 @@ class OMetaUserTest(TestCase):
|
|||||||
hard_delete=True,
|
hard_delete=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
cls.metadata.delete(
|
||||||
|
entity=User,
|
||||||
|
entity_id=cls.user_3.id,
|
||||||
|
hard_delete=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
cls.metadata.delete(
|
||||||
|
entity=Team,
|
||||||
|
entity_id=cls.team.id,
|
||||||
|
hard_delete=True,
|
||||||
|
)
|
||||||
|
|
||||||
def test_es_search_from_email(self):
|
def test_es_search_from_email(self):
|
||||||
"""
|
"""
|
||||||
We can fetch users by its email
|
We can fetch users by its email
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# No email returns None
|
# No email returns None
|
||||||
self.assertIsNone(self.metadata.get_user_by_email(email=None))
|
self.assertIsNone(self.metadata.get_reference_by_email(email=None))
|
||||||
|
|
||||||
# Non existing email returns None
|
# Non existing email returns None
|
||||||
self.assertIsNone(
|
self.assertIsNone(
|
||||||
self.metadata.get_user_by_email(email="idonotexist@random.com")
|
self.metadata.get_reference_by_email(email="idonotexist@random.com")
|
||||||
)
|
)
|
||||||
|
|
||||||
# Non existing email returns None, even if they have the same domain
|
# Non existing email returns None, even if they have the same domain
|
||||||
# To get this fixed, we had to update the `email` field in the
|
# To get this fixed, we had to update the `email` field in the
|
||||||
# index as a `keyword` and search by `email.keyword` in ES.
|
# index as a `keyword` and search by `email.keyword` in ES.
|
||||||
self.assertIsNone(
|
self.assertIsNone(
|
||||||
self.metadata.get_user_by_email(email="idonotexist@getcollate.io")
|
self.metadata.get_reference_by_email(email="idonotexist@getcollate.io")
|
||||||
)
|
)
|
||||||
|
|
||||||
# I can get User 1, who has the name equal to its email
|
# I can get User 1, who has the name equal to its email
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
self.user_1.id,
|
self.user_1.id,
|
||||||
self.metadata.get_user_by_email(email="random.user@getcollate.io").id,
|
self.metadata.get_reference_by_email(
|
||||||
|
email="random.user.es@getcollate.io"
|
||||||
|
).id,
|
||||||
)
|
)
|
||||||
|
|
||||||
# I can get User 2, who has an email not matching the name
|
# I can get User 2, who has an email not matching the name
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
self.user_2.id,
|
self.user_2.id,
|
||||||
self.metadata.get_user_by_email(email="user2.1234@getcollate.io").id,
|
self.metadata.get_reference_by_email(email="user2.1234@getcollate.io").id,
|
||||||
|
)
|
||||||
|
|
||||||
|
# I can get the team by its mail
|
||||||
|
self.assertEqual(
|
||||||
|
self.team.id,
|
||||||
|
self.metadata.get_reference_by_email(email="ops.team@getcollate.io").id,
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_es_search_from_name(self):
|
||||||
|
"""
|
||||||
|
We can fetch users by its name
|
||||||
|
"""
|
||||||
|
# No name returns None
|
||||||
|
self.assertIsNone(self.metadata.get_reference_by_name(name=None))
|
||||||
|
|
||||||
|
# Non existing name returns None
|
||||||
|
self.assertIsNone(self.metadata.get_reference_by_name(name="idonotexist"))
|
||||||
|
|
||||||
|
# We can get the user matching its name
|
||||||
|
self.assertEqual(
|
||||||
|
self.user_1.id,
|
||||||
|
self.metadata.get_reference_by_name(name="random.user.es").id,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Casing does not matter
|
||||||
|
self.assertEqual(
|
||||||
|
self.user_2.id,
|
||||||
|
self.metadata.get_reference_by_name(name="levy").id,
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(
|
||||||
|
self.user_2.id,
|
||||||
|
self.metadata.get_reference_by_name(name="Levy").id,
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(
|
||||||
|
self.user_1.id,
|
||||||
|
self.metadata.get_reference_by_name(name="Random User Es").id,
|
||||||
|
)
|
||||||
|
|
||||||
|
# I can get the team by its name
|
||||||
|
self.assertEqual(
|
||||||
|
self.team.id,
|
||||||
|
self.metadata.get_reference_by_name(name="OPS Team").id,
|
||||||
)
|
)
|
||||||
|
@ -253,9 +253,8 @@ class LookerUnitTest(TestCase):
|
|||||||
ref = EntityReference(id=uuid.uuid4(), type="user")
|
ref = EntityReference(id=uuid.uuid4(), type="user")
|
||||||
|
|
||||||
with patch.object(Looker40SDK, "user", return_value=MOCK_USER), patch.object(
|
with patch.object(Looker40SDK, "user", return_value=MOCK_USER), patch.object(
|
||||||
# This does not really return a ref, but for simplicity
|
|
||||||
OpenMetadata,
|
OpenMetadata,
|
||||||
"get_user_by_email",
|
"get_reference_by_email",
|
||||||
return_value=ref,
|
return_value=ref,
|
||||||
):
|
):
|
||||||
self.assertEqual(self.looker.get_owner_details(MOCK_LOOKER_DASHBOARD), ref)
|
self.assertEqual(self.looker.get_owner_details(MOCK_LOOKER_DASHBOARD), ref)
|
||||||
|
@ -12,33 +12,56 @@
|
|||||||
Test Airflow processing
|
Test Airflow processing
|
||||||
"""
|
"""
|
||||||
from unittest import TestCase
|
from unittest import TestCase
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
from metadata.generated.schema.metadataIngestion.workflow import (
|
||||||
|
OpenMetadataWorkflowConfig,
|
||||||
|
)
|
||||||
|
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||||
|
from metadata.ingestion.source.pipeline.airflow.metadata import AirflowSource
|
||||||
from metadata.ingestion.source.pipeline.airflow.models import (
|
from metadata.ingestion.source.pipeline.airflow.models import (
|
||||||
AirflowDag,
|
AirflowDag,
|
||||||
AirflowDagDetails,
|
AirflowDagDetails,
|
||||||
)
|
)
|
||||||
from metadata.ingestion.source.pipeline.airflow.utils import get_schedule_interval
|
from metadata.ingestion.source.pipeline.airflow.utils import get_schedule_interval
|
||||||
|
|
||||||
|
MOCK_CONFIG = {
|
||||||
|
"source": {
|
||||||
|
"type": "airflow",
|
||||||
|
"serviceName": "test_airflow",
|
||||||
|
"serviceConnection": {
|
||||||
|
"config": {
|
||||||
|
"type": "Airflow",
|
||||||
|
"hostPort": "https://localhost:8080",
|
||||||
|
"connection": {"type": "Backend"},
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"sourceConfig": {
|
||||||
|
"config": {
|
||||||
|
"type": "PipelineMetadata",
|
||||||
|
"includeOwners": True,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"sink": {"type": "metadata-rest", "config": {}},
|
||||||
|
"workflowConfig": {
|
||||||
|
"openMetadataServerConfig": {
|
||||||
|
"hostPort": "http://localhost:8585/api",
|
||||||
|
"authProvider": "openmetadata",
|
||||||
|
"securityConfig": {"jwtToken": "token"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
class TestAirflow(TestCase):
|
|
||||||
"""
|
|
||||||
Test Airflow model processing
|
|
||||||
"""
|
|
||||||
|
|
||||||
def test_parsing(self):
|
SERIALIZED_DAG = {
|
||||||
"""
|
|
||||||
We can properly pick up Airflow's payload and convert
|
|
||||||
it to our models
|
|
||||||
"""
|
|
||||||
|
|
||||||
serialized_dag = {
|
|
||||||
"__version": 1,
|
"__version": 1,
|
||||||
"dag": {
|
"dag": {
|
||||||
"_dag_id": "test-lineage-253",
|
"_dag_id": "test-lineage-253",
|
||||||
"fileloc": "/opt/airflow/dags/lineage-test.py",
|
"fileloc": "/opt/airflow/dags/lineage-test.py",
|
||||||
"default_args": {
|
"default_args": {
|
||||||
"__var": {
|
"__var": {
|
||||||
"owner": "airflow",
|
"owner": "my_owner",
|
||||||
"depends_on_past": False,
|
"depends_on_past": False,
|
||||||
"email": ["airflow@example.com"],
|
"email": ["airflow@example.com"],
|
||||||
"email_on_failure": False,
|
"email_on_failure": False,
|
||||||
@ -74,7 +97,7 @@ class TestAirflow(TestCase):
|
|||||||
"_processor_dags_folder": "/opt/airflow/dags",
|
"_processor_dags_folder": "/opt/airflow/dags",
|
||||||
"tasks": [
|
"tasks": [
|
||||||
{
|
{
|
||||||
"owner": "airflow",
|
"owner": "another_owner",
|
||||||
"retry_delay": 1,
|
"retry_delay": 1,
|
||||||
"retries": 1,
|
"retries": 1,
|
||||||
"ui_color": "#e8f7e4",
|
"ui_color": "#e8f7e4",
|
||||||
@ -89,9 +112,7 @@ class TestAirflow(TestCase):
|
|||||||
"inlets": [
|
"inlets": [
|
||||||
{
|
{
|
||||||
"__var": {
|
"__var": {
|
||||||
"tables": [
|
"tables": ["sample_data.ecommerce_db.shopify.dim_location"]
|
||||||
"sample_data.ecommerce_db.shopify.dim_location"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"__type": "dict",
|
"__type": "dict",
|
||||||
}
|
}
|
||||||
@ -106,14 +127,12 @@ class TestAirflow(TestCase):
|
|||||||
"outlets": [
|
"outlets": [
|
||||||
{
|
{
|
||||||
"__var": {
|
"__var": {
|
||||||
"tables": [
|
"tables": ["sample_data.ecommerce_db.shopify.dim_staff"]
|
||||||
"sample_data.ecommerce_db.shopify.dim_staff"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"__type": "dict",
|
"__type": "dict",
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"owner": "airflow",
|
"owner": "another_owner",
|
||||||
"retry_delay": 1,
|
"retry_delay": 1,
|
||||||
"retries": 1,
|
"retries": 1,
|
||||||
"ui_color": "#e8f7e4",
|
"ui_color": "#e8f7e4",
|
||||||
@ -135,20 +154,46 @@ class TestAirflow(TestCase):
|
|||||||
"dag_dependencies": [],
|
"dag_dependencies": [],
|
||||||
"params": {},
|
"params": {},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
data = serialized_dag["dag"]
|
|
||||||
|
class TestAirflow(TestCase):
|
||||||
|
"""
|
||||||
|
Test Airflow model processing
|
||||||
|
"""
|
||||||
|
|
||||||
|
@patch(
|
||||||
|
"metadata.ingestion.source.pipeline.pipeline_service.PipelineServiceSource.test_connection"
|
||||||
|
)
|
||||||
|
def __init__(self, methodName, test_connection) -> None:
|
||||||
|
super().__init__(methodName)
|
||||||
|
test_connection.return_value = False
|
||||||
|
self.config = OpenMetadataWorkflowConfig.parse_obj(MOCK_CONFIG)
|
||||||
|
|
||||||
|
# This already validates that the source can be initialized
|
||||||
|
self.airflow: AirflowSource = AirflowSource.create(
|
||||||
|
MOCK_CONFIG["source"],
|
||||||
|
OpenMetadata(self.config.workflowConfig.openMetadataServerConfig),
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_parsing(self):
|
||||||
|
"""
|
||||||
|
We can properly pick up Airflow's payload and convert
|
||||||
|
it to our models
|
||||||
|
"""
|
||||||
|
|
||||||
|
data = SERIALIZED_DAG["dag"]
|
||||||
|
|
||||||
dag = AirflowDagDetails(
|
dag = AirflowDagDetails(
|
||||||
dag_id="id",
|
dag_id="id",
|
||||||
fileloc="loc",
|
fileloc="loc",
|
||||||
data=AirflowDag.parse_obj(serialized_dag),
|
data=AirflowDag.parse_obj(SERIALIZED_DAG),
|
||||||
max_active_runs=data.get("max_active_runs", None),
|
max_active_runs=data.get("max_active_runs", None),
|
||||||
description=data.get("_description", None),
|
description=data.get("_description", None),
|
||||||
start_date=data.get("start_date", None),
|
start_date=data.get("start_date", None),
|
||||||
tasks=data.get("tasks", []),
|
tasks=data.get("tasks", []),
|
||||||
schedule_interval=None,
|
schedule_interval=None,
|
||||||
owners=None,
|
owner=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
@ -172,6 +217,28 @@ class TestAirflow(TestCase):
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_get_dag_owners(self):
|
||||||
|
data = SERIALIZED_DAG["dag"]
|
||||||
|
|
||||||
|
# The owner will be the one appearing as owner in most of the tasks
|
||||||
|
self.assertEqual("another_owner", self.airflow.fetch_dag_owners(data))
|
||||||
|
|
||||||
|
# if we monkey-patch the data dict with tasks with different owner counts...
|
||||||
|
data = {
|
||||||
|
"tasks": [
|
||||||
|
{"owner": "my_owner"},
|
||||||
|
{"owner": "my_owner"},
|
||||||
|
{"owner": "another_owner"},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
self.assertEqual("my_owner", self.airflow.fetch_dag_owners(data))
|
||||||
|
|
||||||
|
# If there are no owners, return None
|
||||||
|
data = {
|
||||||
|
"tasks": [{"something": None}, {"another_thing": None}, {"random": None}]
|
||||||
|
}
|
||||||
|
self.assertIsNone(self.airflow.fetch_dag_owners(data))
|
||||||
|
|
||||||
def test_get_schedule_interval(self):
|
def test_get_schedule_interval(self):
|
||||||
"""
|
"""
|
||||||
Check the shape of different DAGs
|
Check the shape of different DAGs
|
||||||
|
@ -139,6 +139,7 @@ public final class Entity {
|
|||||||
public static final String DASHBOARD = "dashboard";
|
public static final String DASHBOARD = "dashboard";
|
||||||
public static final String DASHBOARD_DATA_MODEL = "dashboardDataModel";
|
public static final String DASHBOARD_DATA_MODEL = "dashboardDataModel";
|
||||||
public static final String PIPELINE = "pipeline";
|
public static final String PIPELINE = "pipeline";
|
||||||
|
public static final String TASK = "task";
|
||||||
public static final String CHART = "chart";
|
public static final String CHART = "chart";
|
||||||
public static final String APPLICATION = "app";
|
public static final String APPLICATION = "app";
|
||||||
public static final String APP_MARKET_PLACE_DEF = "appMarketPlaceDefinition";
|
public static final String APP_MARKET_PLACE_DEF = "appMarketPlaceDefinition";
|
||||||
|
@ -17,7 +17,9 @@ import static org.openmetadata.common.utils.CommonUtil.listOrEmpty;
|
|||||||
import static org.openmetadata.common.utils.CommonUtil.nullOrEmpty;
|
import static org.openmetadata.common.utils.CommonUtil.nullOrEmpty;
|
||||||
import static org.openmetadata.schema.type.Include.ALL;
|
import static org.openmetadata.schema.type.Include.ALL;
|
||||||
import static org.openmetadata.schema.type.Include.NON_DELETED;
|
import static org.openmetadata.schema.type.Include.NON_DELETED;
|
||||||
|
import static org.openmetadata.schema.type.Relationship.OWNS;
|
||||||
import static org.openmetadata.service.Entity.CONTAINER;
|
import static org.openmetadata.service.Entity.CONTAINER;
|
||||||
|
import static org.openmetadata.service.Entity.FIELD_OWNER;
|
||||||
import static org.openmetadata.service.Entity.FIELD_TAGS;
|
import static org.openmetadata.service.Entity.FIELD_TAGS;
|
||||||
import static org.openmetadata.service.resources.tags.TagLabelUtil.addDerivedTags;
|
import static org.openmetadata.service.resources.tags.TagLabelUtil.addDerivedTags;
|
||||||
import static org.openmetadata.service.resources.tags.TagLabelUtil.checkMutuallyExclusive;
|
import static org.openmetadata.service.resources.tags.TagLabelUtil.checkMutuallyExclusive;
|
||||||
@ -25,12 +27,14 @@ import static org.openmetadata.service.util.EntityUtil.taskMatch;
|
|||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import org.apache.commons.lang3.tuple.Triple;
|
||||||
import org.jdbi.v3.sqlobject.transaction.Transaction;
|
import org.jdbi.v3.sqlobject.transaction.Transaction;
|
||||||
import org.openmetadata.schema.EntityInterface;
|
import org.openmetadata.schema.EntityInterface;
|
||||||
import org.openmetadata.schema.api.feed.ResolveTask;
|
import org.openmetadata.schema.api.feed.ResolveTask;
|
||||||
import org.openmetadata.schema.entity.data.Pipeline;
|
import org.openmetadata.schema.entity.data.Pipeline;
|
||||||
import org.openmetadata.schema.entity.data.PipelineStatus;
|
import org.openmetadata.schema.entity.data.PipelineStatus;
|
||||||
import org.openmetadata.schema.entity.services.PipelineService;
|
import org.openmetadata.schema.entity.services.PipelineService;
|
||||||
|
import org.openmetadata.schema.entity.teams.User;
|
||||||
import org.openmetadata.schema.type.EntityReference;
|
import org.openmetadata.schema.type.EntityReference;
|
||||||
import org.openmetadata.schema.type.Include;
|
import org.openmetadata.schema.type.Include;
|
||||||
import org.openmetadata.schema.type.Status;
|
import org.openmetadata.schema.type.Status;
|
||||||
@ -133,6 +137,7 @@ public class PipelineRepository extends EntityRepository<Pipeline> {
|
|||||||
pipeline.setService(getContainer(pipeline.getId()));
|
pipeline.setService(getContainer(pipeline.getId()));
|
||||||
pipeline.setSourceHash(fields.contains("sourceHash") ? pipeline.getSourceHash() : null);
|
pipeline.setSourceHash(fields.contains("sourceHash") ? pipeline.getSourceHash() : null);
|
||||||
getTaskTags(fields.contains(FIELD_TAGS), pipeline.getTasks());
|
getTaskTags(fields.contains(FIELD_TAGS), pipeline.getTasks());
|
||||||
|
getTaskOwners(fields.contains(FIELD_OWNER), pipeline.getTasks());
|
||||||
pipeline.withPipelineStatus(
|
pipeline.withPipelineStatus(
|
||||||
fields.contains("pipelineStatus")
|
fields.contains("pipelineStatus")
|
||||||
? getPipelineStatus(pipeline)
|
? getPipelineStatus(pipeline)
|
||||||
@ -146,6 +151,14 @@ public class PipelineRepository extends EntityRepository<Pipeline> {
|
|||||||
fields.contains("pipelineStatus") ? pipeline.getPipelineStatus() : null);
|
fields.contains("pipelineStatus") ? pipeline.getPipelineStatus() : null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void postDelete(Pipeline entity) {
|
||||||
|
// Cleanup all the fieldRelationship for task ownership. User -[owns]-> Task
|
||||||
|
for (Task task : listOrEmpty(entity.getTasks())) {
|
||||||
|
deleteTaskOwnerRelationship(task);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private PipelineStatus getPipelineStatus(Pipeline pipeline) {
|
private PipelineStatus getPipelineStatus(Pipeline pipeline) {
|
||||||
return JsonUtils.readValue(
|
return JsonUtils.readValue(
|
||||||
getLatestExtensionFromTimeSeries(
|
getLatestExtensionFromTimeSeries(
|
||||||
@ -232,6 +245,11 @@ public class PipelineRepository extends EntityRepository<Pipeline> {
|
|||||||
@Override
|
@Override
|
||||||
public void prepare(Pipeline pipeline, boolean update) {
|
public void prepare(Pipeline pipeline, boolean update) {
|
||||||
populateService(pipeline);
|
populateService(pipeline);
|
||||||
|
// Tasks can have owners
|
||||||
|
for (Task task : listOrEmpty(pipeline.getTasks())) {
|
||||||
|
EntityReference owner = validateOwner(task.getOwner());
|
||||||
|
task.setOwner(owner);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -241,15 +259,32 @@ public class PipelineRepository extends EntityRepository<Pipeline> {
|
|||||||
pipeline.withService(null);
|
pipeline.withService(null);
|
||||||
|
|
||||||
// Don't store column tags as JSON but build it on the fly based on relationships
|
// Don't store column tags as JSON but build it on the fly based on relationships
|
||||||
List<Task> taskWithTags = pipeline.getTasks();
|
List<Task> taskWithTagsAndOwners = pipeline.getTasks();
|
||||||
pipeline.setTasks(cloneWithoutTags(taskWithTags));
|
pipeline.setTasks(cloneWithoutTagsAndOwners(taskWithTagsAndOwners));
|
||||||
store(pipeline, update);
|
store(pipeline, update);
|
||||||
pipeline.withService(service).withTasks(taskWithTags);
|
pipeline.withService(service).withTasks(taskWithTagsAndOwners);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void storeRelationships(Pipeline pipeline) {
|
public void storeRelationships(Pipeline pipeline) {
|
||||||
addServiceRelationship(pipeline, pipeline.getService());
|
addServiceRelationship(pipeline, pipeline.getService());
|
||||||
|
|
||||||
|
for (Task task : listOrEmpty(pipeline.getTasks())) {
|
||||||
|
if (task.getOwner() != null) {
|
||||||
|
daoCollection
|
||||||
|
.fieldRelationshipDAO()
|
||||||
|
.insert(
|
||||||
|
FullyQualifiedName.buildHash(
|
||||||
|
task.getOwner().getFullyQualifiedName()), // from FQN hash
|
||||||
|
FullyQualifiedName.buildHash(task.getFullyQualifiedName()), // to FQN hash
|
||||||
|
task.getOwner().getFullyQualifiedName(), // from FQN
|
||||||
|
task.getFullyQualifiedName(), // to FQN
|
||||||
|
task.getOwner().getType(), // from type
|
||||||
|
Entity.TASK, // to type
|
||||||
|
OWNS.ordinal(),
|
||||||
|
null);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -288,6 +323,41 @@ public class PipelineRepository extends EntityRepository<Pipeline> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void getTaskOwners(boolean setOwner, List<Task> tasks) {
|
||||||
|
for (Task t : listOrEmpty(tasks)) {
|
||||||
|
if (t.getOwner() == null) {
|
||||||
|
t.setOwner(setOwner ? getTaskOwner(t.getFullyQualifiedName()) : t.getOwner());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private EntityReference getTaskOwner(String taskFullyQualifiedName) {
|
||||||
|
EntityReference ownerRef = null;
|
||||||
|
|
||||||
|
List<Triple<String, String, String>> owners =
|
||||||
|
daoCollection
|
||||||
|
.fieldRelationshipDAO()
|
||||||
|
.findFrom(
|
||||||
|
FullyQualifiedName.buildHash(taskFullyQualifiedName), Entity.TASK, OWNS.ordinal());
|
||||||
|
|
||||||
|
// Triple<fromFQN, fromType, json>
|
||||||
|
for (Triple<String, String, String> owner : owners) {
|
||||||
|
if (owner.getMiddle().equals(Entity.USER)) {
|
||||||
|
User user = daoCollection.userDAO().findEntityByName(owner.getLeft(), Include.NON_DELETED);
|
||||||
|
ownerRef =
|
||||||
|
new EntityReference()
|
||||||
|
.withId(user.getId())
|
||||||
|
.withName(user.getName())
|
||||||
|
.withFullyQualifiedName(user.getFullyQualifiedName())
|
||||||
|
.withDescription(user.getDescription())
|
||||||
|
.withDisplayName(user.getDisplayName())
|
||||||
|
.withHref(user.getHref())
|
||||||
|
.withDeleted(user.getDeleted());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ownerRef;
|
||||||
|
}
|
||||||
|
|
||||||
private void setTaskFQN(String parentFQN, List<Task> tasks) {
|
private void setTaskFQN(String parentFQN, List<Task> tasks) {
|
||||||
if (tasks != null) {
|
if (tasks != null) {
|
||||||
tasks.forEach(
|
tasks.forEach(
|
||||||
@ -320,16 +390,16 @@ public class PipelineRepository extends EntityRepository<Pipeline> {
|
|||||||
pipeline.setServiceType(service.getServiceType());
|
pipeline.setServiceType(service.getServiceType());
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<Task> cloneWithoutTags(List<Task> tasks) {
|
private List<Task> cloneWithoutTagsAndOwners(List<Task> tasks) {
|
||||||
if (nullOrEmpty(tasks)) {
|
if (nullOrEmpty(tasks)) {
|
||||||
return tasks;
|
return tasks;
|
||||||
}
|
}
|
||||||
List<Task> copy = new ArrayList<>();
|
List<Task> copy = new ArrayList<>();
|
||||||
tasks.forEach(t -> copy.add(cloneWithoutTags(t)));
|
tasks.forEach(t -> copy.add(cloneWithoutTagsAndOwners(t)));
|
||||||
return copy;
|
return copy;
|
||||||
}
|
}
|
||||||
|
|
||||||
private Task cloneWithoutTags(Task task) {
|
private Task cloneWithoutTagsAndOwners(Task task) {
|
||||||
return new Task()
|
return new Task()
|
||||||
.withDescription(task.getDescription())
|
.withDescription(task.getDescription())
|
||||||
.withName(task.getName())
|
.withName(task.getName())
|
||||||
@ -343,6 +413,20 @@ public class PipelineRepository extends EntityRepository<Pipeline> {
|
|||||||
.withEndDate(task.getEndDate());
|
.withEndDate(task.getEndDate());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected void deleteTaskOwnerRelationship(Task task) {
|
||||||
|
// If the deleted task has owners, we need to remove the field relationship
|
||||||
|
if (task.getOwner() != null) {
|
||||||
|
daoCollection
|
||||||
|
.fieldRelationshipDAO()
|
||||||
|
.delete(
|
||||||
|
FullyQualifiedName.buildHash(task.getOwner().getFullyQualifiedName()),
|
||||||
|
FullyQualifiedName.buildHash(task.getFullyQualifiedName()),
|
||||||
|
task.getOwner().getType(),
|
||||||
|
Entity.TASK,
|
||||||
|
OWNS.ordinal());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/** Handles entity updated from PUT and POST operation. */
|
/** Handles entity updated from PUT and POST operation. */
|
||||||
public class PipelineUpdater extends EntityUpdater {
|
public class PipelineUpdater extends EntityUpdater {
|
||||||
public PipelineUpdater(Pipeline original, Pipeline updated, Operation operation) {
|
public PipelineUpdater(Pipeline original, Pipeline updated, Operation operation) {
|
||||||
@ -400,7 +484,10 @@ public class PipelineRepository extends EntityRepository<Pipeline> {
|
|||||||
recordListChange(TASKS_FIELD, origTasks, updatedTasks, added, deleted, taskMatch);
|
recordListChange(TASKS_FIELD, origTasks, updatedTasks, added, deleted, taskMatch);
|
||||||
applyTaskTags(added);
|
applyTaskTags(added);
|
||||||
deleted.forEach(
|
deleted.forEach(
|
||||||
d -> daoCollection.tagUsageDAO().deleteTagsByTarget(d.getFullyQualifiedName()));
|
d -> {
|
||||||
|
daoCollection.tagUsageDAO().deleteTagsByTarget(d.getFullyQualifiedName());
|
||||||
|
deleteTaskOwnerRelationship(d);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -94,6 +94,15 @@
|
|||||||
"href": {
|
"href": {
|
||||||
"type": "text"
|
"type": "text"
|
||||||
},
|
},
|
||||||
|
"email": {
|
||||||
|
"type": "text",
|
||||||
|
"fields": {
|
||||||
|
"keyword": {
|
||||||
|
"type": "keyword",
|
||||||
|
"ignore_above": 256
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"domain" : {
|
"domain" : {
|
||||||
"properties": {
|
"properties": {
|
||||||
"id": {
|
"id": {
|
||||||
|
@ -227,6 +227,27 @@ public class PipelineResourceTest extends EntityResourceTest<Pipeline, CreatePip
|
|||||||
assertEquals("ta.sk", actualTask.getName());
|
assertEquals("ta.sk", actualTask.getName());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void post_pipelineWithTaskWithOwner(TestInfo test) throws IOException {
|
||||||
|
CreatePipeline create = createRequest(test);
|
||||||
|
Task task =
|
||||||
|
new Task()
|
||||||
|
.withName("task")
|
||||||
|
.withDescription("description")
|
||||||
|
.withSourceUrl("http://localhost:0")
|
||||||
|
.withOwner(USER1_REF);
|
||||||
|
create.setTasks(List.of(task));
|
||||||
|
Pipeline entity = createAndCheckEntity(create, ADMIN_AUTH_HEADERS);
|
||||||
|
Task actualTask = entity.getTasks().get(0);
|
||||||
|
assertEquals(USER1_REF.getName(), actualTask.getOwner().getName());
|
||||||
|
|
||||||
|
// We can GET the task retrieving the owner info
|
||||||
|
Pipeline storedPipeline =
|
||||||
|
getPipelineByName(entity.getFullyQualifiedName(), "owner,tasks", ADMIN_AUTH_HEADERS);
|
||||||
|
Task storedTask = storedPipeline.getTasks().get(0);
|
||||||
|
assertEquals(USER1_REF.getName(), storedTask.getOwner().getName());
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void put_PipelineUrlUpdate_200(TestInfo test) throws IOException {
|
void put_PipelineUrlUpdate_200(TestInfo test) throws IOException {
|
||||||
CreatePipeline request =
|
CreatePipeline request =
|
||||||
|
@ -117,6 +117,10 @@
|
|||||||
"$ref": "../../type/tagLabel.json"
|
"$ref": "../../type/tagLabel.json"
|
||||||
},
|
},
|
||||||
"default": null
|
"default": null
|
||||||
|
},
|
||||||
|
"owner": {
|
||||||
|
"description": "Owner of this task.",
|
||||||
|
"$ref": "../../type/entityReference.json"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"required": ["name"],
|
"required": ["name"],
|
||||||
|
Loading…
x
Reference in New Issue
Block a user