Mirror of https://github.com/open-metadata/OpenMetadata.git
* Fix sample data DAG
* Fix callback imports
* Use --set-default-enum-member in generate
* Format
* Add faulty merge hard_delete
* Fix airflow lineage, improve naming and fix lineage tests
* Add mysql url test
* Add mysql url test
* Update CI name
* Fix test ometa endpoint
* Format
* Fix metadata config
This commit is contained in:
parent d429f0b868 · commit 93525aea0d
Makefile | 2 +-
@@ -56,7 +56,7 @@ py_format_check: ## Check if Python sources are correctly formatted
 generate: ## Generate the pydantic models from the JSON Schemas to the ingestion module
 	@echo "Running Datamodel Code Generator"
 	@echo "Make sure to first run the install_dev recipe"
-	datamodel-codegen --input catalog-rest-service/src/main/resources/json --input-file-type jsonschema --output ingestion/src/metadata/generated
+	datamodel-codegen --input catalog-rest-service/src/main/resources/json --input-file-type jsonschema --output ingestion/src/metadata/generated --set-default-enum-member
 	$(MAKE) install

 ## Ingestion tests & QA
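For context on the new flag (not part of the diff itself): datamodel-codegen's --set-default-enum-member option makes the generator emit the enum member as the field default rather than its raw string value. A minimal sketch of the effect, using hypothetical model names rather than the actual generated classes:

    from enum import Enum

    from pydantic import BaseModel


    class ServiceType(Enum):
        BigQuery = "BigQuery"


    class ServiceConnection(BaseModel):
        # Without the flag the generator would emit roughly:
        #     serviceType: ServiceType = "BigQuery"
        # With --set-default-enum-member it references the member itself:
        serviceType: ServiceType = ServiceType.BigQuery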
@@ -33,7 +33,7 @@ curl --location --request PATCH 'localhost:8080/api/v1/dags/sample_data' \
 --data-raw '{
     "is_paused": false
 }'
-until curl -s -f -o /dev/null "http://localhost:8585/api/v1/tables/name/bigquery_gcp:shopify:fact_sale"; do
+until curl -s -f -o /dev/null "http://localhost:8585/api/v1/tables/name/bigquery_gcp.ecommerce_db.shopify.fact_sale"; do
     printf '.'
     sleep 2
 done
@@ -37,7 +37,11 @@ config = """
   "source": {
     "type": "sample-data",
     "serviceName": "bigquery_gcp",
-    "serviceConnection": "",
+    "serviceConnection": {
+      "config": {
+        "type": "BigQuery"
+      }
+    },
     "sourceConfig": {}
   },
   "sink": {
@@ -46,14 +50,13 @@ config = """
   },
   "workflowConfig": {
     "openMetadataServerConfig": {
-      "api_endpoint": "http://localhost:8585/api",
-      "auth_provider_type": "no-auth"
-    },
-    "config": {
+      "hostPort": "http://localhost:8585/api",
+      "authProvider": "no-auth"
     }
   }
 }


 """
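Taken together, the two hunks above leave the embedded sample-data ingestion config looking roughly like the snippet below. The renamed keys (hostPort, authProvider) match the OpenMetadataServerConfig fields used in the updated tests later in this commit; the sink block is unchanged here and elided:

    config = """
    {
      "source": {
        "type": "sample-data",
        "serviceName": "bigquery_gcp",
        "serviceConnection": {
          "config": {
            "type": "BigQuery"
          }
        },
        "sourceConfig": {}
      },
      "sink": { ... },
      "workflowConfig": {
        "openMetadataServerConfig": {
          "hostPort": "http://localhost:8585/api",
          "authProvider": "no-auth"
        }
      }
    }
    """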
@@ -15,10 +15,7 @@ OpenMetadata Airflow Lineage Backend
 import logging
 from typing import TYPE_CHECKING, Dict

-from airflow_provider_openmetadata.lineage.config import (
-    get_lineage_config,
-    get_metadata_config,
-)
+from airflow_provider_openmetadata.lineage.config.loader import get_lineage_config
 from airflow_provider_openmetadata.lineage.utils import (
     add_status,
     get_xlets,
@@ -42,8 +39,7 @@ def failure_callback(context: Dict[str, str]) -> None:
     """
     try:
         config = get_lineage_config()
-        metadata_config = get_metadata_config(config)
-        client = OpenMetadata(metadata_config)
+        metadata = OpenMetadata(config.metadata_config)

         operator: "BaseOperator" = context["task"]

@@ -54,13 +50,13 @@ def failure_callback(context: Dict[str, str]) -> None:

         # Get the pipeline created or updated during the lineage
         pipeline = parse_lineage(
-            config, context, operator, op_inlets, op_outlets, client
+            config, context, operator, op_inlets, op_outlets, metadata
         )

         add_status(
             operator=operator,
             pipeline=pipeline,
-            client=client,
+            metadata=metadata,
             context=context,
         )

@@ -79,18 +75,17 @@ def success_callback(context: Dict[str, str]) -> None:
     try:

         config = get_lineage_config()
-        metadata_config = get_metadata_config(config)
-        client = OpenMetadata(metadata_config)
+        metadata = OpenMetadata(config.metadata_config)

         operator: "BaseOperator" = context["task"]
         dag: "DAG" = context["dag"]

         operator.log.info("Updating pipeline status on success...")

-        airflow_service_entity = client.get_by_name(
+        airflow_service_entity = metadata.get_by_name(
             entity=PipelineService, fqdn=config.airflow_service_name
         )
-        pipeline: Pipeline = client.get_by_name(
+        pipeline: Pipeline = metadata.get_by_name(
             entity=Pipeline,
             fqdn=f"{airflow_service_entity.name}.{dag.dag_id}",
         )
@@ -98,7 +93,7 @@ def success_callback(context: Dict[str, str]) -> None:
         add_status(
             operator=operator,
             pipeline=pipeline,
-            client=client,
+            metadata=metadata,
             context=context,
         )

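Not part of the diff, but a usage sketch for the callbacks above: they are meant to be attached to a DAG through Airflow's callback hooks. Assuming they live in airflow_provider_openmetadata.lineage.callback (the import path is an assumption, not shown in this hunk), the wiring would look roughly like:

    from datetime import datetime

    from airflow import DAG
    from airflow.operators.bash import BashOperator

    from airflow_provider_openmetadata.lineage.callback import (
        failure_callback,
        success_callback,
    )

    with DAG(
        "lineage_example",
        start_date=datetime(2022, 3, 1),
        schedule_interval=None,
        default_args={
            # Both callbacks load the lineage config, build an OpenMetadata
            # client from config.metadata_config and push the task status.
            "on_failure_callback": failure_callback,
            "on_success_callback": success_callback,
        },
    ) as dag:
        BashOperator(task_id="task1", bash_command="date")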
@@ -79,12 +79,12 @@ class OpenMetadataLineageBackend(LineageBackend):

         try:
             config: AirflowLineageConfig = get_lineage_config()
-            client = OpenMetadata(config.metadata_config)
+            metadata = OpenMetadata(config.metadata_config)

             op_inlets = get_xlets(operator, "_inlets")
             op_outlets = get_xlets(operator, "_outlets")

-            parse_lineage(config, context, operator, op_inlets, op_outlets, client)
+            parse_lineage(config, context, operator, op_inlets, op_outlets, metadata)
         except Exception as exc:  # pylint: disable=broad-except
             operator.log.error(traceback.format_exc())
             operator.log.error(exc)
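As a reminder of how this backend gets enabled (unchanged by this commit, and an assumption rather than something shown in the diff): Airflow reads the backend setting from the [lineage] section, which can also be injected through an environment variable before the scheduler and webserver start. The class path below matches the module imported by the lineage tests further down:

    import os

    # Equivalent to setting "backend" under the [lineage] section of airflow.cfg.
    os.environ["AIRFLOW__LINEAGE__BACKEND"] = (
        "airflow_provider_openmetadata.lineage.openmetadata.OpenMetadataLineageBackend"
    )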
@@ -130,7 +130,7 @@ def create_or_update_pipeline(  # pylint: disable=too-many-locals
     operator: "BaseOperator",
     dag: "DAG",
     airflow_service_entity: PipelineService,
-    client: OpenMetadata,
+    metadata: OpenMetadata,
 ) -> Pipeline:
     """
     Prepare the upsert of pipeline entity with the given task
@@ -145,7 +145,7 @@ def create_or_update_pipeline(  # pylint: disable=too-many-locals
     :param operator: task being examined by lineage
     :param dag: airflow dag
     :param airflow_service_entity: PipelineService
-    :param client: OpenMetadata API client
+    :param metadata: OpenMetadata API client
     :return: PipelineEntity
     """
     pipeline_service_url = conf.get("webserver", "base_url")
@@ -182,9 +182,9 @@ def create_or_update_pipeline(  # pylint: disable=too-many-locals
     operator.log.info(
         f"Checking if the pipeline {airflow_service_entity.name}.{dag.dag_id} exists. If not, we will create it."
     )
-    current_pipeline: Pipeline = client.get_by_name(
+    current_pipeline: Pipeline = metadata.get_by_name(
         entity=Pipeline,
-        fqdn=f"{airflow_service_entity.name}.{dag.dag_id}",
+        fqdn=f"{airflow_service_entity.name.__root__}.{dag.dag_id}",
         fields=["tasks"],
     )

@@ -206,16 +206,16 @@ def create_or_update_pipeline(  # pylint: disable=too-many-locals
         owner=current_pipeline.owner if current_pipeline else None,
         tags=current_pipeline.tags if current_pipeline else None,
     )
-    pipeline = client.create_or_update(pipeline_request)
+    pipeline: Pipeline = metadata.create_or_update(pipeline_request)

     # Add the task we are processing in the lineage backend
     operator.log.info("Adding tasks to pipeline...")
-    updated_pipeline = client.add_task_to_pipeline(pipeline, task)
+    updated_pipeline = metadata.add_task_to_pipeline(pipeline, task)

     # Clean pipeline
     try:
         operator.log.info("Cleaning pipeline tasks...")
-        updated_pipeline = client.clean_pipeline_tasks(updated_pipeline, dag.task_ids)
+        updated_pipeline = metadata.clean_pipeline_tasks(updated_pipeline, dag.task_ids)
     except Exception as exc:  # pylint: disable=broad-except
         operator.log.warning(f"Error cleaning pipeline tasks {exc}")

@@ -252,7 +252,7 @@ def get_dag_status(all_tasks: List[str], task_status: List[TaskStatus]):
 def add_status(
     operator: "BaseOperator",
     pipeline: Pipeline,
-    client: OpenMetadata,
+    metadata: OpenMetadata,
     context: Dict,
 ) -> None:
     """
@@ -269,7 +269,7 @@ def add_status(

     # Check if we already have a pipelineStatus for
     # our execution_date that we should update
-    pipeline_status: List[PipelineStatus] = client.get_by_id(
+    pipeline_status: List[PipelineStatus] = metadata.get_by_id(
         entity=Pipeline, entity_id=pipeline.id, fields=["pipelineStatus"]
     ).pipelineStatus

@@ -303,7 +303,7 @@ def add_status(
     )

     operator.log.info(f"Added status to DAG {updated_status}")
-    client.add_pipeline_status(pipeline=pipeline, status=updated_status)
+    metadata.add_pipeline_status(pipeline=pipeline, status=updated_status)


 # pylint: disable=too-many-arguments,too-many-locals
@@ -313,7 +313,7 @@ def parse_lineage(
     operator: "BaseOperator",
     inlets: List,
     outlets: List,
-    client: OpenMetadata,
+    metadata: OpenMetadata,
 ) -> Optional[Pipeline]:
     """
     Main logic to extract properties from DAG and the
@@ -325,7 +325,7 @@ def parse_lineage(
     :param operator: task being executed
     :param inlets: list of upstream tables
     :param outlets: list of downstream tables
-    :param client: OpenMetadata client
+    :param metadata: OpenMetadata client
     """
     operator.log.info("Parsing Lineage for OpenMetadata")

@@ -335,19 +335,19 @@ def parse_lineage(
     try:

         airflow_service_entity = get_or_create_pipeline_service(
-            operator, client, config
+            operator, metadata, config
         )
         pipeline = create_or_update_pipeline(
             task_instance=task_instance,
             operator=operator,
             dag=dag,
             airflow_service_entity=airflow_service_entity,
-            client=client,
+            metadata=metadata,
         )

         operator.log.info("Parsing Lineage")
         for table in inlets if inlets else []:
-            table_entity = client.get_by_name(entity=Table, fqdn=table)
+            table_entity = metadata.get_by_name(entity=Table, fqdn=table)
             operator.log.debug(f"from entity {table_entity}")
             lineage = AddLineageRequest(
                 edge=EntitiesEdge(
@@ -356,10 +356,10 @@ def parse_lineage(
                 )
             )
             operator.log.debug(f"From lineage {lineage}")
-            client.add_lineage(lineage)
+            metadata.add_lineage(lineage)

         for table in outlets if outlets else []:
-            table_entity = client.get_by_name(entity=Table, fqdn=table)
+            table_entity = metadata.get_by_name(entity=Table, fqdn=table)
             operator.log.debug(f"To entity {table_entity}")
             lineage = AddLineageRequest(
                 edge=EntitiesEdge(
@@ -368,7 +368,7 @@ def parse_lineage(
                 )
             )
             operator.log.debug(f"To lineage {lineage}")
-            client.add_lineage(lineage)
+            metadata.add_lineage(lineage)

         return pipeline

@@ -382,19 +382,19 @@ def parse_lineage(


 def get_or_create_pipeline_service(
-    operator: "BaseOperator", client: OpenMetadata, config: AirflowLineageConfig
+    operator: "BaseOperator", metadata: OpenMetadata, config: AirflowLineageConfig
 ) -> PipelineService:
     """
     Check if we already have the airflow instance as a PipelineService,
     otherwise create it.

     :param operator: task from which we extract the lineage
-    :param client: OpenMetadata API wrapper
+    :param metadata: OpenMetadata API wrapper
     :param config: lineage config
     :return: PipelineService
     """
     operator.log.info("Get Airflow Service ID")
-    airflow_service_entity = client.get_by_name(
+    airflow_service_entity = metadata.get_by_name(
         entity=PipelineService, fqdn=config.airflow_service_name
     )

@@ -404,7 +404,7 @@ def get_or_create_pipeline_service(
         serviceType=PipelineServiceType.Airflow,
         pipelineUrl=conf.get("webserver", "base_url"),
     )
-    airflow_service_entity = client.create_or_update(pipeline_service)
+    airflow_service_entity = metadata.create_or_update(pipeline_service)
     operator.log.info("Created airflow service entity {}", airflow_service_entity)

     return airflow_service_entity
@@ -559,6 +559,7 @@ class OpenMetadata(
         entity: Type[T],
         entity_id: Union[str, basic.Uuid],
         recursive: bool = False,
+        hard_delete: bool = False,
     ) -> None:
         """
         API call to delete an entity from entity ID
@@ -570,7 +571,8 @@ class OpenMetadata(
         None
         """
         url = f"{self.get_suffix(entity)}/{model_str(entity_id)}"
-        url += f"?recursive=true" if recursive else ""
+        url += f"?recursive={str(recursive).lower()}"
+        url += f"&hardDelete={str(hard_delete).lower()}"
         self.client.delete(url)

     def compute_percentile(self, entity: Union[Type[T], str], date: str) -> None:
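For usage, the new flag pairs with recursive deletes; the tearDownClass added to the lineage test further down in this commit does exactly this. A standalone sketch with the same names as that test:

    from metadata.generated.schema.entity.services.databaseService import DatabaseService
    from metadata.generated.schema.metadataIngestion.workflow import OpenMetadataServerConfig
    from metadata.ingestion.ometa.ometa_api import OpenMetadata

    server_config = OpenMetadataServerConfig(hostPort="http://localhost:8585/api")
    metadata = OpenMetadata(server_config)

    # Look up the service created by the test and drop it for good.
    service_id = str(
        metadata.get_by_name(
            entity=DatabaseService, fqdn="test-service-table-lineage"
        ).id.__root__
    )

    # Builds "<entity suffix>/<id>?recursive=true&hardDelete=true" and issues the DELETE.
    metadata.delete(
        entity=DatabaseService,
        entity_id=service_id,
        recursive=True,
        hard_delete=True,
    )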
@@ -28,16 +28,26 @@ from airflow_provider_openmetadata.lineage.openmetadata import (
 )
 from airflow_provider_openmetadata.lineage.utils import get_xlets
 from metadata.generated.schema.api.data.createDatabase import CreateDatabaseRequest
+from metadata.generated.schema.api.data.createDatabaseSchema import (
+    CreateDatabaseSchemaRequest,
+)
 from metadata.generated.schema.api.data.createTable import CreateTableRequest
 from metadata.generated.schema.api.services.createDatabaseService import (
     CreateDatabaseServiceRequest,
 )
 from metadata.generated.schema.entity.data.pipeline import Pipeline
 from metadata.generated.schema.entity.data.table import Column, DataType
+from metadata.generated.schema.entity.services.connections.database.mysqlConnection import (
+    MysqlConnection,
+)
 from metadata.generated.schema.entity.services.databaseService import (
     DatabaseConnection,
+    DatabaseService,
     DatabaseServiceType,
 )
+from metadata.generated.schema.metadataIngestion.workflow import (
+    OpenMetadataServerConfig,
+)
 from metadata.generated.schema.type.entityReference import EntityReference
 from metadata.ingestion.ometa.ometa_api import OpenMetadata

@@ -47,7 +57,7 @@ class AirflowLineageTest(TestCase):
     Run this test installing the necessary airflow version
     """

-    server_config = MetadataServerConfig(api_endpoint="http://localhost:8585/api")
+    server_config = OpenMetadataServerConfig(hostPort="http://localhost:8585/api")
    metadata = OpenMetadata(server_config)

     assert metadata.health_check()
@@ -55,7 +65,13 @@ class AirflowLineageTest(TestCase):
     service = CreateDatabaseServiceRequest(
         name="test-service-table-lineage",
         serviceType=DatabaseServiceType.MySQL,
-        databaseConnection=DatabaseConnection(hostPort="localhost"),
+        connection=DatabaseConnection(
+            config=MysqlConnection(
+                username="username",
+                password="password",
+                hostPort="http://localhost:1234",
+            )
+        ),
     )
     service_type = "databaseService"

@@ -67,26 +83,36 @@ class AirflowLineageTest(TestCase):
         Prepare ingredients: Table Entity + DAG
         """

-        cls.service_entity = cls.metadata.create_or_update(data=cls.service)
+        service_entity = cls.metadata.create_or_update(data=cls.service)

-        cls.create_db = CreateDatabaseRequest(
+        create_db = CreateDatabaseRequest(
             name="test-db",
-            service=EntityReference(id=cls.service_entity.id, type="databaseService"),
+            service=EntityReference(id=service_entity.id, type="databaseService"),
         )

-        cls.create_db_entity = cls.metadata.create_or_update(data=cls.create_db)
+        create_db_entity = cls.metadata.create_or_update(data=create_db)

-        cls.db_reference = EntityReference(
-            id=cls.create_db_entity.id, name="test-db", type="database"
+        db_reference = EntityReference(
+            id=create_db_entity.id, name="test-db", type="database"
         )

-        cls.create = CreateTableRequest(
+        create_schema = CreateDatabaseSchemaRequest(
+            name="test-schema", database=db_reference
+        )
+
+        create_schema_entity = cls.metadata.create_or_update(data=create_schema)
+
+        schema_reference = EntityReference(
+            id=create_schema_entity.id, name="test-schema", type="databaseSchema"
+        )
+
+        create = CreateTableRequest(
             name="lineage-test",
-            database=cls.db_reference,
+            databaseSchema=schema_reference,
             columns=[Column(name="id", dataType=DataType.BIGINT)],
         )

-        cls.table = cls.metadata.create_or_update(data=cls.create)
+        cls.table = cls.metadata.create_or_update(data=create)

         with DAG(
             "lineage",
@@ -98,13 +124,21 @@ class AirflowLineageTest(TestCase):
             t1 = BashOperator(  # Using BashOperator as a random example
                 task_id="task1",
                 bash_command="date",
-                outlets={"tables": ["test-service-table-lineage:test-db:lineage-test"]},
+                outlets={
+                    "tables": [
+                        "test-service-table-lineage.test-db.test-schema.lineage-test"
+                    ]
+                },
             )

             t2 = BashOperator(  # Using BashOperator as a random example
                 task_id="task2",
                 bash_command="sleep 5",
-                inlets={"tables": ["test-service-table-lineage:test-db:lineage-test"]},
+                inlets={
+                    "tables": [
+                        "test-service-table-lineage.test-db.test-schema.lineage-test"
+                    ]
+                },
             )

             t3 = BashOperator(
@@ -116,6 +150,25 @@ class AirflowLineageTest(TestCase):

             cls.dag = dag

+    @classmethod
+    def tearDownClass(cls) -> None:
+        """
+        Clean up
+        """
+
+        service_id = str(
+            cls.metadata.get_by_name(
+                entity=DatabaseService, fqdn="test-service-table-lineage"
+            ).id.__root__
+        )
+
+        cls.metadata.delete(
+            entity=DatabaseService,
+            entity_id=service_id,
+            recursive=True,
+            hard_delete=True,
+        )
+
     def test_xlets(self):
         """
         Verify that we can extract inlets and outlets
@@ -123,12 +176,12 @@ class AirflowLineageTest(TestCase):

         self.assertIsNone(get_xlets(self.dag.get_task("task1"), "_inlets"))
         self.assertEqual(
-            ["test-service-table-lineage:test-db:lineage-test"],
+            ["test-service-table-lineage.test-db.test-schema.lineage-test"],
             get_xlets(self.dag.get_task("task1"), "_outlets"),
         )

         self.assertEqual(
-            ["test-service-table-lineage:test-db:lineage-test"],
+            ["test-service-table-lineage.test-db.test-schema.lineage-test"],
             get_xlets(self.dag.get_task("task2"), "_inlets"),
         )
         self.assertIsNone(get_xlets(self.dag.get_task("task2"), "_outlets"))
@@ -158,19 +211,24 @@ class AirflowLineageTest(TestCase):
         )

         self.assertIsNotNone(
-            self.metadata.get_by_name(entity=Pipeline, fqdn="local_airflow_3:lineage")
+            self.metadata.get_by_name(entity=Pipeline, fqdn="local_airflow_3.lineage")
         )

         lineage = self.metadata.get_lineage_by_name(
-            entity=Pipeline, fqdn="local_airflow_3:lineage"
+            entity=Pipeline, fqdn="local_airflow_3.lineage"
         )

+        print(lineage)
+
         nodes = {node["id"] for node in lineage["nodes"]}
         self.assertIn(str(self.table.id.__root__), nodes)

     def test_lineage_task_group(self):
         """
-        Test end to end for task groups
+        Test end to end for task groups.
+
+        Run the lineage execution mimicking
+        the execution of three tasks
         """

         with DAG(
@@ -210,8 +268,42 @@ class AirflowLineageTest(TestCase):
             },
         )

+        self.backend.send_lineage(
+            operator=dag.get_task("group1.task2"),
+            context={
+                "dag": dag,
+                "task": dag.get_task("group1.task2"),
+                "task_instance": TaskInstance(
+                    task=dag.get_task("group1.task2"),
+                    execution_date=datetime.strptime(
+                        "2022-03-15T08:13:45", "%Y-%m-%dT%H:%M:%S"
+                    ),
+                    run_id="scheduled__2022-03-15T08:13:45.967068+00:00",
+                    state="running",
+                ),
+            },
+        )
+
+        self.backend.send_lineage(
+            operator=dag.get_task("end"),
+            context={
+                "dag": dag,
+                "task": dag.get_task("end"),
+                "task_instance": TaskInstance(
+                    task=dag.get_task("end"),
+                    execution_date=datetime.strptime(
+                        "2022-03-15T08:13:45", "%Y-%m-%dT%H:%M:%S"
+                    ),
+                    run_id="scheduled__2022-03-15T08:13:45.967068+00:00",
+                    state="running",
+                ),
+            },
+        )
+
         pipeline = self.metadata.get_by_name(
-            entity=Pipeline, fqdn="local_airflow_3:task_group_lineage", fields=["tasks"]
+            entity=Pipeline, fqdn="local_airflow_3.task_group_lineage", fields=["tasks"]
         )
         self.assertIsNotNone(pipeline)
         self.assertIn("group1.task1", {task.name for task in pipeline.tasks})
+        self.assertIn("group1.task2", {task.name for task in pipeline.tasks})
+        self.assertIn("end", {task.name for task in pipeline.tasks})
@@ -25,6 +25,9 @@ from sqlalchemy.orm import declarative_base

 from metadata.config.common import WorkflowExecutionError
 from metadata.generated.schema.entity.data.table import Table
+from metadata.generated.schema.metadataIngestion.workflow import (
+    OpenMetadataServerConfig,
+)
 from metadata.ingestion.api.workflow import Workflow
 from metadata.ingestion.ometa.ometa_api import OpenMetadata
 from metadata.orm_profiler.api.workflow import ProfilerWorkflow
@@ -73,7 +76,7 @@ class ProfilerWorkflowTest(TestCase):
     )
     session = create_and_bind_session(engine)

-    server_config = MetadataServerConfig(api_endpoint="http://localhost:8585/api")
+    server_config = OpenMetadataServerConfig(hostPort="http://localhost:8585/api")
     metadata = OpenMetadata(server_config)

     @classmethod
@@ -22,6 +22,7 @@ from metadata.generated.schema.api.teams.createUser import CreateUserRequest
 from metadata.generated.schema.entity.data.chart import Chart
 from metadata.generated.schema.entity.data.dashboard import Dashboard
 from metadata.generated.schema.entity.data.database import Database
+from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
 from metadata.generated.schema.entity.data.metrics import Metrics
 from metadata.generated.schema.entity.data.mlmodel import MlModel
 from metadata.generated.schema.entity.data.pipeline import Pipeline
@@ -33,6 +34,9 @@ from metadata.generated.schema.entity.services.databaseService import DatabaseSe
 from metadata.generated.schema.entity.services.messagingService import MessagingService
 from metadata.generated.schema.entity.services.pipelineService import PipelineService
 from metadata.generated.schema.entity.teams.user import User
+from metadata.generated.schema.metadataIngestion.workflow import (
+    OpenMetadataServerConfig,
+)
 from metadata.ingestion.ometa.ometa_api import OpenMetadata


@@ -42,7 +46,7 @@ class OMetaEndpointTest(TestCase):
     from the generated entity classes
     """

-    server_config = MetadataServerConfig(api_endpoint="http://localhost:8585/api")
+    server_config = OpenMetadataServerConfig(hostPort="http://localhost:8585/api")
     metadata = OpenMetadata(server_config)

     def test_entities_suffix(self):
@@ -54,6 +58,7 @@ class OMetaEndpointTest(TestCase):

         # Db
         self.assertEqual(self.metadata.get_suffix(Database), "/databases")
+        self.assertEqual(self.metadata.get_suffix(DatabaseSchema), "/databaseSchemas")
         self.assertEqual(self.metadata.get_suffix(Table), "/tables")

         # Dashboards
@@ -21,6 +21,9 @@ from sklearn.tree import DecisionTreeClassifier

 from metadata.generated.schema.api.data.createMlModel import CreateMlModelRequest
 from metadata.generated.schema.entity.data.mlmodel import MlModel
+from metadata.generated.schema.metadataIngestion.workflow import (
+    OpenMetadataServerConfig,
+)
 from metadata.ingestion.ometa.ometa_api import OpenMetadata


@@ -29,7 +32,7 @@ class OMetaModelMixinTest(TestCase):
     Test the MlModel integrations from MlModel Mixin
     """

-    server_config = MetadataServerConfig(api_endpoint="http://localhost:8585/api")
+    server_config = OpenMetadataServerConfig(hostPort="http://localhost:8585/api")
     metadata = OpenMetadata(server_config)

     iris = datasets.load_iris()
ingestion/tests/unit/test_source_url.py (new file, 32 lines)
@@ -0,0 +1,32 @@
+# Copyright 2021 Collate
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+OpenMetadata source URL building tests
+"""
+
+from metadata.generated.schema.entity.services.connections.database.mysqlConnection import (
+    MysqlConnection,
+)
+from metadata.utils.source_connections import get_connection_url
+
+
+def test_mysql_url():
+    """
+    Validate MySQL URL building
+    """
+    connection = MysqlConnection(
+        username="username",
+        password="password",
+        hostPort="localhost:1234",
+    )
+    url = get_connection_url(connection)
+    assert url == "mysql+pymysql://username:password@localhost:1234"