Added DBT to airflow, modified tables.json - added profile data, fixed user's displayName (#1501)

* User's Name fixed, added profile data, added dbt pipeline under airflow

* service name changed
This commit is contained in:
Ayush Shah 2021-12-01 22:02:29 +05:30 committed by GitHub
parent 5794b5f4f8
commit 0e205d93dd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 14066 additions and 9162 deletions

View File

@ -30,7 +30,7 @@ default_args = {
"retries": 3,
"retry_delay": timedelta(minutes=2),
"execution_timeout": timedelta(minutes=60),
"schedule_interval": "*/5 * * * *",
"schedule_interval": "0 */1 * * *",
}
config = """

View File

@ -0,0 +1,86 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import pathlib
from datetime import timedelta
from airflow import DAG
try:
from airflow.operators.python import PythonOperator
except ModuleNotFoundError:
from airflow.operators.python_operator import PythonOperator
from airflow.utils.dates import days_ago
from metadata.config.common import load_config_file
from metadata.ingestion.api.workflow import Workflow
# Task-level defaults handed to the DAG below: failed tasks are retried
# three times, ten seconds apart, each attempt is capped at one hour, and
# no e-mail is sent on failure.
default_args = {
    "owner": "user_name",
    "email": ["username@org.com"],
    "email_on_failure": False,
    "retries": 3,
    "retry_delay": timedelta(seconds=10),
    "execution_timeout": timedelta(minutes=60),
}

# Ingestion recipe as a JSON document: a "dbt" source reading the sample
# catalog/manifest/run-results files, a "metadata-rest" sink, and the
# metadata server endpoint (no authentication). It is parsed with
# json.loads() when the workflow runs.
config = """
{
"source": {
"type": "dbt",
"config": {
"service_name": "bigquery_dbt",
"service_type": "BigQuery",
"catalog_file": "./examples/sample_data/dbt/catalog.json",
"manifest_file": "./examples/sample_data/dbt/manifest.json",
"run_results_file": "./examples/sample_data/dbt/run_results.json",
"database": "shopify"
}
},
"sink": {
"type": "metadata-rest",
"config": {}
},
"metadata_server": {
"type": "metadata-server",
"config": {
"api_endpoint": "http://localhost:8585/api",
"auth_provider_type": "no-auth"
}
}
}
"""
def metadata_ingestion_workflow():
    """Run the ingestion workflow described by the module-level ``config``.

    The JSON ``config`` string is parsed, a ``Workflow`` is created from it
    and executed, ``raise_from_status()`` is called so that a failed run
    propagates to the caller (presumably by raising -- confirm against the
    ``Workflow`` API), and the status summary is printed.

    ``workflow.stop()`` runs in a ``finally`` clause so the workflow is
    always shut down, including when execution or the status check raises.
    Without this, a failing run would skip ``stop()`` entirely.

    Raises:
        json.JSONDecodeError: if ``config`` is not valid JSON.
        Exception: whatever ``Workflow.create``, ``execute`` or
            ``raise_from_status`` raise; nothing is swallowed here.
    """
    workflow_config = json.loads(config)
    # Creation stays outside the try block: if it fails there is no
    # workflow object to stop.
    workflow = Workflow.create(workflow_config)
    try:
        workflow.execute()
        workflow.raise_from_status()
        workflow.print_status()
    finally:
        workflow.stop()
# Example DAG "sample_dbt": scheduled once a day, starts unpaused, and does
# not backfill missed intervals (catchup disabled).
with DAG(
    dag_id="sample_dbt",
    description="An example DAG which runs a OpenMetadata ingestion workflow",
    default_args=default_args,
    start_date=days_ago(1),
    schedule_interval=timedelta(days=1),
    catchup=False,
    is_paused_upon_creation=False,
) as dag:
    # Single task: call the Python ingestion function defined in this module.
    ingest_task = PythonOperator(
        python_callable=metadata_ingestion_workflow,
        task_id="ingest_using_recipe",
    )

File diff suppressed because it is too large Load Diff

View File

@ -20,6 +20,7 @@ airflow users create \
--password admin
airflow db upgrade
(while ! wget -O /dev/null -o /dev/null http://ingestion:8080; do sleep 5; done; sleep 5; curl -u admin:admin --data '{"dag_run_id":"sample_data_1"}' -H "Content-type: application/json" -X POST http://ingestion:8080/api/v1/dags/sample_data/dagRuns) &
(while ! wget -O /dev/null -o /dev/null http://openmetadata-server:8585/api/v1/tables/name/bigquery_gcp.shopify.fact_sale; do sleep 5; done; sleep 6; curl -u admin:admin --data '{"dag_run_id":"sample_usage_1"}' -H "Content-type: application/json" -X POST http://ingestion:8080/api/v1/dags/sample_usage/dagRuns) &
(while ! wget -O /dev/null -o /dev/null http://openmetadata-server:8585/api/v1/tables/name/bigquery_gcp.shopify.fact_sale; do sleep 5; done; sleep 7; curl -u admin:admin --data '{"dag_run_id":"index_metadata_1"}' -H "Content-type: application/json" -X POST http://ingestion:8080/api/v1/dags/index_metadata/dagRuns) &
(while ! wget -O /dev/null -o /dev/null http://openmetadata-server:8585/api/v1/tables/name/bigquery_gcp.shopify.fact_sale; do sleep 5; done; sleep 7; curl -u admin:admin --data '{"dag_run_id":"sample_usage_1"}' -H "Content-type: application/json" -X POST http://ingestion:8080/api/v1/dags/sample_usage/dagRuns) &
(while ! wget -O /dev/null -o /dev/null http://openmetadata-server:8585/api/v1/tables/name/bigquery_gcp.shopify.fact_sale; do sleep 5; done; sleep 9; curl -u admin:admin --data '{"dag_run_id":"sample_dbt_1"}' -H "Content-type: application/json" -X POST http://ingestion:8080/api/v1/dags/sample_dbt/dagRuns) &
(while ! wget -O /dev/null -o /dev/null http://openmetadata-server:8585/api/v1/tables/name/bigquery_gcp.shopify.fact_sale; do sleep 5; done; sleep 10; curl -u admin:admin --data '{"dag_run_id":"index_metadata_1"}' -H "Content-type: application/json" -X POST http://ingestion:8080/api/v1/dags/index_metadata/dagRuns) &
airflow standalone

View File

@ -388,14 +388,14 @@ class MetadataRestSink(Sink):
metadata_user = CreateUserEntityRequest(
name=record.name.__root__,
displayName=record.name.__root__,
displayName=record.displayName,
email=record.email,
teams=teams,
)
try:
self.metadata.create_or_update(metadata_user)
self.status.records_written(record.name.__root__)
logger.info("Sink: {}".format(record.name.__root__))
self.status.records_written(record.displayName)
logger.info("Sink: {}".format(record.displayName))
except Exception as err:
logger.error(traceback.format_exc())
logger.error(traceback.print_exc())