Fix#6027: Improve logging in OpenMetadata Airflow APIs (#6920)

Fix#6027: Improve logging in OpenMetadata Airflow APIs (#6920)
This commit is contained in:
Nahuel 2022-08-26 07:29:38 +02:00 committed by GitHub
parent 075a0196cf
commit bdbbca0efe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 210 additions and 81 deletions

View File

@ -12,23 +12,24 @@
Register error handlers Register error handlers
""" """
import logging
from openmetadata_managed_apis.api.app import blueprint from openmetadata_managed_apis.api.app import blueprint
from openmetadata_managed_apis.api.response import ApiResponse from openmetadata_managed_apis.api.response import ApiResponse
from openmetadata_managed_apis.api.utils import MissingArgException from openmetadata_managed_apis.api.utils import MissingArgException
from openmetadata_managed_apis.utils.logger import api_logger
from werkzeug.exceptions import HTTPException from werkzeug.exceptions import HTTPException
logger = api_logger()
@blueprint.app_errorhandler(Exception) @blueprint.app_errorhandler(Exception)
def handle_any_error(e): def handle_any_error(exc):
logging.exception("Wild exception") logger.exception(f"Wild exception: {exc}")
if isinstance(e, HTTPException): if isinstance(exc, HTTPException):
return ApiResponse.error(e.code, repr(e)) return ApiResponse.error(exc.code, repr(exc))
return ApiResponse.server_error(repr(e)) return ApiResponse.server_error(repr(exc))
@blueprint.app_errorhandler(MissingArgException) @blueprint.app_errorhandler(MissingArgException)
def handle_missing_arg(e): def handle_missing_arg(exc):
logging.exception("Missing Argument Exception") logger.exception(f"Missing Argument Exception: {exc}")
return ApiResponse.bad_request(repr(e)) return ApiResponse.bad_request(repr(exc))

View File

@ -11,7 +11,6 @@
""" """
Delete the DAG in Airflow's db, as well as the python file Delete the DAG in Airflow's db, as well as the python file
""" """
import logging
import traceback import traceback
from airflow.api_connexion import security from airflow.api_connexion import security
@ -22,8 +21,11 @@ from openmetadata_managed_apis.api.app import blueprint
from openmetadata_managed_apis.api.response import ApiResponse from openmetadata_managed_apis.api.response import ApiResponse
from openmetadata_managed_apis.api.utils import get_arg_dag_id from openmetadata_managed_apis.api.utils import get_arg_dag_id
from openmetadata_managed_apis.operations.delete import delete_dag_id from openmetadata_managed_apis.operations.delete import delete_dag_id
from openmetadata_managed_apis.utils.logger import routes_logger
from werkzeug.utils import secure_filename from werkzeug.utils import secure_filename
logger = routes_logger()
@blueprint.route("/delete", methods=["DELETE"]) @blueprint.route("/delete", methods=["DELETE"])
@csrf.exempt @csrf.exempt
@ -45,8 +47,11 @@ def delete_dag() -> Response:
return delete_dag_id(secure_dag_id) return delete_dag_id(secure_dag_id)
except Exception as exc: except Exception as exc:
logging.info(f"Failed to delete dag {dag_id} [secured: {secure_dag_id}]") logger.debug(traceback.format_exc())
logger.error(
f"Failed to delete dag [{dag_id}] [secured: {secure_dag_id}]: {exc}"
)
return ApiResponse.error( return ApiResponse.error(
status=ApiResponse.STATUS_SERVER_ERROR, status=ApiResponse.STATUS_SERVER_ERROR,
error=f"Failed to delete {dag_id} [secured: {secure_dag_id}] due to {exc} - {traceback.format_exc()}", error=f"Failed to delete [{dag_id}] [secured: {secure_dag_id}] due to [{exc}] ",
) )

View File

@ -20,12 +20,15 @@ from flask import Response, request
from openmetadata_managed_apis.api.app import blueprint from openmetadata_managed_apis.api.app import blueprint
from openmetadata_managed_apis.api.response import ApiResponse from openmetadata_managed_apis.api.response import ApiResponse
from openmetadata_managed_apis.operations.deploy import DagDeployer from openmetadata_managed_apis.operations.deploy import DagDeployer
from openmetadata_managed_apis.utils.logger import routes_logger
from pydantic import ValidationError from pydantic import ValidationError
from metadata.generated.schema.entity.services.ingestionPipelines.ingestionPipeline import ( from metadata.generated.schema.entity.services.ingestionPipelines.ingestionPipeline import (
IngestionPipeline, IngestionPipeline,
) )
logger = routes_logger()
@blueprint.route("/deploy", methods=["POST"]) @blueprint.route("/deploy", methods=["POST"])
@csrf.exempt @csrf.exempt
@ -48,13 +51,19 @@ def deploy_dag() -> Response:
return response return response
except ValidationError as err: except ValidationError as err:
logger.debug(traceback.format_exc())
logger.error(
f"Request Validation Error parsing payload [{json_request}]. IngestionPipeline expected: {err}"
)
return ApiResponse.error( return ApiResponse.error(
status=ApiResponse.STATUS_BAD_REQUEST, status=ApiResponse.STATUS_BAD_REQUEST,
error=f"Request Validation Error parsing payload {json_request}. IngestionPipeline expected - {err}", error=f"Request Validation Error parsing payload. IngestionPipeline expected: {err}",
) )
except Exception as err: except Exception as exc:
logger.debug(traceback.format_exc())
logger.error(f"Internal error deploying [{json_request}] due to [{exc}] ")
return ApiResponse.error( return ApiResponse.error(
status=ApiResponse.STATUS_SERVER_ERROR, status=ApiResponse.STATUS_SERVER_ERROR,
error=f"Internal error deploying {json_request} - {err} - {traceback.format_exc()}", error=f"Internal error while deploying due to [{exc}] ",
) )

View File

@ -11,7 +11,6 @@
""" """
Disable/Pause a dag Disable/Pause a dag
""" """
import logging
import traceback import traceback
from airflow.api_connexion import security from airflow.api_connexion import security
@ -22,6 +21,9 @@ from openmetadata_managed_apis.api.app import blueprint
from openmetadata_managed_apis.api.response import ApiResponse from openmetadata_managed_apis.api.response import ApiResponse
from openmetadata_managed_apis.api.utils import get_request_dag_id from openmetadata_managed_apis.api.utils import get_request_dag_id
from openmetadata_managed_apis.operations.state import disable_dag from openmetadata_managed_apis.operations.state import disable_dag
from openmetadata_managed_apis.utils.logger import routes_logger
logger = routes_logger()
@blueprint.route("/disable", methods=["POST"]) @blueprint.route("/disable", methods=["POST"])
@ -37,8 +39,9 @@ def disable() -> Response:
return disable_dag(dag_id) return disable_dag(dag_id)
except Exception as exc: except Exception as exc:
logging.info(f"Failed to get last run logs for '{dag_id}'") logger.debug(traceback.format_exc())
logger.error(f"Failed to disable dag [{dag_id}]: {exc}")
return ApiResponse.error( return ApiResponse.error(
status=ApiResponse.STATUS_SERVER_ERROR, status=ApiResponse.STATUS_SERVER_ERROR,
error=f"Failed to get last run logs for '{dag_id}' due to {exc} - {traceback.format_exc()}", error=f"Failed to disable dag [{dag_id}] due to {exc} ",
) )

View File

@ -11,7 +11,6 @@
""" """
Enable/unpause a DAG Enable/unpause a DAG
""" """
import logging
import traceback import traceback
from airflow.api_connexion import security from airflow.api_connexion import security
@ -22,6 +21,9 @@ from openmetadata_managed_apis.api.app import blueprint
from openmetadata_managed_apis.api.response import ApiResponse from openmetadata_managed_apis.api.response import ApiResponse
from openmetadata_managed_apis.api.utils import get_request_dag_id from openmetadata_managed_apis.api.utils import get_request_dag_id
from openmetadata_managed_apis.operations.state import enable_dag from openmetadata_managed_apis.operations.state import enable_dag
from openmetadata_managed_apis.utils.logger import routes_logger
logger = routes_logger()
@blueprint.route("/enable", methods=["POST"]) @blueprint.route("/enable", methods=["POST"])
@ -37,8 +39,9 @@ def enable() -> Response:
return enable_dag(dag_id) return enable_dag(dag_id)
except Exception as exc: except Exception as exc:
logging.info(f"Failed to get last run logs for '{dag_id}'") logger.debug(traceback.format_exc())
logger.error(f"Failed to enable dag [{dag_id}]: {exc}")
return ApiResponse.error( return ApiResponse.error(
status=ApiResponse.STATUS_SERVER_ERROR, status=ApiResponse.STATUS_SERVER_ERROR,
error=f"Failed to get last run logs for '{dag_id}' due to {exc} - {traceback.format_exc()}", error=f"Failed to enable dag [{dag_id}] due to {exc} ",
) )

View File

@ -13,6 +13,8 @@ Health endpoint. Globally accessible
""" """
import traceback import traceback
from openmetadata_managed_apis.utils.logger import routes_logger
try: try:
from importlib.metadata import version from importlib.metadata import version
except ImportError: except ImportError:
@ -22,6 +24,8 @@ from airflow.www.app import csrf
from openmetadata_managed_apis.api.app import blueprint from openmetadata_managed_apis.api.app import blueprint
from openmetadata_managed_apis.api.response import ApiResponse from openmetadata_managed_apis.api.response import ApiResponse
logger = routes_logger()
@blueprint.route("/health", methods=["GET"]) @blueprint.route("/health", methods=["GET"])
@csrf.exempt @csrf.exempt
@ -34,8 +38,11 @@ def health():
return ApiResponse.success( return ApiResponse.success(
{"status": "healthy", "version": version("openmetadata-ingestion")} {"status": "healthy", "version": version("openmetadata-ingestion")}
) )
except Exception as err: except Exception as exc:
msg = f"Internal error obtaining REST status due to [{exc}] "
logger.debug(traceback.format_exc())
logger.error(msg)
return ApiResponse.error( return ApiResponse.error(
status=ApiResponse.STATUS_SERVER_ERROR, status=ApiResponse.STATUS_SERVER_ERROR,
error=f"Internal error obtaining REST status - {err} - {traceback.format_exc()}", error=msg,
) )

View File

@ -14,6 +14,7 @@ IP endpoint
import traceback import traceback
import requests import requests
from openmetadata_managed_apis.utils.logger import routes_logger
try: try:
from importlib.metadata import version from importlib.metadata import version
@ -26,6 +27,8 @@ from airflow.www.app import csrf
from openmetadata_managed_apis.api.app import blueprint from openmetadata_managed_apis.api.app import blueprint
from openmetadata_managed_apis.api.response import ApiResponse from openmetadata_managed_apis.api.response import ApiResponse
logger = routes_logger()
@blueprint.route("/ip", methods=["GET"]) @blueprint.route("/ip", methods=["GET"])
@csrf.exempt @csrf.exempt
@ -38,8 +41,11 @@ def get_host_ip():
try: try:
return ApiResponse.success({"ip": requests.get("https://api.ipify.org").text}) return ApiResponse.success({"ip": requests.get("https://api.ipify.org").text})
except Exception as err: except Exception as exc:
msg = f"Internal error obtaining host IP due to [{exc}] "
logger.debug(traceback.format_exc())
logger.error(msg)
return ApiResponse.error( return ApiResponse.error(
status=ApiResponse.STATUS_SERVER_ERROR, status=ApiResponse.STATUS_SERVER_ERROR,
error=f"Internal error obtaining host IP - {err} - {traceback.format_exc()}", error=msg,
) )

View File

@ -11,7 +11,6 @@
""" """
Kill all not finished runs Kill all not finished runs
""" """
import logging
import traceback import traceback
from airflow.api_connexion import security from airflow.api_connexion import security
@ -22,6 +21,9 @@ from openmetadata_managed_apis.api.app import blueprint
from openmetadata_managed_apis.api.response import ApiResponse from openmetadata_managed_apis.api.response import ApiResponse
from openmetadata_managed_apis.api.utils import get_request_dag_id from openmetadata_managed_apis.api.utils import get_request_dag_id
from openmetadata_managed_apis.operations.kill_all import kill_all from openmetadata_managed_apis.operations.kill_all import kill_all
from openmetadata_managed_apis.utils.logger import routes_logger
logger = routes_logger()
@blueprint.route("/kill", methods=["POST"]) @blueprint.route("/kill", methods=["POST"])
@ -38,8 +40,9 @@ def kill() -> Response:
return kill_all(dag_id) return kill_all(dag_id)
except Exception as exc: except Exception as exc:
logging.info(f"Failed to get kill runs for '{dag_id}'") logger.debug(traceback.format_exc())
logger.error(f"Failed to kill runs for [{dag_id}]: {exc}")
return ApiResponse.error( return ApiResponse.error(
status=ApiResponse.STATUS_SERVER_ERROR, status=ApiResponse.STATUS_SERVER_ERROR,
error=f"Failed to kill runs for '{dag_id}' due to {exc} - {traceback.format_exc()}", error=f"Failed to kill runs for [{dag_id}] due to [{exc}] ",
) )

View File

@ -11,7 +11,6 @@
""" """
Return the last DagRun logs for each task Return the last DagRun logs for each task
""" """
import logging
import traceback import traceback
from airflow.api_connexion import security from airflow.api_connexion import security
@ -22,6 +21,9 @@ from openmetadata_managed_apis.api.app import blueprint
from openmetadata_managed_apis.api.response import ApiResponse from openmetadata_managed_apis.api.response import ApiResponse
from openmetadata_managed_apis.api.utils import get_arg_dag_id from openmetadata_managed_apis.api.utils import get_arg_dag_id
from openmetadata_managed_apis.operations.last_dag_logs import last_dag_logs from openmetadata_managed_apis.operations.last_dag_logs import last_dag_logs
from openmetadata_managed_apis.utils.logger import routes_logger
logger = routes_logger()
@blueprint.route("/last_dag_logs", methods=["GET"]) @blueprint.route("/last_dag_logs", methods=["GET"])
@ -38,8 +40,9 @@ def last_logs() -> Response:
return last_dag_logs(dag_id) return last_dag_logs(dag_id)
except Exception as exc: except Exception as exc:
logging.info(f"Failed to get last run logs for '{dag_id}'") logger.debug(traceback.format_exc())
logger.error(f"Failed to get last run logs for [{dag_id}]: {exc}")
return ApiResponse.error( return ApiResponse.error(
status=ApiResponse.STATUS_SERVER_ERROR, status=ApiResponse.STATUS_SERVER_ERROR,
error=f"Failed to get last run logs for '{dag_id}' due to {exc} - {traceback.format_exc()}", error=f"Failed to get last run logs for [{dag_id}] due to [{exc}] ",
) )

View File

@ -11,7 +11,6 @@
""" """
Return a list of the 10 last status for the ingestion Pipeline Return a list of the 10 last status for the ingestion Pipeline
""" """
import logging
import traceback import traceback
from airflow.api_connexion import security from airflow.api_connexion import security
@ -22,6 +21,9 @@ from openmetadata_managed_apis.api.app import blueprint
from openmetadata_managed_apis.api.response import ApiResponse from openmetadata_managed_apis.api.response import ApiResponse
from openmetadata_managed_apis.api.utils import get_arg_dag_id from openmetadata_managed_apis.api.utils import get_arg_dag_id
from openmetadata_managed_apis.operations.status import status from openmetadata_managed_apis.operations.status import status
from openmetadata_managed_apis.utils.logger import routes_logger
logger = routes_logger()
@blueprint.route("/status", methods=["GET"]) @blueprint.route("/status", methods=["GET"])
@ -37,8 +39,9 @@ def dag_status() -> Response:
return status(dag_id) return status(dag_id)
except Exception as exc: except Exception as exc:
logging.info(f"Failed to get dag {dag_id} status") logger.debug(traceback.format_exc())
logger.error(f"Failed to get dag [{dag_id}] status: {exc}")
return ApiResponse.error( return ApiResponse.error(
status=ApiResponse.STATUS_SERVER_ERROR, status=ApiResponse.STATUS_SERVER_ERROR,
error=f"Failed to get status for {dag_id} due to {exc} - {traceback.format_exc()}", error=f"Failed to get status for [{dag_id}] due to [{exc}] ",
) )

View File

@ -20,10 +20,13 @@ from flask import Response, request
from openmetadata_managed_apis.api.app import blueprint from openmetadata_managed_apis.api.app import blueprint
from openmetadata_managed_apis.api.response import ApiResponse from openmetadata_managed_apis.api.response import ApiResponse
from openmetadata_managed_apis.operations.test_connection import test_source_connection from openmetadata_managed_apis.operations.test_connection import test_source_connection
from openmetadata_managed_apis.utils.logger import routes_logger
from pydantic import ValidationError from pydantic import ValidationError
from metadata.ingestion.api.parser import parse_test_connection_request_gracefully from metadata.ingestion.api.parser import parse_test_connection_request_gracefully
logger = routes_logger()
@blueprint.route("/test_connection", methods=["POST"]) @blueprint.route("/test_connection", methods=["POST"])
@csrf.exempt @csrf.exempt
@ -44,13 +47,19 @@ def test_connection() -> Response:
return response return response
except ValidationError as err: except ValidationError as err:
msg = f"Request Validation Error parsing payload. (Workflow)Source expected: {err}"
logger.debug(traceback.format_exc())
logger.error(msg)
return ApiResponse.error( return ApiResponse.error(
status=ApiResponse.STATUS_BAD_REQUEST, status=ApiResponse.STATUS_BAD_REQUEST,
error=f"Request Validation Error parsing payload. (Workflow)Source expected - {err}", error=msg,
) )
except Exception as err: except Exception as exc:
msg = f"Internal error testing connection due to [{exc}] "
logger.debug(traceback.format_exc())
logger.error(msg)
return ApiResponse.error( return ApiResponse.error(
status=ApiResponse.STATUS_SERVER_ERROR, status=ApiResponse.STATUS_SERVER_ERROR,
error=f"Internal error testing connection {err} - {traceback.format_exc()}", error=msg,
) )

View File

@ -11,7 +11,6 @@
""" """
Trigger endpoint Trigger endpoint
""" """
import logging
import traceback import traceback
from airflow.api_connexion import security from airflow.api_connexion import security
@ -22,6 +21,9 @@ from openmetadata_managed_apis.api.app import blueprint
from openmetadata_managed_apis.api.response import ApiResponse from openmetadata_managed_apis.api.response import ApiResponse
from openmetadata_managed_apis.api.utils import get_request_arg, get_request_dag_id from openmetadata_managed_apis.api.utils import get_request_arg, get_request_dag_id
from openmetadata_managed_apis.operations.trigger import trigger from openmetadata_managed_apis.operations.trigger import trigger
from openmetadata_managed_apis.utils.logger import routes_logger
logger = routes_logger()
@blueprint.route("/trigger", methods=["POST"]) @blueprint.route("/trigger", methods=["POST"])
@ -40,8 +42,9 @@ def trigger_dag() -> Response:
return response return response
except Exception as exc: except Exception as exc:
logging.info(f"Failed to trigger dag {dag_id}") logger.debug(traceback.format_exc())
logger.error(f"Failed to trigger dag [{dag_id}]: {exc}")
return ApiResponse.error( return ApiResponse.error(
status=ApiResponse.STATUS_SERVER_ERROR, status=ApiResponse.STATUS_SERVER_ERROR,
error=f"Workflow {dag_id} has filed to trigger due to {exc} - {traceback.format_exc()}", error=f"Workflow [{dag_id}] has failed to trigger due to [{exc}] ",
) )

View File

@ -10,10 +10,10 @@
# limitations under the License. # limitations under the License.
import importlib import importlib
import logging
import os import os
import re import re
import sys import sys
import traceback
from multiprocessing import Process from multiprocessing import Process
from typing import Optional from typing import Optional
@ -21,6 +21,9 @@ from airflow import settings
from airflow.jobs.scheduler_job import SchedulerJob from airflow.jobs.scheduler_job import SchedulerJob
from airflow.models import DagBag from airflow.models import DagBag
from flask import request from flask import request
from openmetadata_managed_apis.utils.logger import api_logger
logger = api_logger()
class MissingArgException(Exception): class MissingArgException(Exception):
@ -105,8 +108,9 @@ class ScanDagsTask(Process):
scheduler_job.run() scheduler_job.run()
try: try:
scheduler_job.kill() scheduler_job.kill()
except Exception: except Exception as exc:
logging.info("Rescan Complete: Killed Job") logger.debug(traceback.format_exc())
logger.info(f"Rescan Complete: Killed Job: {exc}")
def scan_dags_job_background(): def scan_dags_job_background():

View File

@ -9,7 +9,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import logging
import pkgutil import pkgutil
import traceback import traceback
from pathlib import Path from pathlib import Path
@ -30,12 +29,15 @@ from openmetadata_managed_apis.api.utils import (
import_path, import_path,
scan_dags_job_background, scan_dags_job_background,
) )
from openmetadata_managed_apis.utils.logger import operations_logger
from metadata.generated.schema.entity.services.ingestionPipelines.ingestionPipeline import ( from metadata.generated.schema.entity.services.ingestionPipelines.ingestionPipeline import (
IngestionPipeline, IngestionPipeline,
) )
from metadata.ingestion.models.encoders import show_secrets_encoder from metadata.ingestion.models.encoders import show_secrets_encoder
logger = operations_logger()
class DeployDagException(Exception): class DeployDagException(Exception):
""" """
@ -51,7 +53,7 @@ class DagDeployer:
def __init__(self, ingestion_pipeline: IngestionPipeline): def __init__(self, ingestion_pipeline: IngestionPipeline):
logging.info( logger.info(
f"Received the following Airflow Configuration: {ingestion_pipeline.airflowConfig}" f"Received the following Airflow Configuration: {ingestion_pipeline.airflowConfig}"
) )
@ -66,7 +68,7 @@ class DagDeployer:
return the path for the Jinja rendering. return the path for the Jinja rendering.
""" """
logging.info(f"Saving file to {dag_config_file_path}") logger.info(f"Saving file to {dag_config_file_path}")
with open(dag_config_file_path, "w") as outfile: with open(dag_config_file_path, "w") as outfile:
outfile.write(self.ingestion_pipeline.json(encoder=show_secrets_encoder)) outfile.write(self.ingestion_pipeline.json(encoder=show_secrets_encoder))
@ -96,11 +98,12 @@ class DagDeployer:
try: try:
dag_file = import_path(str(dag_py_file)) dag_file = import_path(str(dag_py_file))
except Exception as exc: except Exception as exc:
logging.error(f"Failed to import dag_file {dag_py_file} due to {exc}") logger.debug(traceback.format_exc())
logger.error(f"Failed to import dag_file [{dag_py_file}]: {exc}")
raise exc raise exc
if dag_file is None: if dag_file is None:
raise DeployDagException(f"Failed to import dag_file {dag_py_file}") raise DeployDagException(f"Failed to import dag_file [{dag_py_file}]")
return str(dag_py_file) return str(dag_py_file)
@ -117,9 +120,9 @@ class DagDeployer:
with settings.Session() as session: with settings.Session() as session:
try: try:
dag_bag = get_dagbag() dag_bag = get_dagbag()
logging.info("dagbag size {}".format(dag_bag.size())) logger.info("dagbag size {}".format(dag_bag.size()))
found_dags = dag_bag.process_file(dag_py_file) found_dags = dag_bag.process_file(dag_py_file)
logging.info("processed dags {}".format(found_dags)) logger.info("processed dags {}".format(found_dags))
dag: DAG = dag_bag.get_dag(self.dag_id, session=session) dag: DAG = dag_bag.get_dag(self.dag_id, session=session)
# Sync to DB # Sync to DB
dag.sync_to_db(session=session) dag.sync_to_db(session=session)
@ -128,7 +131,7 @@ class DagDeployer:
.filter(DagModel.dag_id == self.dag_id) .filter(DagModel.dag_id == self.dag_id)
.first() .first()
) )
logging.info("dag_model:" + str(dag_model)) logger.info("dag_model:" + str(dag_model))
# Scheduler Job to scan dags # Scheduler Job to scan dags
scan_dags_job_background() scan_dags_job_background()
@ -136,20 +139,17 @@ class DagDeployer:
{"message": f"Workflow [{self.dag_id}] has been created"} {"message": f"Workflow [{self.dag_id}] has been created"}
) )
except Exception as exc: except Exception as exc:
logging.info(f"Failed to serialize the dag {exc}") msg = f"Workflow [{self.dag_id}] failed to refresh due to [{exc}]"
return ApiResponse.server_error( logger.debug(traceback.format_exc())
{ logger.error(msg)
"message": f"Workflow [{self.dag_id}] failed to refresh due to [{exc}] " return ApiResponse.server_error({"message": msg})
+ f"- {traceback.format_exc()}"
}
)
def deploy(self): def deploy(self):
""" """
Run all methods to deploy the DAG Run all methods to deploy the DAG
""" """
dag_config_file_path = Path(DAG_GENERATED_CONFIGS) / f"{self.dag_id}.json" dag_config_file_path = Path(DAG_GENERATED_CONFIGS) / f"{self.dag_id}.json"
logging.info(f"Config file under {dag_config_file_path}") logger.info(f"Config file under {dag_config_file_path}")
dag_runner_config = self.store_airflow_pipeline_config(dag_config_file_path) dag_runner_config = self.store_airflow_pipeline_config(dag_config_file_path)
dag_py_file = self.store_and_validate_dag_file(dag_runner_config) dag_py_file = self.store_and_validate_dag_file(dag_runner_config)

View File

@ -17,7 +17,7 @@ from airflow import settings
from airflow.models import DagModel, DagRun, TaskInstance from airflow.models import DagModel, DagRun, TaskInstance
from airflow.utils.state import DagRunState, TaskInstanceState from airflow.utils.state import DagRunState, TaskInstanceState
from flask import Response from flask import Response
from openmetadata_managed_apis.api.response import ApiResponse, ResponseFormat from openmetadata_managed_apis.api.response import ApiResponse
def kill_all(dag_id: str) -> Response: def kill_all(dag_id: str) -> Response:

View File

@ -12,8 +12,11 @@
Module containing the logic to test a connection Module containing the logic to test a connection
from a WorkflowSource from a WorkflowSource
""" """
import traceback
from flask import Response from flask import Response
from openmetadata_managed_apis.api.response import ApiResponse from openmetadata_managed_apis.api.response import ApiResponse
from openmetadata_managed_apis.utils.logger import operations_logger
from openmetadata_managed_apis.workflows.ingestion.credentials_builder import ( from openmetadata_managed_apis.workflows.ingestion.credentials_builder import (
build_secrets_manager_credentials, build_secrets_manager_credentials,
) )
@ -28,6 +31,8 @@ from metadata.utils.connections import (
) )
from metadata.utils.secrets.secrets_manager_factory import get_secrets_manager from metadata.utils.secrets.secrets_manager_factory import get_secrets_manager
logger = operations_logger()
def test_source_connection( def test_source_connection(
test_service_connection: TestServiceConnectionRequest, test_service_connection: TestServiceConnectionRequest,
@ -55,10 +60,13 @@ def test_source_connection(
try: try:
test_connection(connection) test_connection(connection)
except SourceConnectionException as err: except SourceConnectionException as exc:
msg = f"Connection error from [{connection}]: {exc}"
logger.debug(traceback.format_exc())
logger.error(msg)
return ApiResponse.error( return ApiResponse.error(
status=ApiResponse.STATUS_SERVER_ERROR, status=ApiResponse.STATUS_SERVER_ERROR,
error=f"Connection error from {connection} - {err}", error=msg,
) )
return ApiResponse.success({"message": f"Connection with {connection} successful!"}) return ApiResponse.success({"message": f"Connection with {connection} successful!"})

View File

@ -0,0 +1,46 @@
import logging
from enum import Enum
from logging.handlers import RotatingFileHandler
from airflow.configuration import conf
BASE_LOGGING_FORMAT = (
"[%(asctime)s] %(levelname)-8s {%(name)s:%(module)s:%(lineno)d} - %(message)s"
)
class Loggers(Enum):
    """Logger names used by the OpenMetadata managed Airflow APIs, one per layer."""

    # Flask blueprint route handlers (/deploy, /trigger, /status, ...)
    API_ROUTES = "AirflowAPIRoutes"
    # Generic API-level code (error handlers, request utilities)
    API = "AirflowAPI"
    # DAG operations (deploy, delete, kill, test_connection, ...)
    OPERATIONS = "AirflowOperations"
    # Workflow/DAG building code (workflow factory and builder)
    WORKFLOW = "AirflowWorkflow"
def build_logger(logger_name: str) -> logging.Logger:
    """Return the named logger configured with the shared rotating file handler.

    All the managed-API loggers write to a single
    ``openmetadata_airflow_api.log`` file under Airflow's ``base_log_folder``
    (falls back to the current directory when the option is unset).

    :param logger_name: name of the logger to create or fetch
    :return: a DEBUG-level logger with a RotatingFileHandler attached
    """
    logger = logging.getLogger(logger_name)
    # logging.getLogger() caches loggers by name, and the *_logger() factories
    # are called at import time from many modules. Only attach the handler the
    # first time; otherwise each call stacks another handler on the same logger
    # and every record gets written multiple times to the log file.
    if not logger.handlers:
        log_format = logging.Formatter(BASE_LOGGING_FORMAT)
        rotating_log_handler = RotatingFileHandler(
            f"{conf.get('logging', 'base_log_folder', fallback='')}/openmetadata_airflow_api.log",
            maxBytes=1000000,
            backupCount=10,
        )
        rotating_log_handler.setFormatter(log_format)
        logger.addHandler(rotating_log_handler)
        logger.setLevel(logging.DEBUG)
    return logger
def routes_logger() -> logging.Logger:
    """Logger used by the Flask blueprint route handlers."""
    logger_name = Loggers.API_ROUTES.value
    return build_logger(logger_name)
def api_logger() -> logging.Logger:
    """Logger for generic API-level code (error handlers, request utilities).

    Annotated with the return type for consistency with ``routes_logger``.
    """
    return build_logger(Loggers.API.value)
def operations_logger() -> logging.Logger:
    """Logger for DAG operations (deploy, delete, kill, test_connection, ...).

    Annotated with the return type for consistency with ``routes_logger``.
    """
    return build_logger(Loggers.OPERATIONS.value)
def workflow_logger() -> logging.Logger:
    """Logger for workflow/DAG building code (workflow factory and builder).

    Annotated with the return type for consistency with ``routes_logger``.
    """
    return build_logger(Loggers.WORKFLOW.value)

View File

@ -54,6 +54,18 @@ from metadata.generated.schema.metadataIngestion.workflow import WorkflowConfig
from metadata.ingestion.api.workflow import Workflow from metadata.ingestion.api.workflow import Workflow
class InvalidServiceException(Exception):
    """
    Exception raised when the service entity could not be fetched from the server
    """
class ClientInitializationError(Exception):
    """
    Exception raised when the OpenMetadata client could not be initialized
    """
def build_source(ingestion_pipeline: IngestionPipeline) -> WorkflowSource: def build_source(ingestion_pipeline: IngestionPipeline) -> WorkflowSource:
""" """
Use the service EntityReference to build the Source. Use the service EntityReference to build the Source.
@ -69,7 +81,10 @@ def build_source(ingestion_pipeline: IngestionPipeline) -> WorkflowSource:
build_secrets_manager_credentials(secrets_manager) build_secrets_manager_credentials(secrets_manager)
) )
metadata = OpenMetadata(config=ingestion_pipeline.openMetadataServerConnection) try:
metadata = OpenMetadata(config=ingestion_pipeline.openMetadataServerConnection)
except Exception as exc:
raise ClientInitializationError(f"Failed to initialize the client: {exc}") from exc
service_type = ingestion_pipeline.service.type service_type = ingestion_pipeline.service.type
service: Optional[ service: Optional[
@ -105,7 +120,7 @@ def build_source(ingestion_pipeline: IngestionPipeline) -> WorkflowSource:
) )
if not service: if not service:
raise ValueError(f"Could not get service from type {service_type}") raise InvalidServiceException(f"Could not get service from type {service_type}")
return WorkflowSource( return WorkflowSource(
type=service.serviceType.value.lower(), type=service.serviceType.value.lower(),

View File

@ -9,18 +9,17 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import logging
from airflow import DAG from airflow import DAG
# these are params only used in the DAG factory, not in the tasks # these are params only used in the DAG factory, not in the tasks
from openmetadata_managed_apis.utils.logger import workflow_logger
from openmetadata_managed_apis.workflows.ingestion.registry import build_registry from openmetadata_managed_apis.workflows.ingestion.registry import build_registry
from metadata.generated.schema.entity.services.ingestionPipelines.ingestionPipeline import ( from metadata.generated.schema.entity.services.ingestionPipelines.ingestionPipeline import (
IngestionPipeline, IngestionPipeline,
) )
logger = logging.getLogger(__name__) logger = workflow_logger()
class WorkflowBuilder: class WorkflowBuilder:
@ -44,14 +43,14 @@ class WorkflowBuilder:
build_fn = build_registry.registry.get(dag_type) build_fn = build_registry.registry.get(dag_type)
if not build_fn: if not build_fn:
raise ValueError( msg = f"Cannot find build function for {dag_type} in {build_registry.registry}"
f"Cannot find build function for {dag_type} in {build_registry.registry}" logger.error(msg)
) raise ValueError(msg)
dag = build_fn(self.airflow_pipeline) dag = build_fn(self.airflow_pipeline)
if not isinstance(dag, DAG): if not isinstance(dag, DAG):
raise ValueError( msg = f"Invalid return type from {build_fn.__name__} when building {dag_type}."
f"Invalid return type from {build_fn.__name__} when building {dag_type}." logger.error(msg)
) raise ValueError(msg)
return dag return dag

View File

@ -14,13 +14,14 @@ based on incoming configs.
Called in dag_runner.j2 Called in dag_runner.j2
""" """
import logging
import pathlib import pathlib
import traceback
from typing import Any, Dict from typing import Any, Dict
from airflow.models import DAG from airflow.models import DAG
# these are params that cannot be a dag name # these are params that cannot be a dag name
from openmetadata_managed_apis.utils.logger import workflow_logger
from openmetadata_managed_apis.workflows.config import load_config_file from openmetadata_managed_apis.workflows.config import load_config_file
from openmetadata_managed_apis.workflows.workflow_builder import WorkflowBuilder from openmetadata_managed_apis.workflows.workflow_builder import WorkflowBuilder
@ -28,7 +29,7 @@ from metadata.generated.schema.entity.services.ingestionPipelines.ingestionPipel
IngestionPipeline, IngestionPipeline,
) )
logger = logging.getLogger(__name__) logger = workflow_logger()
class WorkflowCreationError(Exception): class WorkflowCreationError(Exception):
@ -60,10 +61,11 @@ class WorkflowFactory:
workflow_builder: WorkflowBuilder = WorkflowBuilder(self.airflow_pipeline) workflow_builder: WorkflowBuilder = WorkflowBuilder(self.airflow_pipeline)
try: try:
workflow = workflow_builder.build() workflow = workflow_builder.build()
except Exception as err: except Exception as exc:
raise WorkflowCreationError( msg = f"Failed to generate workflow [{self.airflow_pipeline.name.__root__}] verify config is correct: {exc}"
f"Failed to generate workflow {self.airflow_pipeline.name.__root__}. verify config is correct" logger.debug(traceback.format_exc())
) from err logger.error(msg)
raise WorkflowCreationError(msg) from exc
return workflow return workflow
@staticmethod @staticmethod
@ -74,7 +76,7 @@ class WorkflowFactory:
dag = self.build_dag() dag = self.build_dag()
self.dag = dag self.dag = dag
self.register_dag(dag, globals_namespace) self.register_dag(dag, globals_namespace)
logger.info("registered the dag") logger.info(f"Registered the dag: {dag.dag_id}")
def get_dag(self) -> DAG: def get_dag(self) -> DAG:
return self.dag return self.dag