mirror of
https://github.com/datahub-project/datahub.git
synced 2025-09-27 01:55:17 +00:00
fix(glue): fix typo in reported warning, report with flow_urn (#8138)
This commit is contained in:
parent
ab3fe0da81
commit
f3cf9b7d5a
@ -304,7 +304,9 @@ class GlueSource(StatefulIngestionSourceBase):
|
|||||||
|
|
||||||
return jobs
|
return jobs
|
||||||
|
|
||||||
def get_dataflow_graph(self, script_path: str) -> Optional[Dict[str, Any]]:
|
def get_dataflow_graph(
|
||||||
|
self, script_path: str, flow_urn: str
|
||||||
|
) -> Optional[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Get the DAG of transforms and data sources/sinks for a job.
|
Get the DAG of transforms and data sources/sinks for a job.
|
||||||
|
|
||||||
@ -320,8 +322,8 @@ class GlueSource(StatefulIngestionSourceBase):
|
|||||||
|
|
||||||
# catch any other cases where the script path is invalid
|
# catch any other cases where the script path is invalid
|
||||||
if not script_path.startswith("s3://"):
|
if not script_path.startswith("s3://"):
|
||||||
self.report.report_warning(
|
self.report_warning(
|
||||||
script_path,
|
flow_urn,
|
||||||
f"Error parsing DAG for Glue job. The script {script_path} is not a valid S3 path.",
|
f"Error parsing DAG for Glue job. The script {script_path} is not a valid S3 path.",
|
||||||
)
|
)
|
||||||
self.report.num_job_script_location_invalid += 1
|
self.report.num_job_script_location_invalid += 1
|
||||||
@ -338,8 +340,8 @@ class GlueSource(StatefulIngestionSourceBase):
|
|||||||
try:
|
try:
|
||||||
obj = self.s3_client.get_object(Bucket=bucket, Key=key)
|
obj = self.s3_client.get_object(Bucket=bucket, Key=key)
|
||||||
except botocore.exceptions.ClientError as e:
|
except botocore.exceptions.ClientError as e:
|
||||||
self.report.report_failure(
|
self.report_warning(
|
||||||
script_path,
|
flow_urn,
|
||||||
f"Unable to download DAG for Glue job from {script_path}, so job subtasks and lineage will be missing: {e}",
|
f"Unable to download DAG for Glue job from {script_path}, so job subtasks and lineage will be missing: {e}",
|
||||||
)
|
)
|
||||||
self.report.num_job_script_failed_download += 1
|
self.report.num_job_script_failed_download += 1
|
||||||
@ -353,8 +355,8 @@ class GlueSource(StatefulIngestionSourceBase):
|
|||||||
|
|
||||||
# sometimes the Python script can be user-modified and the script is not valid for graph extraction
|
# sometimes the Python script can be user-modified and the script is not valid for graph extraction
|
||||||
except self.glue_client.exceptions.InvalidInputException as e:
|
except self.glue_client.exceptions.InvalidInputException as e:
|
||||||
self.report.report_warning(
|
self.report_warning(
|
||||||
script_path,
|
flow_urn,
|
||||||
f"Error parsing DAG for Glue job. The script {script_path} cannot be processed by Glue (this usually occurs when it has been user-modified): {e}",
|
f"Error parsing DAG for Glue job. The script {script_path} cannot be processed by Glue (this usually occurs when it has been user-modified): {e}",
|
||||||
)
|
)
|
||||||
self.report.num_job_script_failed_parsing += 1
|
self.report.num_job_script_failed_parsing += 1
|
||||||
@ -425,9 +427,9 @@ class GlueSource(StatefulIngestionSourceBase):
|
|||||||
s3_uri = self.get_s3_uri(node_args)
|
s3_uri = self.get_s3_uri(node_args)
|
||||||
|
|
||||||
if s3_uri is None:
|
if s3_uri is None:
|
||||||
self.report.report_warning(
|
self.report_warning(
|
||||||
f"{node['Nodetype']}-{node['Id']}",
|
flow_urn,
|
||||||
f"Could not find script path for job {node['Nodetype']}-{node['Id']} in flow {flow_urn}. Skipping",
|
f"Could not find script path for job {node['NodeType']}-{node['Id']} in flow {flow_urn}. Skipping",
|
||||||
)
|
)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@ -461,12 +463,9 @@ class GlueSource(StatefulIngestionSourceBase):
|
|||||||
|
|
||||||
else:
|
else:
|
||||||
if self.source_config.ignore_unsupported_connectors:
|
if self.source_config.ignore_unsupported_connectors:
|
||||||
logger.debug(
|
self.report_warning(
|
||||||
f"Unrecognized node {node['Nodetype']}-{node['Id']} in flow {flow_urn}. Args : {node_args}",
|
flow_urn,
|
||||||
)
|
f"Unrecognized node {node['NodeType']}-{node['Id']} in flow {flow_urn}. Args: {node_args} Skipping",
|
||||||
self.report.report_warning(
|
|
||||||
f"{node['Nodetype']}-{node['Id']}",
|
|
||||||
f"Unrecognized node {node['Nodetype']}-{node['Id']} in flow {flow_urn}. Skipping",
|
|
||||||
)
|
)
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
@ -528,7 +527,7 @@ class GlueSource(StatefulIngestionSourceBase):
|
|||||||
# Source and Target for some edges is not available
|
# Source and Target for some edges is not available
|
||||||
# in nodes. this may lead to broken edge in lineage.
|
# in nodes. this may lead to broken edge in lineage.
|
||||||
if source_node is None or target_node is None:
|
if source_node is None or target_node is None:
|
||||||
self.report.report_warning(
|
self.report_warning(
|
||||||
flow_urn,
|
flow_urn,
|
||||||
f"Unrecognized source or target node in edge: {edge}. Skipping."
|
f"Unrecognized source or target node in edge: {edge}. Skipping."
|
||||||
"This may lead to missing lineage",
|
"This may lead to missing lineage",
|
||||||
@ -992,7 +991,7 @@ class GlueSource(StatefulIngestionSourceBase):
|
|||||||
dag: Optional[Dict[str, Any]] = None
|
dag: Optional[Dict[str, Any]] = None
|
||||||
|
|
||||||
if job_script_location is not None:
|
if job_script_location is not None:
|
||||||
dag = self.get_dataflow_graph(job_script_location)
|
dag = self.get_dataflow_graph(job_script_location, flow_urn)
|
||||||
else:
|
else:
|
||||||
self.report.num_job_script_location_missing += 1
|
self.report.num_job_script_location_missing += 1
|
||||||
|
|
||||||
@ -1213,3 +1212,7 @@ class GlueSource(StatefulIngestionSourceBase):
|
|||||||
|
|
||||||
def get_report(self):
|
def get_report(self):
|
||||||
return self.report
|
return self.report
|
||||||
|
|
||||||
|
def report_warning(self, key: str, reason: str) -> None:
|
||||||
|
logger.warning(f"{key}: {reason}")
|
||||||
|
self.report.report_warning(key, reason)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user