mirror of
https://github.com/datahub-project/datahub.git
synced 2025-09-22 07:28:46 +00:00
fix(ingest): detect malformed Glue S3 script paths (#3037)
This commit is contained in:
parent
f1bea875b2
commit
3d0534be4c
@ -111,6 +111,20 @@ class GlueSource(Source):
|
||||
S3 path to the job's Python script.
|
||||
"""
|
||||
|
||||
# work around an AWS bug where the script path carries a duplicated "s3://" prefix
|
||||
if script_path.lower().startswith("s3://s3://"):
|
||||
script_path = script_path[5:]
|
||||
|
||||
# catch any other cases where the script path is invalid
|
||||
if not script_path.startswith("s3://"):
|
||||
|
||||
self.report.report_warning(
|
||||
script_path,
|
||||
f"Error parsing DAG for Glue job. The script {script_path} is not a valid S3 path.",
|
||||
)
|
||||
|
||||
return None
|
||||
|
||||
# extract the script's bucket and key
|
||||
url = urlparse(script_path, allow_fragments=False)
|
||||
bucket = url.netloc
|
||||
|
Loading…
x
Reference in New Issue
Block a user