mirror of
https://github.com/datahub-project/datahub.git
synced 2025-09-22 15:43:21 +00:00
fix(ingest): detect malformed Glue S3 script paths (#3037)
This commit is contained in:
parent
f1bea875b2
commit
3d0534be4c
@ -111,6 +111,20 @@ class GlueSource(Source):
|
|||||||
S3 path to the job's Python script.
|
S3 path to the job's Python script.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# work around an AWS bug where the script path has a duplicated "s3://" prefix (i.e. starts with "s3://s3://")
|
||||||
|
if script_path.lower().startswith("s3://s3://"):
|
||||||
|
script_path = script_path[5:]
|
||||||
|
|
||||||
|
# catch any other cases where the script path is invalid
|
||||||
|
if not script_path.startswith("s3://"):
|
||||||
|
|
||||||
|
self.report.report_warning(
|
||||||
|
script_path,
|
||||||
|
f"Error parsing DAG for Glue job. The script {script_path} is not a valid S3 path.",
|
||||||
|
)
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
# extract the script's bucket and key
|
# extract the script's bucket and key
|
||||||
url = urlparse(script_path, allow_fragments=False)
|
url = urlparse(script_path, allow_fragments=False)
|
||||||
bucket = url.netloc
|
bucket = url.netloc
|
||||||
|
Loading…
x
Reference in New Issue
Block a user