From cf6673cecf7f7ac1ad82b16ce9c8e9bf65bb7d57 Mon Sep 17 00:00:00 2001 From: Jake Poznanski Date: Thu, 30 Jan 2025 13:42:42 -0800 Subject: [PATCH] Pipeline fixes --- olmocr/pipeline.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/olmocr/pipeline.py b/olmocr/pipeline.py index d8408bd..ea554ea 100644 --- a/olmocr/pipeline.py +++ b/olmocr/pipeline.py @@ -701,10 +701,21 @@ def submit_beaker_job(args): b.secret.write(f"{owner}-WEKA_ACCESS_KEY_ID", os.environ.get("WEKA_ACCESS_KEY_ID", ""), args.beaker_workspace) b.secret.write(f"{owner}-WEKA_SECRET_ACCESS_KEY", os.environ.get("WEKA_SECRET_ACCESS_KEY", ""), args.beaker_workspace) - b.secret.write(f"{owner}-AWS_CREDENTIALS_FILE", open(os.path.join(os.path.expanduser("~"), ".aws", "credentials")).read(), args.beaker_workspace) + b.secret.write( + f"{owner}-AWS_CREDENTIALS_FILE", + open(os.path.join(os.path.expanduser("~"), ".aws", "credentials")).read(), + args.beaker_workspace, + ) + + env_var_secrets = [ + EnvVar(name="WEKA_ACCESS_KEY_ID", secret=f"{owner}-WEKA_ACCESS_KEY_ID"), + EnvVar(name="WEKA_SECRET_ACCESS_KEY", secret=f"{owner}-WEKA_SECRET_ACCESS_KEY"), + EnvVar(name="AWS_CREDENTIALS_FILE", secret=f"{owner}-AWS_CREDENTIALS_FILE"), + ] try: b.secret.get("OE_DATA_GCS_SA_KEY", args.beaker_workspace) + env_var_secrets.append(EnvVar(name="GOOGLE_APPLICATION_CREDENTIALS_FILE", secret="OE_DATA_GCS_SA_KEY")) except SecretNotFound: print("Input the olmo-gcs SA key if you would like to load weights from gcs (end with a double newline):") lines = [] @@ -717,6 +728,7 @@ def submit_beaker_job(args): gcs_sa_key = "\n".join(lines[:-1]).strip() # Remove the last empty line if gcs_sa_key: b.secret.write("OE_DATA_GCS_SA_KEY", gcs_sa_key, args.beaker_workspace) + env_var_secrets.append(EnvVar(name="GOOGLE_APPLICATION_CREDENTIALS_FILE", secret="OE_DATA_GCS_SA_KEY")) # Create the experiment spec experiment_spec = ExperimentSpec( @@ -734,14 +746,7 @@ def submit_beaker_job(args): ), image=ImageSource(beaker=beaker_image), command=["python", "-m", "olmocr.beakerpipeline"] + args_list, - env_vars=[ - EnvVar(name="BEAKER_JOB_NAME", value=task_name), - EnvVar(name="OWNER", value=owner), - EnvVar(name="WEKA_ACCESS_KEY_ID", secret=f"{owner}-WEKA_ACCESS_KEY_ID"), - EnvVar(name="WEKA_SECRET_ACCESS_KEY", secret=f"{owner}-WEKA_SECRET_ACCESS_KEY"), - EnvVar(name="AWS_CREDENTIALS_FILE", secret=f"{owner}-AWS_CREDENTIALS_FILE"), - EnvVar(name="GOOGLE_APPLICATION_CREDENTIALS_FILE", secret="OE_DATA_GCS_SA_KEY"), - ], + env_vars=[EnvVar(name="BEAKER_JOB_NAME", value=task_name), EnvVar(name="OWNER", value=owner)] + env_var_secrets, resources=TaskResources(gpu_count=1), constraints=Constraints(cluster=args.beaker_cluster if isinstance(args.beaker_cluster, list) else [args.beaker_cluster]), result=ResultSpec(path="/noop-results"),