meg-huggingface committed on
Commit
7de9b42
·
1 Parent(s): 4846310

Update the status of entries in the requests dataset (COMPLETED, FAILED, etc.) by matching on model *and task*.

Browse files
Files changed (1) hide show
  1. process_runs.py +13 -14
process_runs.py CHANGED
@@ -76,32 +76,32 @@ def update_requests(requests, all_attempts, failed_attempts):
76
  # run_dir="/runs/${experiment_name}/${backend_model}/${now}", where
77
  # ${backend_model} is ${organization}/${model_name}
78
  for line in all_attempts:
 
79
  print(f"Checking {line}")
80
  split_run_dir = line.strip().strip("/").split("/")
81
  print(f"Processing run directory {split_run_dir}")
82
  task = split_run_dir[1]
83
  print(f"Task is {task}")
 
 
 
 
 
84
  model = "/".join([split_run_dir[2], split_run_dir[3]])
85
- print(f"Model is is {model}")
86
  traceback_error = check_for_traceback(line)
87
  if traceback_error != "":
88
  print("Found a traceback error!")
89
  print(traceback_error)
90
- requests_df.loc[(requests_df["status"] == PENDING) & (requests_df["model"] == model) & (requests_df["task"] == task), ['status']] = FAILED
91
- requests_df.loc[(requests_df["status"] == PENDING) & (requests_df["model"] == model) & (requests_df["task"] == task), ['error_message']] = traceback_error
92
  elif line in failed_attempts:
93
  print(f"Job failed, but not sure why -- didn't find a traceback in {line}.")
94
- print(f"Setting {model}, {task}, status {PENDING} to {FAILED}.")
95
- print("DEBUG: PENDING status dataframe:")
96
- print(requests_df[(requests_df["status"] == PENDING)])
97
- print("DEBUG: model dataframe:")
98
- print(requests_df[(requests_df["model"] == model)])
99
- print("DEBUG: task dataframe:")
100
- print(requests_df[(requests_df["task"] == task)])
101
- print(requests_df[(requests_df["status"] == PENDING) & (requests_df["model"] == model) & (requests_df["task"] == task)])
102
- requests_df.loc[(requests_df["status"] == PENDING) & (requests_df["model"] == model) & (requests_df["task"] == task), ['status']] = FAILED
103
  else:
104
- requests_df.loc[(requests_df["status"] == PENDING) & (requests_df["model"] == model) & (requests_df["task"] == task), ['status']] = COMPLETED
105
  updated_dset = Dataset.from_pandas(requests_df)
106
  return updated_dset
107
 
@@ -116,7 +116,6 @@ if __name__ == '__main__':
116
  )
117
  # Update requests dataset based on whether things have failed or not.
118
  print(f"Examining the run directory for each model & task to determine if it {FAILED} or {COMPLETED}.")
119
- print(f"Setting the corresponding line in {REQUESTS_DSET} to {FAILED} or {COMPLETED} based on what's in the directory.")
120
  requests = load_dataset(f"{REQUESTS_DSET}", split="test", token=TOKEN)
121
  all_attempts = open(f"{args.attempts}", "r+").readlines()
122
  failed_attempts = open(f"{args.failed_attempts}", "r+").readlines()
 
76
  # run_dir="/runs/${experiment_name}/${backend_model}/${now}", where
77
  # ${backend_model} is ${organization}/${model_name}
78
  for line in all_attempts:
79
+ line = line.strip()
80
  print(f"Checking {line}")
81
  split_run_dir = line.strip().strip("/").split("/")
82
  print(f"Processing run directory {split_run_dir}")
83
  task = split_run_dir[1]
84
  print(f"Task is {task}")
85
+ # The naming of the optimum benchmark configs uses an underscore.
86
+ # The naming of the HF Api list models function uses a hyphen.
87
+ # We therefore need to adapt this task string name depending on
88
+ # which part of our pipeline we're talking to.
89
+ hyphenated_task_name = "-".join(task.split("_"))
90
  model = "/".join([split_run_dir[2], split_run_dir[3]])
91
+ print(f"Model is {model}")
92
  traceback_error = check_for_traceback(line)
93
  if traceback_error != "":
94
  print("Found a traceback error!")
95
  print(traceback_error)
96
+ requests_df.loc[(requests_df["status"] == PENDING) & (requests_df["model"] == model) & (requests_df["task"] == hyphenated_task_name), ['status']] = FAILED
97
+ requests_df.loc[(requests_df["status"] == PENDING) & (requests_df["model"] == model) & (requests_df["task"] == hyphenated_task_name), ['error_message']] = traceback_error
98
  elif line in failed_attempts:
99
  print(f"Job failed, but not sure why -- didn't find a traceback in {line}.")
100
+ print(f"Setting {model}, {hyphenated_task_name}, status {PENDING} to {FAILED}.")
101
+ print(requests_df[(requests_df["status"] == PENDING) & (requests_df["model"] == model) & (requests_df["task"] == hyphenated_task_name)])
102
+ requests_df.loc[(requests_df["status"] == PENDING) & (requests_df["model"] == model) & (requests_df["task"] == hyphenated_task_name), ['status']] = FAILED
 
 
 
 
 
 
103
  else:
104
+ requests_df.loc[(requests_df["status"] == PENDING) & (requests_df["model"] == model) & (requests_df["task"] == hyphenated_task_name), ['status']] = COMPLETED
105
  updated_dset = Dataset.from_pandas(requests_df)
106
  return updated_dset
107
 
 
116
  )
117
  # Update requests dataset based on whether things have failed or not.
118
  print(f"Examining the run directory for each model & task to determine if it {FAILED} or {COMPLETED}.")
 
119
  requests = load_dataset(f"{REQUESTS_DSET}", split="test", token=TOKEN)
120
  all_attempts = open(f"{args.attempts}", "r+").readlines()
121
  failed_attempts = open(f"{args.failed_attempts}", "r+").readlines()