hsaest committed
Commit 6a1fa89
1 Parent(s): cd6ca15

Update app.py

Files changed (1)
  1. app.py +12 -70
app.py CHANGED
@@ -68,69 +68,24 @@ def load_line_json_data(filename):
 def add_new_eval(
     val_or_test: str,
     eval_mode: str,
-    model: str,
-    tooluse_strategy: str,
-    planning_strategy: str,
-    organization: str,
-    mail: str,
     path_to_file: str,
 ):
-    # Very basic email parsing
-    _, parsed_mail = parseaddr(mail)
-    if not "@" in parsed_mail:
-        return format_warning("Please provide a valid email adress.")
-
     print("Adding new eval")

     if path_to_file is None:
         return format_warning("Please attach a file.")

-    # Save submitted file
-    api.upload_file(
-        repo_id=RESULTS_DATASET,
-        path_or_fileobj=path_to_file.name,
-        path_in_repo=f"{organization}/{val_or_test}_{eval_mode}_{tooluse_strategy}_{planning_strategy}_raw_{datetime.datetime.today()}.jsonl",
-        repo_type="dataset",
-        token=TOKEN
-    )

     # Compute score
     file_path = path_to_file.name
-    result = eval_score(val_or_test,file_path=file_path,TOKEN=TOKEN)
-    with open(f"scored/{organization}_{val_or_test}_{eval_mode}_{tooluse_strategy}_{planning_strategy}.jsonl", "w") as scored_file:
-        scored_file.write(json.dumps(result) + "\n")
-
-    # Save scored file
-    api.upload_file(
-        repo_id=RESULTS_DATASET,
-        path_or_fileobj=f"scored/{organization}_{val_or_test}_{eval_mode}_{tooluse_strategy}_{planning_strategy}.jsonl",
-        path_in_repo=f"{organization}/{model}/{val_or_test}_{eval_mode}_{tooluse_strategy}_{planning_strategy}_scored_{datetime.datetime.today()}.jsonl",
-        repo_type="dataset",
-        token=TOKEN
-    )
-
-    # Actual submission
-    eval_entry = {
-        "Model": model,
-        "Tool-use Strategy": tooluse_strategy,
-        "Planning Strategy": planning_strategy,
-        "Organization": organization,
-        "Mail": mail,
-        "Delivery Rate": result['Delivery Rate'],
-        "Commonsense Constraint Micro Pass Rate":result['Commonsense Constraint Micro Pass Rate'],
-        "Commonsense Constraint Macro Pass Rate":result['Commonsense Constraint Macro Pass Rate'],
-        "Hard Constraint Micro Pass Rate":result['Hard Constraint Micro Pass Rate'],
-        "Hard Constraint Macro Pass Rate":result['Hard Constraint Macro Pass Rate'],
-        "Final Pass Rate":result['Final Pass Rate']
-    }
-    eval_mode = eval_mode.replace('-','')
-    eval_results[f'{val_or_test}_{eval_mode}'] = eval_results[f'{val_or_test}_{eval_mode}'].add_item(eval_entry)
+    result, detail_json = eval_score(val_or_test,file_path=file_path,TOKEN=TOKEN)
+    print(detail_json)
+    print(type(detail_json))
+    outputPath=os.path.join('.',datetime.now().strftime('%Y%m%d%H%M%S') + '.json')
+    with open(outputPath,'w') as w:
+        json.dump(detail_json,w)

-    print(eval_results)
-
-    eval_results.push_to_hub(RESULTS_DATASET, config_name = 'scores', token=TOKEN)
-
-    return format_log(f"Model: {model} | Tool-use Strategy: {tooluse_strategy} | Planning Strategy: {planning_strategy} | submitted by {organization} successfully. \nPlease refresh the leaderboard, and wait a bit to see the score displayed (Validation ~2mins, Test ~7mins).")
+    return format_log(f"{result}"), gr.File(label=f"Download the detailed constraint pass rate reports", value=outputPath, visible=True)


 def refresh():
@@ -141,9 +96,6 @@ def refresh():
     eval_dataframe_test_soleplanning = get_dataframe_from_results(eval_results=eval_results, split="test",mode='soleplanning')
     return eval_dataframe_val_twostage, eval_dataframe_val_soleplanning, eval_dataframe_test_twostage, eval_dataframe_test_soleplanning

-# def upload_file(files):
-#     file_paths = [file.name for file in files]
-#     return file_paths


 demo = gr.Blocks()
@@ -185,14 +137,8 @@ with demo:
         with gr.Column():
             level_of_test = gr.Radio(["validation", "test"], value="validation", label="Split")
             eval_mode = gr.Radio(["two-stage", "sole-planning"], value="two-stage", label="Eval Mode")
-            model = gr.Textbox(label="Foundation Model")
-            tooluse_strategy = gr.Textbox(label="Tool-use Strategy")
-            planning_strategy = gr.Textbox(label="Planning Strategy")
-        with gr.Column():
-            organization = gr.Textbox(label="Organization")
-            mail = gr.Textbox(label="Contact email")
-            file_output = gr.File()
-
+            file_input = gr.File(label="Upload file")
+            file_output = gr.File(label="Download the detailed constraint pass rate reports", visible=False)

     submit_button = gr.Button("Submit Eval")
     submission_result = gr.Markdown()
@@ -201,16 +147,12 @@ with demo:
         [
            level_of_test,
            eval_mode,
-           model,
-           tooluse_strategy,
-           planning_strategy,
-           organization,
-           mail,
-           file_output,
+           file_input,
        ],
-       submission_result,
+       [submission_result, file_output]
    )

 demo.launch(debug=True)


+
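
For context, a minimal self-contained sketch of the Gradio pattern the new add_new_eval relies on: the click handler writes the detailed report to a timestamped JSON file and returns a gr.File component so the hidden download widget becomes visible. This is not the leaderboard app itself; score_and_report and its stubbed summary/detail values are hypothetical stand-ins, and only the component wiring mirrors the diff above.

# Sketch of the "return a gr.File to reveal a download link" pattern (assumptions noted above).
import json
import os
from datetime import datetime

import gradio as gr


def score_and_report(uploaded_file):
    if uploaded_file is None:
        return "Please attach a file.", gr.File(visible=False)

    # Stand-in for eval_score(), which the commit assumes returns (summary, detail_json).
    summary = {"Final Pass Rate": 0.0}
    detail_json = {"note": "per-constraint details would go here"}

    # Write the detail report to a timestamped JSON file, as the commit does.
    output_path = os.path.join(".", datetime.now().strftime("%Y%m%d%H%M%S") + ".json")
    with open(output_path, "w") as f:
        json.dump(detail_json, f)

    # Returning a component instance (Gradio 4 style) updates the matching output in place.
    return f"{summary}", gr.File(value=output_path, visible=True)


with gr.Blocks() as demo:
    file_input = gr.File(label="Upload file")
    file_output = gr.File(label="Download the detailed report", visible=False)
    submit_button = gr.Button("Submit Eval")
    submission_result = gr.Markdown()

    submit_button.click(score_and_report, [file_input], [submission_result, file_output])

if __name__ == "__main__":
    demo.launch()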