Ori committed on
Commit 9ae6f61
Parent: b3bee8a

Update app.py

Files changed (1)
app.py +5 -5
app.py CHANGED
@@ -3,7 +3,6 @@ import json
 import datetime
 from email.utils import parseaddr
 import numpy as np
-
 import gradio as gr
 import pandas as pd
 from datasets import load_dataset
@@ -55,8 +54,8 @@ def format_dataframe(df):
     if "URL" in df.columns:
         df["Model Name"] = df.apply(lambda row: f"[{row['Model Name']}]({row['URL']})", axis=1)
         df = df.drop(columns=["URL"])
-    df = df.rename(columns={"Model Family": "Base Model"})
-    df = df[["Model Name", "Accuracy", "Accuracy (easy)", "Accuracy (medium)", "Accuracy (hard)", "Answer rate", "Precision", "EM", "Base Model", "Organization"]]
+    #df = df.rename(columns={"Model Family": "Base Model"})
+    df = df[["Model Name", "Accuracy", "Answer rate", "Precision", "EM", "Accuracy (easy)", "Accuracy (medium)", "Accuracy (hard)", "Base Model", "Organization"]]
     return df
 
 eval_dataframe_test = get_dataframe_from_results(eval_results=eval_results, split="test")
@@ -141,7 +140,7 @@ def add_new_eval(
             }) + "\n"
         )
 
-        all_scores.append({"score": score, "has_ans": has_ans})
+        all_scores.append({"score": score, "has_ans": has_ans, "model_answer": answer, 'id': task_id})
 
         scores += score
         num_questions += 1
@@ -182,6 +181,7 @@ def add_new_eval(
         "EM": em
     }
     eval_results["test"] = eval_results["test"].add_item(eval_entry)
+
     eval_results.push_to_hub(RESULTS_DATASET, config_name=YEAR_VERSION, token=TOKEN)
 
     return format_log(
@@ -283,4 +283,4 @@ with demo:
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=3600)
 scheduler.start()
-demo.launch(debug=True)
+demo.launch(debug=True)
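
The two substantive changes in this commit are the richer per-question records appended to all_scores (each entry now also carries the model's answer and its task id) and the new leaderboard column handling in format_dataframe, where the rename of "Model Family" to "Base Model" is commented out and the overall metrics are ordered ahead of the per-difficulty accuracies. The following is a minimal, self-contained sketch of the revised column handling only; the sample row, model name, and URL are hypothetical, and only the column names and their order are taken from the diff.

# Minimal sketch of the revised format_dataframe column handling, assuming a
# pandas DataFrame that already contains the columns listed in the diff.
import pandas as pd

def format_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    # Turn the model name into a Markdown link when a URL column is present.
    if "URL" in df.columns:
        df["Model Name"] = df.apply(lambda row: f"[{row['Model Name']}]({row['URL']})", axis=1)
        df = df.drop(columns=["URL"])
    # The rename from "Model Family" to "Base Model" is commented out in the
    # commit, so the data is assumed to already use "Base Model".
    return df[["Model Name", "Accuracy", "Answer rate", "Precision", "EM",
               "Accuracy (easy)", "Accuracy (medium)", "Accuracy (hard)",
               "Base Model", "Organization"]]

# Hypothetical row, only for demonstrating the reordering.
sample = pd.DataFrame([{
    "Model Name": "demo-model",
    "URL": "https://example.com/demo-model",
    "Accuracy": 0.50, "Accuracy (easy)": 0.70, "Accuracy (medium)": 0.45,
    "Accuracy (hard)": 0.30, "Answer rate": 0.95, "Precision": 0.55,
    "EM": 0.40, "Base Model": "demo-base", "Organization": "demo-org",
}])
print(format_dataframe(sample).columns.tolist())

The all_scores change is analogous in spirit: each per-question dict gains "model_answer" and "id" entries so individual predictions can be traced back to their task after scoring.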