Update app.py
app.py CHANGED
@@ -3,7 +3,6 @@ import json
 import datetime
 from email.utils import parseaddr
 import numpy as np
-
 import gradio as gr
 import pandas as pd
 from datasets import load_dataset
@@ -55,8 +54,8 @@ def format_dataframe(df):
     if "URL" in df.columns:
         df["Model Name"] = df.apply(lambda row: f"[{row['Model Name']}]({row['URL']})", axis=1)
         df = df.drop(columns=["URL"])
-    df = df.rename(columns={"Model Family": "Base Model"})
-    df = df[["Model Name", "Accuracy", "Accuracy (easy)", "Accuracy (medium)", "Accuracy (hard)", "
+    #df = df.rename(columns={"Model Family": "Base Model"})
+    df = df[["Model Name", "Accuracy", "Answer rate", "Precision", "EM", "Accuracy (easy)", "Accuracy (medium)", "Accuracy (hard)", "Base Model", "Organization"]]
     return df
 
 eval_dataframe_test = get_dataframe_from_results(eval_results=eval_results, split="test")
@@ -141,7 +140,7 @@ def add_new_eval(
             }) + "\n"
         )
 
-        all_scores.append({"score": score, "has_ans": has_ans})
+        all_scores.append({"score": score, "has_ans": has_ans, "model_answer": answer, 'id': task_id})
 
         scores += score
         num_questions += 1
@@ -182,6 +181,7 @@ def add_new_eval(
         "EM": em
     }
     eval_results["test"] = eval_results["test"].add_item(eval_entry)
+
     eval_results.push_to_hub(RESULTS_DATASET, config_name=YEAR_VERSION, token=TOKEN)
 
     return format_log(
@@ -283,4 +283,4 @@ with demo:
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=3600)
 scheduler.start()
-demo.launch(debug=True)
+demo.launch(debug=True)
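
For context, a minimal standalone sketch of what the updated format_dataframe hunk does; only the transformation lines mirror the diff above, while the toy dataframe, its values, and the printed check are hypothetical:

import pandas as pd

# Hypothetical input row resembling the leaderboard results dataframe.
df = pd.DataFrame([{
    "Model Name": "my-model", "URL": "https://huggingface.co/org/my-model",
    "Accuracy": 0.61, "Answer rate": 0.95, "Precision": 0.64, "EM": 0.33,
    "Accuracy (easy)": 0.72, "Accuracy (medium)": 0.58, "Accuracy (hard)": 0.41,
    "Base Model": "my-base", "Organization": "my-org",
}])

# Same steps as the new side of the hunk: render the model name as a
# markdown link, drop the raw URL column, then select/order display columns.
if "URL" in df.columns:
    df["Model Name"] = df.apply(lambda row: f"[{row['Model Name']}]({row['URL']})", axis=1)
    df = df.drop(columns=["URL"])
df = df[["Model Name", "Accuracy", "Answer rate", "Precision", "EM",
         "Accuracy (easy)", "Accuracy (medium)", "Accuracy (hard)",
         "Base Model", "Organization"]]

print(df.iloc[0]["Model Name"])  # [my-model](https://huggingface.co/org/my-model)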