Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
pminervini
committed on
Commit
•
8e3d8c1
1
Parent(s):
3be882c
update
Browse files
- src/backend/run_eval_suite.py +2 -0
- src/backend/tasks/faithdial/faithdial.yaml +0 -1
- src/backend/tasks/faithdial/faithdial_v2.yaml +0 -1
- src/backend/tasks/fever/fever10.yaml +0 -1
- src/backend/tasks/fever/fever11.yaml +0 -1
- src/backend/tasks/halueval/halueval_dialogue.yaml +0 -2
- src/backend/tasks/halueval/halueval_qa.yaml +0 -2
- src/backend/tasks/halueval/halueval_summarization.yaml +0 -2
- src/backend/tasks/truefalse/truefalse.yaml +0 -1
- src/backend/tasks/xsum/xsum.yaml +0 -2
src/backend/run_eval_suite.py
CHANGED
@@ -19,6 +19,8 @@ def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_siz
|
|
19 |
# include_task_folder("src/backend/tasks/")
|
20 |
# initialize_tasks('INFO')
|
21 |
|
|
|
|
|
22 |
task_manager = TaskManager(include_path="./src/backend/tasks/")
|
23 |
# task_manager.initialize_tasks('INFO')
|
24 |
|
|
|
19 |
# include_task_folder("src/backend/tasks/")
|
20 |
# initialize_tasks('INFO')
|
21 |
|
22 |
+
print(f"Allocating task manager for: {task_names}")
|
23 |
+
|
24 |
task_manager = TaskManager(include_path="./src/backend/tasks/")
|
25 |
# task_manager.initialize_tasks('INFO')
|
26 |
|
src/backend/tasks/faithdial/faithdial.yaml
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
group: faithdial
|
2 |
task: faithdial_hallu
|
3 |
dataset_path: McGill-NLP/FaithDial
|
4 |
training_split: train
|
|
|
|
|
1 |
task: faithdial_hallu
|
2 |
dataset_path: McGill-NLP/FaithDial
|
3 |
training_split: train
|
src/backend/tasks/faithdial/faithdial_v2.yaml
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
group: faithdial
|
2 |
task: faithdial_hallu_v2
|
3 |
dataset_path: McGill-NLP/FaithDial
|
4 |
training_split: train
|
|
|
|
|
1 |
task: faithdial_hallu_v2
|
2 |
dataset_path: McGill-NLP/FaithDial
|
3 |
training_split: train
|
src/backend/tasks/fever/fever10.yaml
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
group: fever
|
2 |
task: fever10
|
3 |
dataset_path: fever
|
4 |
dataset_name: v1.0
|
|
|
|
|
1 |
task: fever10
|
2 |
dataset_path: fever
|
3 |
dataset_name: v1.0
|
src/backend/tasks/fever/fever11.yaml
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
group: fever
|
2 |
task: fever11
|
3 |
dataset_path: pminervini/hl-fever
|
4 |
dataset_name: v1.0
|
|
|
|
|
1 |
task: fever11
|
2 |
dataset_path: pminervini/hl-fever
|
3 |
dataset_name: v1.0
|
src/backend/tasks/halueval/halueval_dialogue.yaml
CHANGED
@@ -1,5 +1,3 @@
|
|
1 |
-
group:
|
2 |
-
- halueval
|
3 |
task: halueval_dialogue
|
4 |
dataset_path: pminervini/HaluEval
|
5 |
dataset_name: dialogue_samples
|
|
|
|
|
|
|
1 |
task: halueval_dialogue
|
2 |
dataset_path: pminervini/HaluEval
|
3 |
dataset_name: dialogue_samples
|
src/backend/tasks/halueval/halueval_qa.yaml
CHANGED
@@ -1,5 +1,3 @@
|
|
1 |
-
group:
|
2 |
-
- halueval
|
3 |
task: halueval_qa
|
4 |
dataset_path: pminervini/HaluEval
|
5 |
dataset_name: qa_samples
|
|
|
|
|
|
|
1 |
task: halueval_qa
|
2 |
dataset_path: pminervini/HaluEval
|
3 |
dataset_name: qa_samples
|
src/backend/tasks/halueval/halueval_summarization.yaml
CHANGED
@@ -1,5 +1,3 @@
|
|
1 |
-
group:
|
2 |
-
- halueval
|
3 |
task: halueval_summarization
|
4 |
dataset_path: pminervini/HaluEval
|
5 |
dataset_name: summarization_samples
|
|
|
|
|
|
|
1 |
task: halueval_summarization
|
2 |
dataset_path: pminervini/HaluEval
|
3 |
dataset_name: summarization_samples
|
src/backend/tasks/truefalse/truefalse.yaml
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
group: truefalse
|
2 |
task: truefalse_cieacf
|
3 |
dataset_path: pminervini/true-false
|
4 |
dataset_name: default
|
|
|
|
|
1 |
task: truefalse_cieacf
|
2 |
dataset_path: pminervini/true-false
|
3 |
dataset_name: default
|
src/backend/tasks/xsum/xsum.yaml
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
task:
|
2 |
-
- xsum
|
|
|
|
|
|