Add Model Evals
Browse files
README.md
CHANGED
@@ -1 +1,18 @@
|
|
1 |
-
wandb: https://wandb.ai/eleutherai/pythia-rlhf/runs/6y83ekqy?workspace=user-yongzx
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wandb: https://wandb.ai/eleutherai/pythia-rlhf/runs/6y83ekqy?workspace=user-yongzx
|
2 |
+
|
3 |
+
## Model Evals
|
4 |
+
| Task |Version|Filter| Metric |Value | |Stderr|
|
5 |
+
|--------------|-------|------|----------|-----:|---|-----:|
|
6 |
+
|arc_challenge |Yaml |none |acc |0.2526|± |0.0127|
|
7 |
+
| | |none |acc_norm |0.2773|± |0.0131|
|
8 |
+
|arc_easy |Yaml |none |acc |0.5791|± |0.0101|
|
9 |
+
| | |none |acc_norm |0.4912|± |0.0103|
|
10 |
+
|lambada_openai|Yaml |none |perplexity|7.0516|± |0.1979|
|
11 |
+
| | |none |acc |0.5684|± |0.0069|
|
12 |
+
|logiqa |Yaml |none |acc |0.2166|± |0.0162|
|
13 |
+
| | |none |acc_norm |0.2919|± |0.0178|
|
14 |
+
|piqa |Yaml |none |acc |0.7176|± |0.0105|
|
15 |
+
| | |none |acc_norm |0.6964|± |0.0107|
|
16 |
+
|sciq |Yaml |none |acc |0.8460|± |0.0114|
|
17 |
+
| | |none |acc_norm |0.7700|± |0.0133|
|
18 |
+
|winogrande |Yaml |none |acc |0.5399|± |0.0140|
|