Commit
•
a49eb28
1
Parent(s):
26e1596
Add verifyToken field to verify evaluation results are produced by Hugging Face's automatic model evaluator
Browse files
Beep boop, I am a bot from Hugging Face's automatic model evaluator 👋! We've added a new `verifyToken` field to your evaluation results to verify that they are produced by the model evaluator. Accept this PR to ensure that your results remain listed as **verified** on the [Hub leaderboard](https://huggingface.co/spaces/autoevaluate/leaderboards).
README.md
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
---
|
2 |
language: en
|
3 |
-
|
4 |
tags:
|
5 |
- text-generation
|
6 |
- opt
|
7 |
-
|
8 |
commercial: false
|
9 |
model-index:
|
10 |
- name: inverse-scaling/opt-6.7b_eval
|
@@ -18,14 +18,16 @@ model-index:
|
|
18 |
config: inverse-scaling--NeQA
|
19 |
split: train
|
20 |
metrics:
|
21 |
-
-
|
22 |
-
type: accuracy
|
23 |
value: 0.54
|
|
|
24 |
verified: true
|
25 |
-
|
26 |
-
|
27 |
value: 0.740270353704691
|
|
|
28 |
verified: true
|
|
|
29 |
- task:
|
30 |
type: zero-shot-classification
|
31 |
name: Zero-Shot Text Classification
|
@@ -35,14 +37,16 @@ model-index:
|
|
35 |
config: inverse-scaling--quote-repetition
|
36 |
split: train
|
37 |
metrics:
|
38 |
-
-
|
39 |
-
type: accuracy
|
40 |
value: 0.86
|
|
|
41 |
verified: true
|
42 |
-
|
43 |
-
|
44 |
value: 0.22016974209290055
|
|
|
45 |
verified: true
|
|
|
46 |
- task:
|
47 |
type: zero-shot-classification
|
48 |
name: Zero-Shot Text Classification
|
@@ -52,14 +56,16 @@ model-index:
|
|
52 |
config: inverse-scaling--redefine-math
|
53 |
split: train
|
54 |
metrics:
|
55 |
-
-
|
56 |
-
type: accuracy
|
57 |
value: 0.6733333333333333
|
|
|
58 |
verified: true
|
59 |
-
|
60 |
-
|
61 |
value: 0.638882334422734
|
|
|
62 |
verified: true
|
|
|
63 |
- task:
|
64 |
type: zero-shot-classification
|
65 |
name: Zero-Shot Text Classification
|
@@ -69,14 +75,16 @@ model-index:
|
|
69 |
config: inverse-scaling--hindsight-neglect-10shot
|
70 |
split: train
|
71 |
metrics:
|
72 |
-
-
|
73 |
-
type: accuracy
|
74 |
value: 0.4666666666666667
|
|
|
75 |
verified: true
|
76 |
-
|
77 |
-
|
78 |
value: 0.7550815605928027
|
|
|
79 |
verified: true
|
|
|
80 |
- task:
|
81 |
type: zero-shot-classification
|
82 |
name: Zero-Shot Text Classification
|
@@ -86,14 +94,16 @@ model-index:
|
|
86 |
config: mathemakitten--winobias_antistereotype_test_cot_v3
|
87 |
split: test
|
88 |
metrics:
|
89 |
-
-
|
90 |
-
type: accuracy
|
91 |
value: 0.3737864077669903
|
|
|
92 |
verified: true
|
93 |
-
|
94 |
-
|
95 |
value: 1.2823651640752816
|
|
|
96 |
verified: true
|
|
|
97 |
- task:
|
98 |
type: zero-shot-classification
|
99 |
name: Zero-Shot Text Classification
|
@@ -103,14 +113,16 @@ model-index:
|
|
103 |
config: mathemakitten--winobias_antistereotype_test_v5
|
104 |
split: test
|
105 |
metrics:
|
106 |
-
-
|
107 |
-
type: accuracy
|
108 |
value: 0.3859223300970874
|
|
|
109 |
verified: true
|
110 |
-
|
111 |
-
|
112 |
value: 1.295986159347468
|
|
|
113 |
verified: true
|
|
|
114 |
---
|
115 |
|
116 |
# OPT : Open Pre-trained Transformer Language Models
|
|
|
1 |
---
|
2 |
language: en
|
3 |
+
license: other
|
4 |
tags:
|
5 |
- text-generation
|
6 |
- opt
|
7 |
+
inference: false
|
8 |
commercial: false
|
9 |
model-index:
|
10 |
- name: inverse-scaling/opt-6.7b_eval
|
|
|
18 |
config: inverse-scaling--NeQA
|
19 |
split: train
|
20 |
metrics:
|
21 |
+
- type: accuracy
|
|
|
22 |
value: 0.54
|
23 |
+
name: Accuracy
|
24 |
verified: true
|
25 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOWE0ZjA1NDg0YjYzNTZhYjIwZDRhNDcxYjNiYTQ1YTY2YWQ1YTUzZmIyMTlmYTljMGJiNjAyNzc0YTNiYWFhNCIsInZlcnNpb24iOjF9.eWcHC6dzOjnuF-mT6Z2G8Z1xCoow6iViE1Qy-VNKMSzIcJZcvgkZI0NhU50YMi4tOOZN2k92MATtbXtcZR5yCQ
|
26 |
+
- type: loss
|
27 |
value: 0.740270353704691
|
28 |
+
name: Loss
|
29 |
verified: true
|
30 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNTY1M2ZlYTYzMjZhNTVmZjgyMWJiYmYxZGM2NjQxYjdlZDI3ZmZmODAxMTI5N2RmMjMyNzYzMWUxZTViNjM5YSIsInZlcnNpb24iOjF9.G3DqNVlNLP5uAmzOKa9hsxBBiSWXbrDesp3hIlQomYe2YsbWbYF0WssbFi7DXEu5hmj6yCN2E-olbEjzwZ2eBQ
|
31 |
- task:
|
32 |
type: zero-shot-classification
|
33 |
name: Zero-Shot Text Classification
|
|
|
37 |
config: inverse-scaling--quote-repetition
|
38 |
split: train
|
39 |
metrics:
|
40 |
+
- type: accuracy
|
|
|
41 |
value: 0.86
|
42 |
+
name: Accuracy
|
43 |
verified: true
|
44 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMzA0ZjJlZGUwOGNhNmE3MmMzMDY1YTM4ZjYzNDUwYjk1MTU2MmVhMGQzYjI3YzI0ZGMzMWFkODIyZWE5Mjk2ZCIsInZlcnNpb24iOjF9.pc3tzIMBv05ZBixkmRojnIzsdHLvYhZX_sJnNZ_t_oo61DrTUhYQYq3xikx8S5rIr5sWrLTbxWn3rAAXme0KAQ
|
45 |
+
- type: loss
|
46 |
value: 0.22016974209290055
|
47 |
+
name: Loss
|
48 |
verified: true
|
49 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOWE5Njk4OWQ5Mzg3ODljMWM3MzhiNjBhNTk5ZGJjMDU3ZTJlZDZjZjBjYzdkMmMxZTJlMTJkMjg1OTA5ZWQxNSIsInZlcnNpb24iOjF9.NubehOGlzEURMYuTkvqzXmf1ENadam7uZ62YA1nv1DjAivd8VySmpLl-QnnZLcDbhduMZbRp4lMQbWG9Z26LAg
|
50 |
- task:
|
51 |
type: zero-shot-classification
|
52 |
name: Zero-Shot Text Classification
|
|
|
56 |
config: inverse-scaling--redefine-math
|
57 |
split: train
|
58 |
metrics:
|
59 |
+
- type: accuracy
|
|
|
60 |
value: 0.6733333333333333
|
61 |
+
name: Accuracy
|
62 |
verified: true
|
63 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZTAyYjAwMzgyMDc2MmU1NDM0MmMyOWUyYzc3YTYxNzkyYzk2ZGZiMTk5NjlkODUwNDQ1NzFlMTU0Y2Y0ZGZlYSIsInZlcnNpb24iOjF9.VMxtPMY9qKk4eSjAlDb_jfg1nsf8eq1Oz5WnfUSC-VkXREQ6-f1qBooJc617t6U5apIbHnaW9XP3LTYrGzvUDQ
|
64 |
+
- type: loss
|
65 |
value: 0.638882334422734
|
66 |
+
name: Loss
|
67 |
verified: true
|
68 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNDYwNWVmZGM5ZmM2MmY0Y2IzYzNhOTNkZmU2YTA2MWZlZTU1ZGI2OTM1YzJiNjViNzMwMjA0Y2Q0ODBlYTgzOSIsInZlcnNpb24iOjF9.YJujmeEYbf4ZOJ0w_Q24d7t5ksKST35aweNJSk6UYuCiV6uSIJhJUz_w8iFwo9ykM-EOXamL87dftlkyawgtBw
|
69 |
- task:
|
70 |
type: zero-shot-classification
|
71 |
name: Zero-Shot Text Classification
|
|
|
75 |
config: inverse-scaling--hindsight-neglect-10shot
|
76 |
split: train
|
77 |
metrics:
|
78 |
+
- type: accuracy
|
|
|
79 |
value: 0.4666666666666667
|
80 |
+
name: Accuracy
|
81 |
verified: true
|
82 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYjVlMzdkYTUxZmI1ZDBmMDdjM2VhMjA1ZTg0MGYzMzU0NzFlN2JmNDY2NDc0MmVlMjI3MDg1Y2Q5MDRhYWU1ZCIsInZlcnNpb24iOjF9.Z01fwvvUFNOWeUWexSpdmAUPYJIsYUV-eb1ybSEjQ3cb9ow2STMVgxp0PqaDJMVWKg30xIkARahsg8ci6QpbBw
|
83 |
+
- type: loss
|
84 |
value: 0.7550815605928027
|
85 |
+
name: Loss
|
86 |
verified: true
|
87 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZWQyNmYwZjdkMTM1YjIxYzEwMmUwMWVlZTRjODQwYWExNDQ2MTgzYzA0ZTlkODcxYWIxMzdmNWE0NDdmNzcxYiIsInZlcnNpb24iOjF9.TtX2cKfatVMFX09l6DiuKFEa1vlDJUBPohSLmdQGh8QCTf-DrylUqARU8Ni5cSiSlidFF4n4IWIL0vQ941n6DQ
|
88 |
- task:
|
89 |
type: zero-shot-classification
|
90 |
name: Zero-Shot Text Classification
|
|
|
94 |
config: mathemakitten--winobias_antistereotype_test_cot_v3
|
95 |
split: test
|
96 |
metrics:
|
97 |
+
- type: accuracy
|
|
|
98 |
value: 0.3737864077669903
|
99 |
+
name: Accuracy
|
100 |
verified: true
|
101 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOTliMzU1NGIxMTUxYTM4NzVlYzI4YzljMDYyOWM1ZDdkMWMyNjIwOWQ4OGNhZWE3ZTljZGI0ZTA2ZWU3MjVmMiIsInZlcnNpb24iOjF9.dTlDpXOusgl6m3dn7XwfKeaxaVfU1VnEHWFeh7yBNSq5TyHPWbixlNumOWDjc-y9v8g0oWBXqWhT0KMQDaGVCQ
|
102 |
+
- type: loss
|
103 |
value: 1.2823651640752816
|
104 |
+
name: Loss
|
105 |
verified: true
|
106 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMTRjZmU4YWNkNGEwMjNlMGEyYjA1ZjhjOGE3OTZiZTJlYjMyMjViMTYyYWQ1YTdlMmM1ZjU5NTFhOWU3NzM1OCIsInZlcnNpb24iOjF9.yGmOME0MrX0moaU5c2WYf8H7CFfSGsPuQ2qp9MCi_es5RQRWoCHeCcR5oLQ4RATmVpYdzocPxqrbeZfqxVIOAQ
|
107 |
- task:
|
108 |
type: zero-shot-classification
|
109 |
name: Zero-Shot Text Classification
|
|
|
113 |
config: mathemakitten--winobias_antistereotype_test_v5
|
114 |
split: test
|
115 |
metrics:
|
116 |
+
- type: accuracy
|
|
|
117 |
value: 0.3859223300970874
|
118 |
+
name: Accuracy
|
119 |
verified: true
|
120 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNzZlZjIzNDM0Mzk5MmRlMTFlOWVlZjY3MDFmY2NhZjlkYWNmMWQ2MjdhOTg3YTg0OTI1YjY5YmYxMTc4YjYyOCIsInZlcnNpb24iOjF9.nCFVShWbHuHFKEdK5INjQSfLI9KQUNQZqqjqYCw_HVHSW0QHLIXdAb7_GDZJhCUTJ-JkBVCJFtEliA2Zw9GjAw
|
121 |
+
- type: loss
|
122 |
value: 1.295986159347468
|
123 |
+
name: Loss
|
124 |
verified: true
|
125 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYWE3ZjhmYzM3NjRhMjc3OGU5NWQzY2Q1NzA2ZDBjN2Q1YmZkYzdiMDBhMmY1ZDM5NmU2YzQ2ZGZmZmYyMzg5NiIsInZlcnNpb24iOjF9.2UzIpqw83YQdGOqTKKP7ywqpNdgCDkR36lhkbja6qFsKyQctcg4vZgLXfMSfufWf1G_9iXqY8r-JiZadMdK3Dg
|
126 |
---
|
127 |
|
128 |
# OPT : Open Pre-trained Transformer Language Models
|