autoevaluator HF staff committed on
Commit
1c4c1ca
1 Parent(s): 5946fbf

Add `verifyToken` field to verify that evaluation results are produced by Hugging Face's automatic model evaluator

Browse files

Beep boop, I am a bot from Hugging Face's automatic model evaluator 👋! We've added a new `verifyToken` field to your evaluation results to verify that they are produced by the model evaluator. Accept this PR to ensure that your results remain listed as **verified** on the [Hub leaderboard](https://huggingface.co/spaces/autoevaluate/leaderboards).

Files changed (1) hide show
  1. README.md +38 -26
README.md CHANGED
@@ -1,9 +1,9 @@
1
  ---
2
  language: en
3
- inference: false
4
  tags:
5
  - text-generation
6
- license: other
7
  commercial: false
8
  model-index:
9
  - name: inverse-scaling/opt-350m_eval
@@ -17,14 +17,16 @@ model-index:
17
  config: inverse-scaling--NeQA
18
  split: train
19
  metrics:
20
- - name: Accuracy
21
- type: accuracy
22
  value: 0.4666666666666667
 
23
  verified: true
24
- - name: Loss
25
- type: loss
26
  value: 0.9192380222864449
 
27
  verified: true
 
28
  - task:
29
  type: zero-shot-classification
30
  name: Zero-Shot Text Classification
@@ -34,14 +36,16 @@ model-index:
34
  config: inverse-scaling--quote-repetition
35
  split: train
36
  metrics:
37
- - name: Accuracy
38
- type: accuracy
39
  value: 0.9633333333333334
 
40
  verified: true
41
- - name: Loss
42
- type: loss
43
  value: 0.03444786100047819
 
44
  verified: true
 
45
  - task:
46
  type: zero-shot-classification
47
  name: Zero-Shot Text Classification
@@ -51,14 +55,16 @@ model-index:
51
  config: inverse-scaling--redefine-math
52
  split: train
53
  metrics:
54
- - name: Accuracy
55
- type: accuracy
56
  value: 0.6877777777777778
 
57
  verified: true
58
- - name: Loss
59
- type: loss
60
  value: 0.6016371671193176
 
61
  verified: true
 
62
  - task:
63
  type: zero-shot-classification
64
  name: Zero-Shot Text Classification
@@ -68,14 +74,16 @@ model-index:
68
  config: inverse-scaling--hindsight-neglect-10shot
69
  split: train
70
  metrics:
71
- - name: Accuracy
72
- type: accuracy
73
  value: 0.4380952380952381
 
74
  verified: true
75
- - name: Loss
76
- type: loss
77
  value: 0.8774787804555325
 
78
  verified: true
 
79
  - task:
80
  type: zero-shot-classification
81
  name: Zero-Shot Text Classification
@@ -85,14 +93,16 @@ model-index:
85
  config: mathemakitten--winobias_antistereotype_test_cot_v3
86
  split: test
87
  metrics:
88
- - name: Accuracy
89
- type: accuracy
90
  value: 0.44660194174757284
 
91
  verified: true
92
- - name: Loss
93
- type: loss
94
  value: 0.9301078982717057
 
95
  verified: true
 
96
  - task:
97
  type: zero-shot-classification
98
  name: Zero-Shot Text Classification
@@ -102,14 +112,16 @@ model-index:
102
  config: mathemakitten--winobias_antistereotype_test_v5
103
  split: test
104
  metrics:
105
- - name: Accuracy
106
- type: accuracy
107
  value: 0.4368932038834951
 
108
  verified: true
109
- - name: Loss
110
- type: loss
111
  value: 0.9175132444057151
 
112
  verified: true
 
113
  ---
114
 
115
 
 
1
  ---
2
  language: en
3
+ license: other
4
  tags:
5
  - text-generation
6
+ inference: false
7
  commercial: false
8
  model-index:
9
  - name: inverse-scaling/opt-350m_eval
 
17
  config: inverse-scaling--NeQA
18
  split: train
19
  metrics:
20
+ - type: accuracy
 
21
  value: 0.4666666666666667
22
+ name: Accuracy
23
  verified: true
24
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYzczZDk5ODcxOWEyYTg2NDc5MzUxYTUzY2IxMzEzYTQ4Mjc2NjY1YzZkNDNmODg1Y2JhMzEzNzNiNDE0MzVlMCIsInZlcnNpb24iOjF9._V6n5pjfCnCFhUIN5rOfSj4enIrb3uo7hDBgnwUsnVxJ2vUWdZiSXR29_ZtGBlJ8b78gfEVQPr9JkZ2vWH-kDw
25
+ - type: loss
26
  value: 0.9192380222864449
27
+ name: Loss
28
  verified: true
29
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNWMyMzFmNjM4MDhiMGRiM2FlOTU2NjJmY2FiNmMyYTFjZGM5MjMyNzU1MTcyYjhhOGI3MjQ1N2ZkZWNhNTNjOCIsInZlcnNpb24iOjF9.zCtWmvufSPCQpO28PXuyd4tuA_m1WjoNKivlxMW9Z8BgFmhvTObC9FtbS0kkJ6hS9wS2NHLi8-gHyQqjCuCJAA
30
  - task:
31
  type: zero-shot-classification
32
  name: Zero-Shot Text Classification
 
36
  config: inverse-scaling--quote-repetition
37
  split: train
38
  metrics:
39
+ - type: accuracy
 
40
  value: 0.9633333333333334
41
+ name: Accuracy
42
  verified: true
43
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYWQ4NGY0N2UzZTI0MDRkYjQwODhjNWJhMjg3OTExYzI2NmVkMGVmMWJjYjMxNDZiZTYwZDdmMzhhMTJiNTM5ZCIsInZlcnNpb24iOjF9.aLrG02yUjaEbIoarFb13RKohrd2v9EhjefJ8Hp8RbK7cFtgZSbbybZ4q3_tmZEjZW96CCeHTldVjiuCfKM36CQ
44
+ - type: loss
45
  value: 0.03444786100047819
46
+ name: Loss
47
  verified: true
48
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNzgwODhkYzcyNTZhN2MxOTVhYmQ3YjdhZTM3ZTZhNzRhNDRlMzBjNzJiNTllNWU4MWM4M2E0NzljMjViOTUwNiIsInZlcnNpb24iOjF9.raav8NSrkoH1d7veZGaQxapvVB9J7s9E5dPqyMkZ2dWxWHoqWCbT1Rwt_FpTbkd8g2qSlnQBGF94W1Mo_tzPAw
49
  - task:
50
  type: zero-shot-classification
51
  name: Zero-Shot Text Classification
 
55
  config: inverse-scaling--redefine-math
56
  split: train
57
  metrics:
58
+ - type: accuracy
 
59
  value: 0.6877777777777778
60
+ name: Accuracy
61
  verified: true
62
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOTVkNTE3NTZjYTUzYzViMGUzYjhlMGNjMjZlNDE4MGE5N2NmZDQyYWQzOTg1N2JmMTY1ODg0Y2UyYzYxNzQ3NCIsInZlcnNpb24iOjF9.Z8xYedPo4bCpRjO7soiqpoQX_JusfqLtDlUFl5rug7n-9BDPy8EQyCm37bKBAge0SosQQxMaPv04Q_doUhVlAw
63
+ - type: loss
64
  value: 0.6016371671193176
65
+ name: Loss
66
  verified: true
67
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMzQ1YmY4ODlmZjJlNTY1NWQ3ZTQ0MjM5MjBkYjg2MjZiNWEzODYzNDcwOWQ1ZWU3MmY2MjRjYmQ2ZDQ1NDk0NSIsInZlcnNpb24iOjF9.Rr-dawBCi_eof82m2928wQXvEWiyeuFGf2Zpk259vkmDI6Fkn3Pz_3bNzoGNNOUVoOKhDc9cUYjBE11tIv9tBA
68
  - task:
69
  type: zero-shot-classification
70
  name: Zero-Shot Text Classification
 
74
  config: inverse-scaling--hindsight-neglect-10shot
75
  split: train
76
  metrics:
77
+ - type: accuracy
 
78
  value: 0.4380952380952381
79
+ name: Accuracy
80
  verified: true
81
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZDFmMWRmNjQyMWJkNzgxOTNkNDBiYjA0MjQ1NjE2NzM0ZjQ0Mzg0ZjkyYzlhMTdjMDZhNWY0NjU2NTAzYmFmYSIsInZlcnNpb24iOjF9.5VZ9gq1ldypfzpPYP3_Wv64rDlVO3jlJrnxK28qXDTcaHCcvF4YtYNry5ud8y9T9L1YrTVMaaPqLafavOHHlDQ
82
+ - type: loss
83
  value: 0.8774787804555325
84
+ name: Loss
85
  verified: true
86
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYTU0YTY5ZWIxMzRhNzRlZDliMDNlZjcyM2FjOGRkZjVlNWNiM2I0MWNlZTFiNzdhMTUxMDU4YTY4YmJhZWU2NSIsInZlcnNpb24iOjF9.MletkNEfPZm3q3WPT3T7D_tO-zGxQj_opy9IPgPJmTxVpGRnZePdXl47U4LiWHPw4BrCrExIsPeBpeeJR9ZNBQ
87
  - task:
88
  type: zero-shot-classification
89
  name: Zero-Shot Text Classification
 
93
  config: mathemakitten--winobias_antistereotype_test_cot_v3
94
  split: test
95
  metrics:
96
+ - type: accuracy
 
97
  value: 0.44660194174757284
98
+ name: Accuracy
99
  verified: true
100
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZjhkNzNlNjliYTk2NzhjNjkxNDcxNjFjZGY1N2VjNzMyN2JlNDgwNDU4YTQ3NGQ5NWQ3ZTJiMjU2MWRiYzI4MiIsInZlcnNpb24iOjF9.Ln1Bi_uUuFTmq-qBfrd7qcD_29fXC_5FTH5aenCuqmZ8TK_akoUbTxIj39FTxfFUmJtxnFgiyCcolTIOB9vgCA
101
+ - type: loss
102
  value: 0.9301078982717057
103
+ name: Loss
104
  verified: true
105
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMGUzNWRkN2RlMjA0NzNhMDhmZWMyOWVmYzk5YTZkZTYyMmRjYWRlNzMyOWYyMGYxYmI0MWJmYWNlYWFhOTliNyIsInZlcnNpb24iOjF9.m-Vjvm6yNYiShP08VEdT-XSVDUpC0Ko96F30YNtg047LE_Mx7UJ3bCSo1MnnGqQ6FIS1j4B2H1guJIvLyRMSAg
106
  - task:
107
  type: zero-shot-classification
108
  name: Zero-Shot Text Classification
 
112
  config: mathemakitten--winobias_antistereotype_test_v5
113
  split: test
114
  metrics:
115
+ - type: accuracy
 
116
  value: 0.4368932038834951
117
+ name: Accuracy
118
  verified: true
119
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYjIyNmE2YjJhOThmNzFlZjE4ZjlkOTY4NzhjMTMyYmFiM2ExNDIwYzRjMGM4NDRiNzk4ZWIzMGNiMzIwYzA0NyIsInZlcnNpb24iOjF9.4iGtnHIrNkvivgWcihLTftRGZiHfBc2-UefBbX8st55HPXemb7A6IYKic96VN8bTBumEcb0PrSMYoSUsP6UFCQ
120
+ - type: loss
121
  value: 0.9175132444057151
122
+ name: Loss
123
  verified: true
124
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNTAzODg3MjM4YWUxODJjOTU5Y2I4MGIwMGEwMTAwMTdjMWZhZTk0NDllNDQ4NWRlODI0NjBiZGI2ZjNjNmUzNyIsInZlcnNpb24iOjF9.u8PyUlKCZw5QqYWeE5WFM2t8IWacQhyHU_jyMPZoK1PvhUVItH80CxKrkimSQNMaTwOPNd53szUesfRkP_yXDA
125
  ---
126
 
127