Commit
•
e455aeb
1
Parent(s):
c4455b5
Add verifyToken field to verify evaluation results are produced by Hugging Face's automatic model evaluator
Browse files

Beep boop, I am a bot from Hugging Face's automatic model evaluator 👋! We've added a new `verifyToken` field to your evaluation results to verify that they are produced by the model evaluator. Accept this PR to ensure that your results remain listed as **verified** on the [Hub leaderboard](https://huggingface.co/spaces/autoevaluate/leaderboards).
README.md
CHANGED
@@ -14,30 +14,36 @@ model-index:
|
|
14 |
config: samsum
|
15 |
split: train
|
16 |
metrics:
|
17 |
-
-
|
18 |
-
type: rouge
|
19 |
value: 21.8096
|
|
|
20 |
verified: true
|
21 |
-
|
22 |
-
|
23 |
value: 4.2525
|
|
|
24 |
verified: true
|
25 |
-
|
26 |
-
|
27 |
value: 17.4469
|
|
|
28 |
verified: true
|
29 |
-
|
30 |
-
|
31 |
value: 18.8907
|
|
|
32 |
verified: true
|
33 |
-
|
34 |
-
|
35 |
value: 3.0317161083221436
|
|
|
36 |
verified: true
|
37 |
-
|
38 |
-
|
39 |
value: 20.3122
|
|
|
40 |
verified: true
|
|
|
41 |
- task:
|
42 |
type: summarization
|
43 |
name: Summarization
|
@@ -47,30 +53,36 @@ model-index:
|
|
47 |
config: default
|
48 |
split: test
|
49 |
metrics:
|
50 |
-
-
|
51 |
-
type: rouge
|
52 |
value: 46.8623
|
|
|
53 |
verified: true
|
54 |
-
|
55 |
-
|
56 |
value: 24.4533
|
|
|
57 |
verified: true
|
58 |
-
|
59 |
-
|
60 |
value: 39.0548
|
|
|
61 |
verified: true
|
62 |
-
|
63 |
-
|
64 |
value: 39.0994
|
|
|
65 |
verified: true
|
66 |
-
|
67 |
-
|
68 |
value: 1.5717021226882935
|
|
|
69 |
verified: true
|
70 |
-
|
71 |
-
|
72 |
value: 22.8821
|
|
|
73 |
verified: true
|
|
|
74 |
- task:
|
75 |
type: summarization
|
76 |
name: Summarization
|
@@ -80,30 +92,36 @@ model-index:
|
|
80 |
config: 3.0.0
|
81 |
split: test
|
82 |
metrics:
|
83 |
-
-
|
84 |
-
type: rouge
|
85 |
value: 22.2062
|
|
|
86 |
verified: true
|
87 |
-
|
88 |
-
|
89 |
value: 7.6701
|
|
|
90 |
verified: true
|
91 |
-
|
92 |
-
|
93 |
value: 15.4046
|
|
|
94 |
verified: true
|
95 |
-
|
96 |
-
|
97 |
value: 19.2182
|
|
|
98 |
verified: true
|
99 |
-
|
100 |
-
|
101 |
value: 2.681241273880005
|
|
|
102 |
verified: true
|
103 |
-
|
104 |
-
|
105 |
value: 25.0234
|
|
|
106 |
verified: true
|
|
|
107 |
---
|
108 |
|
109 |
### Pegasus Models
|
|
|
14 |
config: samsum
|
15 |
split: train
|
16 |
metrics:
|
17 |
+
- type: rouge
|
|
|
18 |
value: 21.8096
|
19 |
+
name: ROUGE-1
|
20 |
verified: true
|
21 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNTM5ZTVlMTk2M2JiM2Y3ZTZkOWQyODQwZTQ1Nzc3ZDJiZDc5NzhhNTViNGRlYTI2MjkyMDBjMDMzNGU3MjFhYiIsInZlcnNpb24iOjF9.cETaihI9hH44D3QZh_yXTjEhOgumUlKr5wZqIZ_IlnE_jg0_q_KKWM986acH1K8nfHfJbDH_uZbQqqJUEuGXDQ
|
22 |
+
- type: rouge
|
23 |
value: 4.2525
|
24 |
+
name: ROUGE-2
|
25 |
verified: true
|
26 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNDU2YTMwYmRmMGJlMzhlZDczNzczZWM4ZjBjMzE5ZmM0ZmIwYzA2MWQ2MTBhMTUwODM4YjdhYjFmZmY1OGNjOSIsInZlcnNpb24iOjF9.PAtZSns7ubNapkT-P3PzfvWUGET1dHN8AnsjYbYoaSZJ8AUl-5d3rCutt0bEAFffHFCpq23YT3Go2KOaE16PDg
|
27 |
+
- type: rouge
|
28 |
value: 17.4469
|
29 |
+
name: ROUGE-L
|
30 |
verified: true
|
31 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiM2VmMTRlNmIzYzlhNzY0Y2ZlYzAyOGJhNDQwNjk1YTdhZTZjNDU2ZmJhZDc5MTlhYjgyYjg4NWE0ZTA1NWExMiIsInZlcnNpb24iOjF9.CznDprjeWu6XWUwgFjGmsV2Z1W7xXuy9jDRpH5NMFlZCHAkqzoZSlMbUTwPnJF51LVJrS_WXJaXfAxKEX_LCCQ
|
32 |
+
- type: rouge
|
33 |
value: 18.8907
|
34 |
+
name: ROUGE-LSUM
|
35 |
verified: true
|
36 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiM2I2ZDZjMzUzNzFjNzdmMGVmODJmNzY0ZGIxY2Q3MWEwNThmODNjN2FhZTE4N2M4NjlhZjU1ZTBiODI0NThiYiIsInZlcnNpb24iOjF9.JgWVNm7FZ0r6Kn-cGAfzYszUEuYooLxbxSxSDQUvqiZVuI7tjoDp-dP8-DkgTH0yH2sqNSGBNsBnXNo63YP-AQ
|
37 |
+
- type: loss
|
38 |
value: 3.0317161083221436
|
39 |
+
name: loss
|
40 |
verified: true
|
41 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYzg3MjRmYzRjYmIzYjk3NTZhZGM2Mzc3NjNkNTIzY2IwYmEwYWZlMGE4NTUwNjQ5YWFkNTdhMjRjNjIzZmFhYSIsInZlcnNpb24iOjF9.nns3EN07Q66USOa_TIx465wYQajhg1DcTQjU47EeKayi1uxbroU6fi90I-9k52xCGIr8RmeT2W7eNkkfFW2sCw
|
42 |
+
- type: gen_len
|
43 |
value: 20.3122
|
44 |
+
name: gen_len
|
45 |
verified: true
|
46 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMTgxZWNhZjM4ZjE1YTY2MDBhN2ZmYWNiODU1YjNjMjQxMjVhMWYwYzNkMWU4MDM0NTYzN2IxMjIyYjNhZGIxZiIsInZlcnNpb24iOjF9.yMAFwoI4EebWho-7Jy9vAFFUHNytiAAPxr_2a99jL_d72sKjZdVQdU0Ag3fg_dRnVhqDeTVEIqwIkAJfIr8PDA
|
47 |
- task:
|
48 |
type: summarization
|
49 |
name: Summarization
|
|
|
53 |
config: default
|
54 |
split: test
|
55 |
metrics:
|
56 |
+
- type: rouge
|
|
|
57 |
value: 46.8623
|
58 |
+
name: ROUGE-1
|
59 |
verified: true
|
60 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiM2NjMjg2Y2Q4NjNlNmZmNmZkODlkOWU1OTg1YTcwODVmNDBiZmY4MmIwYmIyNjFiMzRkMDhkNzM2ZjFiMDYwYyIsInZlcnNpb24iOjF9.aunhAPHEDSc_V855wQiRALNEsOyD_ZTbFvi_Mu-CeZYMXO05G3ERYD6mUvdtaIP7X_Yo0vPq0y2uTQ9nVVmEDA
|
61 |
+
- type: rouge
|
62 |
value: 24.4533
|
63 |
+
name: ROUGE-2
|
64 |
verified: true
|
65 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNjNiZTI0ODUyNGJlOTEwMGU5YjcyYzA2MzA3NThlY2ZjZTUzYTc1NThlNWZmYTNkYmQxYmI4NGUzODNlMTIzNCIsInZlcnNpb24iOjF9.8NPuJo_8Gx8EW66O835sxyYz0pteEYJ2jwwF3R24gadj0cdi7IM1LGUIPyS0IRGmYYWtqu6ybKx8xClL0TO-Ag
|
66 |
+
- type: rouge
|
67 |
value: 39.0548
|
68 |
+
name: ROUGE-L
|
69 |
verified: true
|
70 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiY2EyZDk4MWUyNDQ1MDA3N2QxZTMwOWM4YTJiMzM2NzZhYTE5YmE0NTYyMjg0M2NhMmQzZGYxNjQ4YjIxNDYyNyIsInZlcnNpb24iOjF9.4zORYWUO7fMkwR_HkDnd-g17Ar_MOJF_8T2rLHujBIW1HnCrNIzXHSIp1vgvn4zh6fO0tnjZDQvCD9yEfzrtDw
|
71 |
+
- type: rouge
|
72 |
value: 39.0994
|
73 |
+
name: ROUGE-LSUM
|
74 |
verified: true
|
75 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZDAxY2MxZTYzNWRhNWM3YTc2NjE5NzRmMzEwYTgyNDI0NmRlYjIyOTJiNzJjZWYzYThjZjczNTQzOTQzODZmNSIsInZlcnNpb24iOjF9.q3avkS3k-ljplHJbGl4nKhhH0coxe5ybHlwAJy_I1mtLoAW12k_iafy8kpXwQCmiKjqE6bgmFNPs0kkOo6JDAQ
|
76 |
+
- type: loss
|
77 |
value: 1.5717021226882935
|
78 |
+
name: loss
|
79 |
verified: true
|
80 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNmJlMzZiNjExNGU4ZjliOTgwZGZiZDFjNGI2N2NjYWZhNmEyMTI3YmZiYWRmNGE1MmZiNjcwZDZmOWFmNmMwZiIsInZlcnNpb24iOjF9.T5ecj6AYQ8eK-IDNqqtvkOWFvW2Q-f5kswto85P1fX_UMy9rgr2IwBRud-FAcQMnKktHK1ld94mjnDO9LcUcCQ
|
81 |
+
- type: gen_len
|
82 |
value: 22.8821
|
83 |
+
name: gen_len
|
84 |
verified: true
|
85 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYzNiN2ExMDJhYjNmMDU2NmIxMTgwZjM5NGNmMzIxZGVmNGVkNzJmNGU4OWEwNjI1NmRmOTYyYjM5NTA1YWE0YSIsInZlcnNpb24iOjF9.pI2ulnlSyqN-EUcl9v5xbKiK-z6FDKeMpIjwqhw0WvjMf623WnqzSJZo-foFbf6DM7yFhMScgVbOOC3JEsKNCQ
|
86 |
- task:
|
87 |
type: summarization
|
88 |
name: Summarization
|
|
|
92 |
config: 3.0.0
|
93 |
split: test
|
94 |
metrics:
|
95 |
+
- type: rouge
|
|
|
96 |
value: 22.2062
|
97 |
+
name: ROUGE-1
|
98 |
verified: true
|
99 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNjFiNGZkNDExNjUwMjhkNzY3M2NmMDgxNGVhMzZmN2E4YjIzN2Y0ZjMxNDUyOGU2NGYwNDNlOTkzMWI1OTYwNSIsInZlcnNpb24iOjF9.F_6E8QID6kjOGEFCFaUKiBmgUTtSDTQKf4F-L2NDLSDX7drF6HvTMtm9oXUeP_P9cSUNX5OvDUSJPSlfl-C0Cw
|
100 |
+
- type: rouge
|
101 |
value: 7.6701
|
102 |
+
name: ROUGE-2
|
103 |
verified: true
|
104 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMzRhMzVmYjcyNTRlZmVhMzM4NjY3ODczZjc2M2MwM2Y5MTRhNjYxZjY4YTQ3ZmU2YzA2MGMxMzE2YjZiNGMxZSIsInZlcnNpb24iOjF9.U7IY76kNrcYgGh-Uy3H9DtaSRugShwK6Lp-I9Ov6g_r3X16Pi2cI8ZS68BaQiRvowXOKIi9R5iJLLUK3C4J2Cg
|
105 |
+
- type: rouge
|
106 |
value: 15.4046
|
107 |
+
name: ROUGE-L
|
108 |
verified: true
|
109 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMzZjNDUxYWJjNWU0ZGY4M2I2NTBmYTBmODhiMDJiNjIzNzViNGY4YjVlMTM5Y2FlNjUyMWFkYTljYjRhM2NlZCIsInZlcnNpb24iOjF9.tyvSIMTYmFFsD6zzo_CD2tYIZiovH5Ll26qGO78kUZEvYRWfxPMhUwtrylaOsaNdTdwDz-M-TF6IQxpSXl8eBw
|
110 |
+
- type: rouge
|
111 |
value: 19.2182
|
112 |
+
name: ROUGE-LSUM
|
113 |
verified: true
|
114 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiY2Q3N2ZiYjVlZTY1OTYzZWRiY2RmZDkwMjM0YzIxYTUyYmJlMWQ2NDBhNTVjYjM0NGNmNWFjOWJmYzc4NDkwYiIsInZlcnNpb24iOjF9.LV3UbhyIL4EP1n5KBqAc29GMh7IIxo5tFbzPBV9Mc8Ew08d6zMs1U5aZwc0an7ClBP20B-haYHwk8JNDBZHcCg
|
115 |
+
- type: loss
|
116 |
value: 2.681241273880005
|
117 |
+
name: loss
|
118 |
verified: true
|
119 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiY2FjNzRjNGY0ZDkzZDM1NDIzNTQwYmQyNzE3NDQzNTBlMjIxZTFhYmZlZTRmYWI2YjRiYTA5NDYyZTM4NjM3MSIsInZlcnNpb24iOjF9.h1WwEclg4cWWYlCH8O6ifsRvOVFZ2uZj6nk7_m89rHllmOhUtVxCeSTPdNEynyv9B_SKUWPLaS_5Uj55FpF2AA
|
120 |
+
- type: gen_len
|
121 |
value: 25.0234
|
122 |
+
name: gen_len
|
123 |
verified: true
|
124 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNjAxM2Y1ZDgzMTc3ZmVlOWI5YzYyMmExMzc3Y2JkYzJjZDRkODY5MWE4ZGQzZjFmNTFjNDZlMTcyZmQ3MWM5MyIsInZlcnNpb24iOjF9.6XKMtrC2IqCcphlUI8aJp673IRfsnjJhOJrvftxyA_k3YxSDI_3Iyo_mURpXAVo4llb52lB638bRHZv1tvdvDg
|
125 |
---
|
126 |
|
127 |
### Pegasus Models
|