autoevaluator HF staff committed on
Commit
3c50149
1 Parent(s): 2179ab8

Add evaluation results on the default config and test split of xsum

Browse files

Beep boop, I am a bot from Hugging Face's automatic model evaluator 👋!\
Your model has been evaluated on the default config and test split of the [xsum](https://huggingface.co/datasets/xsum) dataset by @zuzannad1, using the predictions stored [here](https://huggingface.co/datasets/autoevaluate/autoeval-eval-xsum-default-7c65dc-60294145402).\
Accept this pull request to see the results displayed on the [Hub leaderboard](https://huggingface.co/spaces/autoevaluate/leaderboards?dataset=xsum).\
Evaluate your model on more datasets [here](https://huggingface.co/spaces/autoevaluate/model-evaluator?dataset=xsum).

Files changed (1) hide show
  1. README.md +66 -60
README.md CHANGED
@@ -1,9 +1,9 @@
1
  ---
2
- tags:
3
- - summarization
4
  language:
5
  - en
6
  license: mit
 
 
7
  model-index:
8
  - name: facebook/bart-large-xsum
9
  results:
@@ -16,29 +16,29 @@ model-index:
16
  config: 3.0.0
17
  split: test
18
  metrics:
19
- - name: ROUGE-1
20
- type: rouge
21
  value: 25.2697
 
22
  verified: true
23
- - name: ROUGE-2
24
- type: rouge
25
  value: 7.6638
 
26
  verified: true
27
- - name: ROUGE-L
28
- type: rouge
29
  value: 17.1808
 
30
  verified: true
31
- - name: ROUGE-LSUM
32
- type: rouge
33
  value: 21.7933
 
34
  verified: true
35
- - name: loss
36
- type: loss
37
  value: 3.5042972564697266
 
38
  verified: true
39
- - name: gen_len
40
- type: gen_len
41
  value: 27.4462
 
42
  verified: true
43
  - task:
44
  type: summarization
@@ -49,30 +49,36 @@ model-index:
49
  config: default
50
  split: test
51
  metrics:
52
- - name: ROUGE-1
53
- type: rouge
54
- value: 45.4525
55
- verified: true
56
- - name: ROUGE-2
57
- type: rouge
58
- value: 22.3455
59
- verified: true
60
- - name: ROUGE-L
61
- type: rouge
62
- value: 37.2302
63
- verified: true
64
- - name: ROUGE-LSUM
65
- type: rouge
66
- value: 37.2323
67
- verified: true
68
- - name: loss
69
- type: loss
70
- value: 2.3128726482391357
71
- verified: true
72
- - name: gen_len
73
- type: gen_len
 
 
 
 
74
  value: 25.5435
 
75
  verified: true
 
76
  - task:
77
  type: summarization
78
  name: Summarization
@@ -82,29 +88,29 @@ model-index:
82
  config: samsum
83
  split: train
84
  metrics:
85
- - name: ROUGE-1
86
- type: rouge
87
  value: 24.7852
 
88
  verified: true
89
- - name: ROUGE-2
90
- type: rouge
91
  value: 5.2533
 
92
  verified: true
93
- - name: ROUGE-L
94
- type: rouge
95
  value: 18.6792
 
96
  verified: true
97
- - name: ROUGE-LSUM
98
- type: rouge
99
  value: 20.629
 
100
  verified: true
101
- - name: loss
102
- type: loss
103
  value: 3.746837854385376
 
104
  verified: true
105
- - name: gen_len
106
- type: gen_len
107
  value: 23.1206
 
108
  verified: true
109
  - task:
110
  type: summarization
@@ -115,29 +121,29 @@ model-index:
115
  config: samsum
116
  split: test
117
  metrics:
118
- - name: ROUGE-1
119
- type: rouge
120
  value: 24.9158
 
121
  verified: true
122
- - name: ROUGE-2
123
- type: rouge
124
  value: 5.5837
 
125
  verified: true
126
- - name: ROUGE-L
127
- type: rouge
128
  value: 18.8935
 
129
  verified: true
130
- - name: ROUGE-LSUM
131
- type: rouge
132
  value: 20.76
 
133
  verified: true
134
- - name: loss
135
- type: loss
136
  value: 3.775235891342163
 
137
  verified: true
138
- - name: gen_len
139
- type: gen_len
140
  value: 23.0928
 
141
  verified: true
142
  ---
143
  ### Bart model finetuned on xsum
 
1
  ---
 
 
2
  language:
3
  - en
4
  license: mit
5
+ tags:
6
+ - summarization
7
  model-index:
8
  - name: facebook/bart-large-xsum
9
  results:
 
16
  config: 3.0.0
17
  split: test
18
  metrics:
19
+ - type: rouge
 
20
  value: 25.2697
21
+ name: ROUGE-1
22
  verified: true
23
+ - type: rouge
 
24
  value: 7.6638
25
+ name: ROUGE-2
26
  verified: true
27
+ - type: rouge
 
28
  value: 17.1808
29
+ name: ROUGE-L
30
  verified: true
31
+ - type: rouge
 
32
  value: 21.7933
33
+ name: ROUGE-LSUM
34
  verified: true
35
+ - type: loss
 
36
  value: 3.5042972564697266
37
+ name: loss
38
  verified: true
39
+ - type: gen_len
 
40
  value: 27.4462
41
+ name: gen_len
42
  verified: true
43
  - task:
44
  type: summarization
 
49
  config: default
50
  split: test
51
  metrics:
52
+ - type: rouge
53
+ value: 45.4419
54
+ name: ROUGE-1
55
+ verified: true
56
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNzkzZTM0ZTQ3OTJhODgxOGJhNWE0Y2QxMjcwMjBiODdlMzY3Yjk1MjQ1MDJlODQwZjNlZWUyMTEwNmYyYjUzYiIsInZlcnNpb24iOjF9.3_JITjNVx36poltYC02qpeuMiAyYu2AOrfMpCACYdX2_FTtSxxWeYkUJHEbBnuJQKgERHmJncLcQxbh4IlvXBA
57
+ - type: rouge
58
+ value: 22.3723
59
+ name: ROUGE-2
60
+ verified: true
61
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZDQzZTkxMWM4ZDgyOTcyYmI1MDgyNTk1YmExNTJmNDAxYjViNjlmZjUzNjQ3ZDQwNzQ5ZWQ0ZDU1YTFjYTdlYiIsInZlcnNpb24iOjF9.sDIZfKrHyHDcuYxKNYcvrl-1eMrnwMtm8cA-xDxNP4hX7eEhNoQSAo_CLiPibibcHNMOjZX9fPCMULiGb0qnBw
62
+ - type: rouge
63
+ value: 37.2229
64
+ name: ROUGE-L
65
+ verified: true
66
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOWM2MThmY2Y1YmY4OTEwYTZjZjRiNTYxMTY4NDVmMzBhOGM3YTlkZmExMDZmOGU0ZmM1MjMwM2RjOWU1YWQxNyIsInZlcnNpb24iOjF9.TNMvdtMB-5DHUth3HeMc9IilhlciZgPI8AW8RLWl5fWTDko8X0JRk-gTMW6b6cNcRUe2lmfZ9I_ZSd-ZvnjEBA
67
+ - type: rouge
68
+ value: 37.2239
69
+ name: ROUGE-LSUM
70
+ verified: true
71
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYzZlMGI2NmRmMTEwMWFhNzAxOTRlMWQyNzA0YjI1ODE3ZjVjYmE1ODMzZTE1M2Y3MTk2YTcxZDYwN2U2NGI4NiIsInZlcnNpb24iOjF9.tcsUnGTDhbrOi1ZNusrI8Do4kt8BuNLD91fhbJwOsr9EvP6NlAAWnfoG1iBSNYKByMcC9Y31lwZlUOUBvnUdDQ
72
+ - type: loss
73
+ value: 2.3128323554992676
74
+ name: loss
75
+ verified: true
76
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNzQ3ODBjNWY2ODdhZTAwODUxY2ViMjBmNDY3N2EwMTUzYzdjOGJlZjZlMTI0ZjhkM2I0MGRjNjM5OWNhZDU3NSIsInZlcnNpb24iOjF9.IleOf5Dq60z64kqp6w5dyc6azb1egIARnnKch-x-hpKdQUdTMyPmUO34SpWzuhMt9bJQXRG5qNxb0mpr2-ZMCg
77
+ - type: gen_len
78
  value: 25.5435
79
+ name: gen_len
80
  verified: true
81
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZTE3ODlkZDhhMTEwNTlhNzVjMWMxMGQyZDc0OTc0NWY0MDBlMzUzNGI3MGQwNmJmNzQ3NTQ5MjhhNDhiYTM5YSIsInZlcnNpb24iOjF9.e7nHzg3OH3zkWiCj3iZVAAQG6Zy0E16_MJzBEEyGTlSVuPGMziNfcjRvLD6WeY_6lXUonEwc9lur0X-qUvB7Aw
82
  - task:
83
  type: summarization
84
  name: Summarization
 
88
  config: samsum
89
  split: train
90
  metrics:
91
+ - type: rouge
 
92
  value: 24.7852
93
+ name: ROUGE-1
94
  verified: true
95
+ - type: rouge
 
96
  value: 5.2533
97
+ name: ROUGE-2
98
  verified: true
99
+ - type: rouge
 
100
  value: 18.6792
101
+ name: ROUGE-L
102
  verified: true
103
+ - type: rouge
 
104
  value: 20.629
105
+ name: ROUGE-LSUM
106
  verified: true
107
+ - type: loss
 
108
  value: 3.746837854385376
109
+ name: loss
110
  verified: true
111
+ - type: gen_len
 
112
  value: 23.1206
113
+ name: gen_len
114
  verified: true
115
  - task:
116
  type: summarization
 
121
  config: samsum
122
  split: test
123
  metrics:
124
+ - type: rouge
 
125
  value: 24.9158
126
+ name: ROUGE-1
127
  verified: true
128
+ - type: rouge
 
129
  value: 5.5837
130
+ name: ROUGE-2
131
  verified: true
132
+ - type: rouge
 
133
  value: 18.8935
134
+ name: ROUGE-L
135
  verified: true
136
+ - type: rouge
 
137
  value: 20.76
138
+ name: ROUGE-LSUM
139
  verified: true
140
+ - type: loss
 
141
  value: 3.775235891342163
142
+ name: loss
143
  verified: true
144
+ - type: gen_len
 
145
  value: 23.0928
146
+ name: gen_len
147
  verified: true
148
  ---
149
  ### Bart model finetuned on xsum