autoevaluator HF staff committed on
Commit
17fade8
1 Parent(s): 8d8ffc1

Add evaluation results on the default config and test split of xsum

Browse files

Beep boop, I am a bot from Hugging Face's automatic model evaluator 👋!\
Your model has been evaluated on the default config and test split of the [xsum](https://huggingface.co/datasets/xsum) dataset by @zuzannad1, using the predictions stored [here](https://huggingface.co/datasets/autoevaluate/autoeval-eval-xsum-default-7c65dc-60294145404).\
Accept this pull request to see the results displayed on the [Hub leaderboard](https://huggingface.co/spaces/autoevaluate/leaderboards?dataset=xsum).\
Evaluate your model on more datasets [here](https://huggingface.co/spaces/autoevaluate/model-evaluator?dataset=xsum).

Files changed (1) hide show
  1. README.md +54 -48
README.md CHANGED
@@ -14,29 +14,29 @@ model-index:
14
  config: samsum
15
  split: train
16
  metrics:
17
- - name: ROUGE-1
18
- type: rouge
19
  value: 21.8096
 
20
  verified: true
21
- - name: ROUGE-2
22
- type: rouge
23
  value: 4.2525
 
24
  verified: true
25
- - name: ROUGE-L
26
- type: rouge
27
  value: 17.4469
 
28
  verified: true
29
- - name: ROUGE-LSUM
30
- type: rouge
31
  value: 18.8907
 
32
  verified: true
33
- - name: loss
34
- type: loss
35
  value: 3.0317161083221436
 
36
  verified: true
37
- - name: gen_len
38
- type: gen_len
39
  value: 20.3122
 
40
  verified: true
41
  - task:
42
  type: summarization
@@ -47,30 +47,36 @@ model-index:
47
  config: default
48
  split: test
49
  metrics:
50
- - name: ROUGE-1
51
- type: rouge
52
- value: 46.8623
53
- verified: true
54
- - name: ROUGE-2
55
- type: rouge
56
- value: 24.4533
57
- verified: true
58
- - name: ROUGE-L
59
- type: rouge
60
- value: 39.0548
61
- verified: true
62
- - name: ROUGE-LSUM
63
- type: rouge
64
- value: 39.0994
65
- verified: true
66
- - name: loss
67
- type: loss
68
- value: 1.5717021226882935
69
- verified: true
70
- - name: gen_len
71
- type: gen_len
72
- value: 22.8821
73
- verified: true
 
 
 
 
 
 
74
  - task:
75
  type: summarization
76
  name: Summarization
@@ -80,29 +86,29 @@ model-index:
80
  config: 3.0.0
81
  split: test
82
  metrics:
83
- - name: ROUGE-1
84
- type: rouge
85
  value: 22.2062
 
86
  verified: true
87
- - name: ROUGE-2
88
- type: rouge
89
  value: 7.6701
 
90
  verified: true
91
- - name: ROUGE-L
92
- type: rouge
93
  value: 15.4046
 
94
  verified: true
95
- - name: ROUGE-LSUM
96
- type: rouge
97
  value: 19.2182
 
98
  verified: true
99
- - name: loss
100
- type: loss
101
  value: 2.681241273880005
 
102
  verified: true
103
- - name: gen_len
104
- type: gen_len
105
  value: 25.0234
 
106
  verified: true
107
  ---
108
 
 
14
  config: samsum
15
  split: train
16
  metrics:
17
+ - type: rouge
 
18
  value: 21.8096
19
+ name: ROUGE-1
20
  verified: true
21
+ - type: rouge
 
22
  value: 4.2525
23
+ name: ROUGE-2
24
  verified: true
25
+ - type: rouge
 
26
  value: 17.4469
27
+ name: ROUGE-L
28
  verified: true
29
+ - type: rouge
 
30
  value: 18.8907
31
+ name: ROUGE-LSUM
32
  verified: true
33
+ - type: loss
 
34
  value: 3.0317161083221436
35
+ name: loss
36
  verified: true
37
+ - type: gen_len
 
38
  value: 20.3122
39
+ name: gen_len
40
  verified: true
41
  - task:
42
  type: summarization
 
47
  config: default
48
  split: test
49
  metrics:
50
+ - type: rouge
51
+ value: 46.7782
52
+ name: ROUGE-1
53
+ verified: true
54
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYzk4Njc5YTQyZDJhNWNmMWNiMDdmOGY3NGZkOTE5ODYxZWI1YzllYzVhZDBmZTdhMTUzYzBhYjg4NDExMDI0OCIsInZlcnNpb24iOjF9.FB6f5FsSE8JuwyPUC1usCF0GXFx4y7YnxNkkhu0xyuv1vG-8y2plnJqSfF30Jae1Bpb_6IGqtnCisuvC9_d_AA
55
+ - type: rouge
56
+ value: 24.3976
57
+ name: ROUGE-2
58
+ verified: true
59
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYjg4ZTg0ZjRmNGFiMTY0MjVlNjBkOGI4NzhkYjE3M2YyMDhjOWY1MTVmMzBjMmQ4Y2ViNWQ3NGU0OGQzMmJhYiIsInZlcnNpb24iOjF9.DELSboK4-QhPB_JJvX9tBZDCMc73F-n7yqKUesEiAd7rMjPAc8RLJcO_1SBxLVc0w1Pxt84Z0V-Fz8Ee-LGwDg
60
+ - type: rouge
61
+ value: 38.9758
62
+ name: ROUGE-L
63
+ verified: true
64
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNzQzNWY4Y2YxZTZjOGM3YzdmNTYxMTc0ZDJmZjNjNzEyZTdlMzYzZTMyYTcyZDgwZGZiZjNmZWQ4MzA3Y2UwMiIsInZlcnNpb24iOjF9.tMfwcvdN558uEuSa9aUXDR06q0jPKy-6s3f1h8LkO9lc7JV5oy9SSnsDXQNALIyzh3FhmyScegEcXr0LLIwUBA
65
+ - type: rouge
66
+ value: 39.0386
67
+ name: ROUGE-LSUM
68
+ verified: true
69
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNTA3YjI3MWVmOWJjZDk1YzkyMDJlYzk0MjQyYzQ1MjZhMjI2YWQ3Y2Y2ZGZiNGJjOWFhOWU2NDNkMzQxMWQzZSIsInZlcnNpb24iOjF9._XvQukx6SpEEjOHf3ivplJ8YW5_Q7oj8mc1uu5YIJaXyK9yuf9HW1DhXFxYdUm_K_cAtSRa5PPCGeKkDJfTvDQ
70
+ - type: loss
71
+ value: 1.5713257789611816
72
+ name: loss
73
+ verified: true
74
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiODhhNDFkMjdhNmI0MDc4NWFkYjkzZTc2OGM5MTY4NGMwZDE0NWZhMTBmZmY5ZGMyMWU5NTY3MjFjZWZkZTdmYiIsInZlcnNpb24iOjF9.PJcC1UpQpfSz44f8mQN5gp5ZFbEbDtRPLzK5RoPjTirRJ4cDPxX88yLI3rDiUMZRdXitEaWqQpLkFqu-5g75Bw
75
+ - type: gen_len
76
+ value: 23.089
77
+ name: gen_len
78
+ verified: true
79
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOGNlMDZmNjRjNWM1YTg0M2FmNDg4ZGE2OGMzYjc4MmE3MTk3YTQzNzM3ZmJmZmJhNDVlMGZlYWNiOGJmYmFlMSIsInZlcnNpb24iOjF9.w-ce3jWHW2dzLFaJe2R9hAiCvIdX-SIcrCe5ADTCDyBQwLrHOJf8-xFYLt9oE9EAlXJsbrhjlCMJbzFChNQTBg
80
  - task:
81
  type: summarization
82
  name: Summarization
 
86
  config: 3.0.0
87
  split: test
88
  metrics:
89
+ - type: rouge
 
90
  value: 22.2062
91
+ name: ROUGE-1
92
  verified: true
93
+ - type: rouge
 
94
  value: 7.6701
95
+ name: ROUGE-2
96
  verified: true
97
+ - type: rouge
 
98
  value: 15.4046
99
+ name: ROUGE-L
100
  verified: true
101
+ - type: rouge
 
102
  value: 19.2182
103
+ name: ROUGE-LSUM
104
  verified: true
105
+ - type: loss
 
106
  value: 2.681241273880005
107
+ name: loss
108
  verified: true
109
+ - type: gen_len
 
110
  value: 25.0234
111
+ name: gen_len
112
  verified: true
113
  ---
114