Safetensors
Romanian
llama
Eval Results
mihaimasala committed · Commit 82e7308 · 1 Parent(s): f3657e4

Update README.md

Files changed (1)
  1. README.md +46 -46
README.md CHANGED
@@ -35,7 +35,7 @@ model-index:
     metrics:
     - name: Average accuracy
       type: accuracy
-      value: 52.73
+      value: 52.74
   - task:
       type: text-generation
     dataset:
@@ -62,7 +62,7 @@ model-index:
     metrics:
     - name: Average accuracy
       type: accuracy
-      value: 65.84
+      value: 65.87
   - task:
       type: text-generation
     dataset:
@@ -89,7 +89,7 @@ model-index:
     metrics:
     - name: Average accuracy
       type: accuracy
-      value: 47.81
+      value: 47.82
   - task:
       type: text-generation
     dataset:
@@ -98,7 +98,7 @@ model-index:
     metrics:
     - name: Average macro-f1
       type: macro-f1
-      value: 0.00
+      value: 96.10
   - task:
       type: text-generation
     dataset:
@@ -107,7 +107,7 @@ model-index:
     metrics:
     - name: Average macro-f1
       type: macro-f1
-      value: 0.00
+      value: 55.37
   - task:
       type: text-generation
     dataset:
@@ -134,7 +134,7 @@ model-index:
     metrics:
     - name: Average bleu
       type: bleu
-      value: 0.00
+      value: 21.29
   - task:
       type: text-generation
     dataset:
@@ -143,7 +143,7 @@ model-index:
     metrics:
     - name: Average bleu
       type: bleu
-      value: 0.00
+      value: 21.86
   - task:
       type: text-generation
     dataset:
@@ -170,7 +170,7 @@ model-index:
     metrics:
     - name: Average exact_match
       type: exact_match
-      value: 0.00
+      value: 21.58
   - task:
       type: text-generation
     dataset:
@@ -179,7 +179,7 @@ model-index:
     metrics:
     - name: Average f1
       type: f1
-      value: 0.00
+      value: 36.54
   - task:
       type: text-generation
     dataset:
@@ -206,7 +206,7 @@ model-index:
     metrics:
     - name: Average spearman
       type: spearman
-      value: 0.00
+      value: 78.01
   - task:
       type: text-generation
     dataset:
@@ -215,7 +215,7 @@ model-index:
     metrics:
     - name: Average pearson
       type: pearson
-      value: 0.00
+      value: 77.98
   - task:
       type: text-generation
     dataset:
@@ -299,7 +299,7 @@ model-index:
       value: 64.40
     - name: 1-shot
       type: accuracy
-      value: 66.14
+      value: 66.22
     - name: 3-shot
       type: accuracy
       value: 65.75
@@ -350,16 +350,16 @@ model-index:
     metrics:
     - name: 0-shot
       type: macro-f1
-      value: 0.00
+      value: 93.11
     - name: 1-shot
       type: macro-f1
-      value: 0.00
+      value: 96.06
     - name: 3-shot
       type: macro-f1
-      value: 0.00
+      value: 97.53
     - name: 5-shot
       type: macro-f1
-      value: 0.00
+      value: 97.70
   - task:
       type: text-generation
     dataset:
@@ -368,16 +368,16 @@ model-index:
     metrics:
     - name: 0-shot
       type: macro-f1
-      value: 0.00
+      value: 65.61
     - name: 1-shot
       type: macro-f1
-      value: 0.00
+      value: 55.73
     - name: 3-shot
       type: macro-f1
-      value: 0.00
+      value: 46.33
     - name: 5-shot
       type: macro-f1
-      value: 0.00
+      value: 53.82
   - task:
       type: text-generation
     dataset:
@@ -386,16 +386,16 @@ model-index:
     metrics:
     - name: 0-shot
       type: bleu
-      value: 0.00
+      value: 6.89
     - name: 1-shot
       type: bleu
-      value: 0.00
+      value: 26.62
     - name: 3-shot
       type: bleu
-      value: 0.00
+      value: 25.70
     - name: 5-shot
       type: bleu
-      value: 0.00
+      value: 25.94
   - task:
       type: text-generation
     dataset:
@@ -404,16 +404,16 @@ model-index:
     metrics:
     - name: 0-shot
       type: bleu
-      value: 0.00
+      value: 2.16
     - name: 1-shot
       type: bleu
-      value: 0.00
+      value: 16.65
     - name: 3-shot
       type: bleu
-      value: 0.00
+      value: 33.41
     - name: 5-shot
       type: bleu
-      value: 0.00
+      value: 35.22
   - task:
       type: text-generation
     dataset:
@@ -422,16 +422,16 @@ model-index:
     metrics:
     - name: 0-shot
       type: exact_match
-      value: 0.00
+      value: 8.99
     - name: 1-shot
       type: exact_match
-      value: 0.00
+      value: 35.88
     - name: 3-shot
       type: exact_match
-      value: 0.00
+      value: 31.26
     - name: 5-shot
       type: exact_match
-      value: 0.00
+      value: 10.17
   - task:
       type: text-generation
     dataset:
@@ -440,16 +440,16 @@ model-index:
     metrics:
     - name: 0-shot
       type: f1
-      value: 0.00
+      value: 20.00
     - name: 1-shot
       type: f1
-      value: 0.00
+      value: 59.41
     - name: 3-shot
       type: f1
-      value: 0.00
+      value: 48.41
     - name: 5-shot
       type: f1
-      value: 0.00
+      value: 18.33
   - task:
       type: text-generation
     dataset:
@@ -458,13 +458,13 @@ model-index:
     metrics:
     - name: 0-shot
       type: spearman
-      value: 0.00
+      value: 78.10
     - name: 1-shot
       type: spearman
-      value: 0.00
+      value: 77.81
     - name: 3-shot
       type: spearman
-      value: 0.00
+      value: 78.11
   - task:
       type: text-generation
     dataset:
@@ -473,13 +473,13 @@ model-index:
     metrics:
     - name: 0-shot
       type: pearson
-      value: 0.00
+      value: 78.30
     - name: 1-shot
       type: pearson
-      value: 0.00
+      value: 77.58
     - name: 3-shot
       type: pearson
-      value: 0.00
+      value: 78.06
 
 ---
 
@@ -611,13 +611,13 @@ print(tokenizer.decode(outputs[0]))
 <td><center><strong>RO-EN<br>(Bleu)</strong></center>
 </tr>
 <tr>
-<td>Llama-3.1-8B-Instruct</td><td><center><strong>95.74</strong></center></td><td><center>59.49</center></td><td><center><strong>98.57</strong></center></td><td><center>82.41</center></td><td><center>19.01</center></td><td><center><strong>27.77</strong></center></td><td><center><strong>29.02</strong></center></td><td><center>39.80</center></td>
+<td>Llama-3.1-8B-Instruct</td><td><center>95.74</center></td><td><center>59.49</center></td><td><center><strong>98.57</strong></center></td><td><center>82.41</center></td><td><center>19.01</center></td><td><center><strong>27.77</strong></center></td><td><center><strong>29.02</strong></center></td><td><center>39.80</center></td>
 </tr>
 <tr>
 <td>RoLlama3.1-8b-Instruct-2024-10-09</td><td><center>94.56</center></td><td><center><strong>60.10</strong></center></td><td><center>95.12</center></td><td><center><strong>87.53</strong></center></td><td><center><strong>21.88</strong></center></td><td><center>23.99</center></td><td><center>28.27</center></td><td><center><strong>40.44</strong></center></td>
 </tr>
 <tr>
-<td><em>RoLlama3.1-8b-Instruct-DPO-2024-10-09</em></td><td><center><em>-</em></center></td><td><center><em>-</em></center></td><td><center><em>-</em></center></td><td><center><em>-</em></center></td><td><center><em>-</em></center></td><td><center><em>-</em></center></td><td><center><em>-</em></center></td><td><center><em>-</em></center></td>
+<td><em>RoLlama3.1-8b-Instruct-DPO-2024-10-09</em></td><td><center><em><strong>96.10</strong></em></center></td><td><center><em>55.37</em></center></td><td><center><em>-</em></center></td><td><center><em>-</em></center></td><td><center><em>21.29</em></center></td><td><center><em>21.86</em></center></td><td><center><em>-</em></center></td><td><center><em>-</em></center></td>
 </tr>
 </tbody>
 </table>
@@ -652,10 +652,10 @@ print(tokenizer.decode(outputs[0]))
 <td>Llama-3.1-8B-Instruct</td><td><center><strong>44.96</strong></center></td><td><center><strong>64.45</strong></center></td><td><center><strong>69.50</strong></center></td><td><center><strong>84.31</strong></center></td><td><center>72.11</center></td><td><center>71.64</center></td><td><center>84.59</center></td><td><center>84.96</center></td>
 </tr>
 <tr>
-<td>RoLlama3.1-8b-Instruct-2024-10-09</td><td><center>13.59</center></td><td><center>23.56</center></td><td><center>49.41</center></td><td><center>62.93</center></td><td><center><strong>75.89</strong></center></td><td><center><strong>76.00</strong></center></td><td><center><strong>86.86</strong></center></td><td><center><strong>87.05</strong></center></td>
+<td>RoLlama3.1-8b-Instruct-2024-10-09</td><td><center>13.59</center></td><td><center>23.56</center></td><td><center>49.41</center></td><td><center>62.93</center></td><td><center>75.89</center></td><td><center>76.00</center></td><td><center><strong>86.86</strong></center></td><td><center><strong>87.05</strong></center></td>
 </tr>
 <tr>
-<td><em>RoLlama3.1-8b-Instruct-DPO-2024-10-09</em></td><td><center><em>-</em></center></td><td><center><em>-</em></center></td><td><center><em>-</em></center></td><td><center><em>-</em></center></td><td><center><em>-</em></center></td><td><center><em>-</em></center></td><td><center><em>-</em></center></td><td><center><em>-</em></center></td>
+<td><em>RoLlama3.1-8b-Instruct-DPO-2024-10-09</em></td><td><center><em>21.58</em></center></td><td><center><em>36.54</em></center></td><td><center><em>-</em></center></td><td><center><em>-</em></center></td><td><center><em><strong>78.01</strong></em></center></td><td><center><em><strong>77.98</strong></em></center></td><td><center><em>-</em></center></td><td><center><em>-</em></center></td>
 </tr>
 </tbody>
 </table>
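
A note on how the new numbers hang together: the updated "Average …" entries in the model-index appear to be simple arithmetic means of the per-shot scores added in the same commit (0/1/3/5-shot for most tasks, 0/1/3-shot for the correlation metrics). Below is a minimal sanity-check sketch, assuming that relationship; the values are copied from the hunks above, and the task labels in the dictionary are illustrative only.

```python
# Sanity check (not part of the commit): are the "Average ..." values plain
# arithmetic means of the per-shot scores added in this diff?
# Numbers are copied from the hunks above; the labels are illustrative.

checks = {
    "Average macro-f1 (first classification task)": ([93.11, 96.06, 97.53, 97.70], 96.10),
    "Average bleu (first translation direction)":   ([6.89, 26.62, 25.70, 25.94], 21.29),
    "Average spearman (0/1/3-shot only)":           ([78.10, 77.81, 78.11], 78.01),
}

for label, (per_shot, reported) in checks.items():
    mean = sum(per_shot) / len(per_shot)
    # the model card rounds to two decimals, so allow half a unit in the last place
    consistent = abs(mean - reported) <= 0.005
    print(f"{label}: mean={mean:.4f}, reported={reported:.2f}, consistent={consistent}")
```

The same check applies to the remaining tasks (the second macro-f1 block, the second bleu block, exact_match, f1, and pearson) with their respective per-shot values from the diff.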