OpenLLM-Ro
/

RoLlama2-7b-Instruct-2024-05-14

@@ -4,480 +4,6 @@ language:
 - ro
 base_model:
 - OpenLLM-Ro/RoLlama2-7b-Base
-model-index:
-- name: OpenLLM-Ro/RoLlama2-7b-Instruct-2024-05-14
-  results:
-  - task:
-      type: text-generation
-    dataset:
-      name: RoMT-Bench
-      type: RoMT-Bench
-    metrics:
-    - name: Score
-      type: Score
-      value: 3.86
-  - task:
-      type: text-generation
-    dataset:
-      name: RoCulturaBench
-      type: RoCulturaBench
-    metrics:
-    - name: Score
-      type: Score
-      value: 3.77
-  - task:
-      type: text-generation
-    dataset:
-      name: Romanian_Academic_Benchmarks
-      type: Romanian_Academic_Benchmarks
-    metrics:
-    - name: Average accuracy
-      type: accuracy
-      value: 45.71
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_arc_challenge
-      type: OpenLLM-Ro/ro_arc_challenge
-    metrics:
-    - name: Average accuracy
-      type: accuracy
-      value: 43.66
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_mmlu
-      type: OpenLLM-Ro/ro_mmlu
-    metrics:
-    - name: Average accuracy
-      type: accuracy
-      value: 39.7
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_winogrande
-      type: OpenLLM-Ro/ro_winogrande
-    metrics:
-    - name: Average accuracy
-      type: accuracy
-      value: 70.34
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_hellaswag
-      type: OpenLLM-Ro/ro_hellaswag
-    metrics:
-    - name: Average accuracy
-      type: accuracy
-      value: 57.36
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_gsm8k
-      type: OpenLLM-Ro/ro_gsm8k
-    metrics:
-    - name: Average accuracy
-      type: accuracy
-      value: 18.78
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_truthfulqa
-      type: OpenLLM-Ro/ro_truthfulqa
-    metrics:
-    - name: Average accuracy
-      type: accuracy
-      value: 44.44
-  - task:
-      type: text-generation
-    dataset:
-      name: LaRoSeDa_binary
-      type: LaRoSeDa_binary
-    metrics:
-    - name: Average macro-f1
-      type: macro-f1
-      value: 97.48
-  - task:
-      type: text-generation
-    dataset:
-      name: LaRoSeDa_multiclass
-      type: LaRoSeDa_multiclass
-    metrics:
-    - name: Average macro-f1
-      type: macro-f1
-      value: 65.26
-  - task:
-      type: text-generation
-    dataset:
-      name: LaRoSeDa_binary_finetuned
-      type: LaRoSeDa_binary_finetuned
-    metrics:
-    - name: Average macro-f1
-      type: macro-f1
-      value: 98.83
-  - task:
-      type: text-generation
-    dataset:
-      name: LaRoSeDa_multiclass_finetuned
-      type: LaRoSeDa_multiclass_finetuned
-    metrics:
-    - name: Average macro-f1
-      type: macro-f1
-      value: 87.28
-  - task:
-      type: text-generation
-    dataset:
-      name: WMT_EN-RO
-      type: WMT_EN-RO
-    metrics:
-    - name: Average bleu
-      type: bleu
-      value: 27.38
-  - task:
-      type: text-generation
-    dataset:
-      name: WMT_RO-EN
-      type: WMT_RO-EN
-    metrics:
-    - name: Average bleu
-      type: bleu
-      value: 10.32
-  - task:
-      type: text-generation
-    dataset:
-      name: WMT_EN-RO_finetuned
-      type: WMT_EN-RO_finetuned
-    metrics:
-    - name: Average bleu
-      type: bleu
-      value: 27.59
-  - task:
-      type: text-generation
-    dataset:
-      name: WMT_RO-EN_finetuned
-      type: WMT_RO-EN_finetuned
-    metrics:
-    - name: Average bleu
-      type: bleu
-      value: 40.13
-  - task:
-      type: text-generation
-    dataset:
-      name: XQuAD
-      type: XQuAD
-    metrics:
-    - name: Average exact_match
-      type: exact_match
-      value: 44.52
-  - task:
-      type: text-generation
-    dataset:
-      name: XQuAD
-      type: XQuAD
-    metrics:
-    - name: Average f1
-      type: f1
-      value: 64.75
-  - task:
-      type: text-generation
-    dataset:
-      name: XQuAD_finetuned
-      type: XQuAD_finetuned
-    metrics:
-    - name: Average exact_match
-      type: exact_match
-      value: 54.96
-  - task:
-      type: text-generation
-    dataset:
-      name: XQuAD_finetuned
-      type: XQuAD_finetuned
-    metrics:
-    - name: Average f1
-      type: f1
-      value: 70.2
-  - task:
-      type: text-generation
-    dataset:
-      name: STS
-      type: STS
-    metrics:
-    - name: Average spearman
-      type: spearman
-      value: 65.5
-  - task:
-      type: text-generation
-    dataset:
-      name: STS
-      type: STS
-    metrics:
-    - name: Average pearson
-      type: pearson
-      value: 67.79
-  - task:
-      type: text-generation
-    dataset:
-      name: STS_finetuned
-      type: STS_finetuned
-    metrics:
-    - name: Average spearman
-      type: spearman
-      value: 84.44
-  - task:
-      type: text-generation
-    dataset:
-      name: STS_finetuned
-      type: STS_finetuned
-    metrics:
-    - name: Average pearson
-      type: pearson
-      value: 84.76
-  - task:
-      type: text-generation
-    dataset:
-      name: RoMT-Bench
-      type: RoMT-Bench
-    metrics:
-    - name: First turn
-      type: Score
-      value: 4.67
-    - name: Second turn
-      type: Score
-      value: 3.04
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_arc_challenge
-      type: OpenLLM-Ro/ro_arc_challenge
-    metrics:
-    - name: 0-shot
-      type: accuracy
-      value: 41.73
-    - name: 1-shot
-      type: accuracy
-      value: 42.16
-    - name: 3-shot
-      type: accuracy
-      value: 43.53
-    - name: 5-shot
-      type: accuracy
-      value: 44.9
-    - name: 10-shot
-      type: accuracy
-      value: 44.99
-    - name: 25-shot
-      type: accuracy
-      value: 44.64
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_mmlu
-      type: OpenLLM-Ro/ro_mmlu
-    metrics:
-    - name: 0-shot
-      type: accuracy
-      value: 38.54
-    - name: 1-shot
-      type: accuracy
-      value: 39.36
-    - name: 3-shot
-      type: accuracy
-      value: 40.82
-    - name: 5-shot
-      type: accuracy
-      value: 40.07
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_winogrande
-      type: OpenLLM-Ro/ro_winogrande
-    metrics:
-    - name: 0-shot
-      type: accuracy
-      value: 72.61
-    - name: 1-shot
-      type: accuracy
-      value: 69.93
-    - name: 3-shot
-      type: accuracy
-      value: 70.4
-    - name: 5-shot
-      type: accuracy
-      value: 68.43
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_hellaswag
-      type: OpenLLM-Ro/ro_hellaswag
-    metrics:
-    - name: 0-shot
-      type: accuracy
-      value: 56.9
-    - name: 1-shot
-      type: accuracy
-      value: 57.07
-    - name: 3-shot
-      type: accuracy
-      value: 57.56
-    - name: 5-shot
-      type: accuracy
-      value: 57.35
-    - name: 10-shot
-      type: accuracy
-      value: 57.93
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_gsm8k
-      type: OpenLLM-Ro/ro_gsm8k
-    metrics:
-    - name: 0-shot
-      type: accuracy
-      value: 11.22
-    - name: 1-shot
-      type: accuracy
-      value: 21.38
-    - name: 3-shot
-      type: accuracy
-      value: 23.73
-  - task:
-      type: text-generation
-    dataset:
-      name: LaRoSeDa_binary
-      type: LaRoSeDa_binary
-    metrics:
-    - name: 0-shot
-      type: macro-f1
-      value: 97.67
-    - name: 1-shot
-      type: macro-f1
-      value: 96.77
-    - name: 3-shot
-      type: macro-f1
-      value: 97.6
-    - name: 5-shot
-      type: macro-f1
-      value: 97.87
-  - task:
-      type: text-generation
-    dataset:
-      name: LaRoSeDa_multiclass
-      type: LaRoSeDa_multiclass
-    metrics:
-    - name: 0-shot
-      type: macro-f1
-      value: 61.82
-    - name: 1-shot
-      type: macro-f1
-      value: 58.84
-    - name: 3-shot
-      type: macro-f1
-      value: 68.67
-    - name: 5-shot
-      type: macro-f1
-      value: 71.71
-  - task:
-      type: text-generation
-    dataset:
-      name: WMT_EN-RO
-      type: WMT_EN-RO
-    metrics:
-    - name: 0-shot
-      type: bleu
-      value: 19.71
-    - name: 1-shot
-      type: bleu
-      value: 29.62
-    - name: 3-shot
-      type: bleu
-      value: 30.11
-    - name: 5-shot
-      type: bleu
-      value: 30.1
-  - task:
-      type: text-generation
-    dataset:
-      name: WMT_RO-EN
-      type: WMT_RO-EN
-    metrics:
-    - name: 0-shot
-      type: bleu
-      value: 1.86
-    - name: 1-shot
-      type: bleu
-      value: 4.41
-    - name: 3-shot
-      type: bleu
-      value: 14.95
-    - name: 5-shot
-      type: bleu
-      value: 20.07
-  - task:
-      type: text-generation
-    dataset:
-      name: XQuAD_EM
-      type: XQuAD_EM
-    metrics:
-    - name: 0-shot
-      type: exact_match
-      value: 34.87
-    - name: 1-shot
-      type: exact_match
-      value: 44.96
-    - name: 3-shot
-      type: exact_match
-      value: 48.4
-    - name: 5-shot
-      type: exact_match
-      value: 49.83
-  - task:
-      type: text-generation
-    dataset:
-      name: XQuAD_F1
-      type: XQuAD_F1
-    metrics:
-    - name: 0-shot
-      type: f1
-      value: 58.07
-    - name: 1-shot
-      type: f1
-      value: 63.93
-    - name: 3-shot
-      type: f1
-      value: 67.89
-    - name: 5-shot
-      type: f1
-      value: 69.1
-  - task:
-      type: text-generation
-    dataset:
-      name: STS
-      type: STS
-    metrics:
-    - name: 0-shot
-      type: spearman
-      value: 61.14
-    - name: 1-shot
-      type: spearman
-      value: 66.91
-    - name: 3-shot
-      type: spearman
-      value: 68.46
-  - task:
-      type: text-generation
-    dataset:
-      name: STS
-      type: STS
-    metrics:
-    - name: 0-shot
-      type: pearson
-      value: 61.88
-    - name: 1-shot
-      type: pearson
-      value: 70.04
-    - name: 3-shot
-      type: pearson
-      value: 71.46
 datasets:
 - OpenLLM-Ro/ro_sft_alpaca
 - OpenLLM-Ro/ro_sft_alpaca_gpt4
@@ -486,6 +12,481 @@ datasets:
 - OpenLLM-Ro/ro_sft_norobots
 - OpenLLM-Ro/ro_sft_orca
 - OpenLLM-Ro/ro_sft_camel
 ---
 # Model Card for Model ID

 - ro
 base_model:
 - OpenLLM-Ro/RoLlama2-7b-Base
 datasets:
 - OpenLLM-Ro/ro_sft_alpaca
 - OpenLLM-Ro/ro_sft_alpaca_gpt4
 - OpenLLM-Ro/ro_sft_norobots
 - OpenLLM-Ro/ro_sft_orca
 - OpenLLM-Ro/ro_sft_camel
+model-index:
+    - name: OpenLLM-Ro/RoLlama2-7b-Instruct-2024-05-14
+      results:
+        - task:
+            type: text-generation
+          dataset:
+            name: RoMT-Bench
+            type: RoMT-Bench
+          metrics:
+            - name: Score
+              type: Score
+              value: 3.86
+        - task:
+            type: text-generation
+          dataset:
+            name: RoCulturaBench
+            type: RoCulturaBench
+          metrics:
+            - name: Score
+              type: Score
+              value: 3.77
+        - task:
+            type: text-generation
+          dataset:
+            name: Romanian_Academic_Benchmarks
+            type: Romanian_Academic_Benchmarks
+          metrics:
+            - name: Average accuracy
+              type: accuracy
+              value: 45.71
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_arc_challenge
+            type: OpenLLM-Ro/ro_arc_challenge
+          metrics:
+            - name: Average accuracy
+              type: accuracy
+              value: 43.66
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_mmlu
+            type: OpenLLM-Ro/ro_mmlu
+          metrics:
+            - name: Average accuracy
+              type: accuracy
+              value: 39.70
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_winogrande
+            type: OpenLLM-Ro/ro_winogrande
+          metrics:
+            - name: Average accuracy
+              type: accuracy
+              value: 70.34
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_hellaswag
+            type: OpenLLM-Ro/ro_hellaswag
+          metrics:
+            - name: Average accuracy
+              type: accuracy
+              value: 57.36
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_gsm8k
+            type: OpenLLM-Ro/ro_gsm8k
+          metrics:
+            - name: Average accuracy
+              type: accuracy
+              value: 18.78
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_truthfulqa
+            type: OpenLLM-Ro/ro_truthfulqa
+          metrics:
+            - name: Average accuracy
+              type: accuracy
+              value: 44.44
+        - task:
+            type: text-generation
+          dataset:
+            name: LaRoSeDa_binary
+            type: LaRoSeDa_binary
+          metrics:
+            - name: Average macro-f1
+              type: macro-f1
+              value: 97.48
+        - task:
+            type: text-generation
+          dataset:
+            name: LaRoSeDa_multiclass
+            type: LaRoSeDa_multiclass
+          metrics:
+            - name: Average macro-f1
+              type: macro-f1
+              value: 65.26
+        - task:
+            type: text-generation
+          dataset:
+            name: LaRoSeDa_binary_finetuned
+            type: LaRoSeDa_binary_finetuned
+          metrics:
+            - name: Average macro-f1
+              type: macro-f1
+              value: 98.83
+        - task:
+            type: text-generation
+          dataset:
+            name: LaRoSeDa_multiclass_finetuned
+            type: LaRoSeDa_multiclass_finetuned
+          metrics:
+            - name: Average macro-f1
+              type: macro-f1
+              value: 87.28
+        - task:
+            type: text-generation
+          dataset:
+            name: WMT_EN-RO
+            type: WMT_EN-RO
+          metrics:
+            - name: Average bleu
+              type: bleu
+              value: 27.38
+        - task:
+            type: text-generation
+          dataset:
+            name: WMT_RO-EN
+            type: WMT_RO-EN
+          metrics:
+            - name: Average bleu
+              type: bleu
+              value: 10.32
+        - task:
+            type: text-generation
+          dataset:
+            name: WMT_EN-RO_finetuned
+            type: WMT_EN-RO_finetuned
+          metrics:
+            - name: Average bleu
+              type: bleu
+              value: 27.59
+        - task:
+            type: text-generation
+          dataset:
+            name: WMT_RO-EN_finetuned
+            type: WMT_RO-EN_finetuned
+          metrics:
+            - name: Average bleu
+              type: bleu
+              value: 40.13
+        - task:
+            type: text-generation
+          dataset:
+            name: XQuAD
+            type: XQuAD
+          metrics:
+            - name: Average exact_match
+              type: exact_match
+              value: 44.52
+        - task:
+            type: text-generation
+          dataset:
+            name: XQuAD
+            type: XQuAD
+          metrics:
+            - name: Average f1
+              type: f1
+              value: 64.75
+        - task:
+            type: text-generation
+          dataset:
+            name: XQuAD_finetuned
+            type: XQuAD_finetuned
+          metrics:
+            - name: Average exact_match
+              type: exact_match
+              value: 54.96
+        - task:
+            type: text-generation
+          dataset:
+            name: XQuAD_finetuned
+            type: XQuAD_finetuned
+          metrics:
+            - name: Average f1
+              type: f1
+              value: 70.20
+        - task:
+            type: text-generation
+          dataset:
+            name: STS
+            type: STS
+          metrics:
+            - name: Average spearman
+              type: spearman
+              value: 65.50
+        - task:
+            type: text-generation
+          dataset:
+            name: STS
+            type: STS
+          metrics:
+            - name: Average pearson
+              type: pearson
+              value: 67.79
+        - task:
+            type: text-generation
+          dataset:
+            name: STS_finetuned
+            type: STS_finetuned
+          metrics:
+            - name: Average spearman
+              type: spearman
+              value: 84.44
+        - task:
+            type: text-generation
+          dataset:
+            name: STS_finetuned
+            type: STS_finetuned
+          metrics:
+            - name: Average pearson
+              type: pearson
+              value: 84.76
+        - task:
+            type: text-generation
+          dataset:
+            name: RoMT-Bench
+            type: RoMT-Bench
+          metrics:
+            - name: First turn
+              type: Score
+              value: 4.67
+            - name: Second turn
+              type: Score
+              value: 3.04
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_arc_challenge
+            type: OpenLLM-Ro/ro_arc_challenge
+          metrics:
+            - name: 0-shot
+              type: accuracy
+              value: 41.73
+            - name: 1-shot
+              type: accuracy
+              value: 42.16
+            - name: 3-shot
+              type: accuracy
+              value: 43.53
+            - name: 5-shot
+              type: accuracy
+              value: 44.90
+            - name: 10-shot
+              type: accuracy
+              value: 44.99
+            - name: 25-shot
+              type: accuracy
+              value: 44.64
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_mmlu
+            type: OpenLLM-Ro/ro_mmlu
+          metrics:
+            - name: 0-shot
+              type: accuracy
+              value: 38.54
+            - name: 1-shot
+              type: accuracy
+              value: 39.36
+            - name: 3-shot
+              type: accuracy
+              value: 40.82
+            - name: 5-shot
+              type: accuracy
+              value: 40.07
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_winogrande
+            type: OpenLLM-Ro/ro_winogrande
+          metrics:
+            - name: 0-shot
+              type: accuracy
+              value: 72.61
+            - name: 1-shot
+              type: accuracy
+              value: 69.93
+            - name: 3-shot
+              type: accuracy
+              value: 70.40
+            - name: 5-shot
+              type: accuracy
+              value: 68.43
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_hellaswag
+            type: OpenLLM-Ro/ro_hellaswag
+          metrics:
+            - name: 0-shot
+              type: accuracy
+              value: 56.90
+            - name: 1-shot
+              type: accuracy
+              value: 57.07
+            - name: 3-shot
+              type: accuracy
+              value: 57.56
+            - name: 5-shot
+              type: accuracy
+              value: 57.35
+            - name: 10-shot
+              type: accuracy
+              value: 57.93
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_gsm8k
+            type: OpenLLM-Ro/ro_gsm8k
+          metrics:
+            - name: 0-shot
+              type: accuracy
+              value: 11.22
+            - name: 1-shot
+              type: accuracy
+              value: 21.38
+            - name: 3-shot
+              type: accuracy
+              value: 23.73
+        - task:
+            type: text-generation
+          dataset:
+            name: LaRoSeDa_binary
+            type: LaRoSeDa_binary
+          metrics:
+            - name: 0-shot
+              type: macro-f1
+              value: 97.67
+            - name: 1-shot
+              type: macro-f1
+              value: 96.77
+            - name: 3-shot
+              type: macro-f1
+              value: 97.60
+            - name: 5-shot
+              type: macro-f1
+              value: 97.87
+        - task:
+            type: text-generation
+          dataset:
+            name: LaRoSeDa_multiclass
+            type: LaRoSeDa_multiclass
+          metrics:
+            - name: 0-shot
+              type: macro-f1
+              value: 61.82
+            - name: 1-shot
+              type: macro-f1
+              value: 58.84
+            - name: 3-shot
+              type: macro-f1
+              value: 68.67
+            - name: 5-shot
+              type: macro-f1
+              value: 71.71
+        - task:
+            type: text-generation
+          dataset:
+            name: WMT_EN-RO
+            type: WMT_EN-RO
+          metrics:
+            - name: 0-shot
+              type: bleu
+              value: 19.71
+            - name: 1-shot
+              type: bleu
+              value: 29.62
+            - name: 3-shot
+              type: bleu
+              value: 30.11
+            - name: 5-shot
+              type: bleu
+              value: 30.10
+        - task:
+            type: text-generation
+          dataset:
+            name: WMT_RO-EN
+            type: WMT_RO-EN
+          metrics:
+            - name: 0-shot
+              type: bleu
+              value: 1.86
+            - name: 1-shot
+              type: bleu
+              value: 4.41
+            - name: 3-shot
+              type: bleu
+              value: 14.95
+            - name: 5-shot
+              type: bleu
+              value: 20.07
+        - task:
+            type: text-generation
+          dataset:
+            name: XQuAD_EM
+            type: XQuAD_EM
+          metrics:
+            - name: 0-shot
+              type: exact_match
+              value: 34.87
+            - name: 1-shot
+              type: exact_match
+              value: 44.96
+            - name: 3-shot
+              type: exact_match
+              value: 48.40
+            - name: 5-shot
+              type: exact_match
+              value: 49.83
+        - task:
+            type: text-generation
+          dataset:
+            name: XQuAD_F1
+            type: XQuAD_F1
+          metrics:
+            - name: 0-shot
+              type: f1
+              value: 58.07
+            - name: 1-shot
+              type: f1
+              value: 63.93
+            - name: 3-shot
+              type: f1
+              value: 67.89
+            - name: 5-shot
+              type: f1
+              value: 69.10
+        - task:
+            type: text-generation
+          dataset:
+            name: STS_Spearman
+            type: STS_Spearman
+          metrics:
+            - name: 1-shot
+              type: spearman
+              value: 61.14
+            - name: 3-shot
+              type: spearman
+              value: 66.91
+            - name: 5-shot
+              type: spearman
+              value: 68.46
+        - task:
+            type: text-generation
+          dataset:
+            name: STS_Pearson
+            type: STS_Pearson
+          metrics:
+            - name: 1-shot
+              type: pearson
+              value: 61.88
+            - name: 3-shot
+              type: pearson
+              value: 70.04
+            - name: 5-shot
+              type: pearson
+              value: 71.46
 ---
 # Model Card for Model ID