Update README.md
Browse files
README.md
CHANGED
@@ -83,4 +83,52 @@ Dataset was formatted in ShareGPT format for the purposes of using with Axolotl,
|
|
83 |
- num_epochs: 3
|
84 |
- optimizer: adamw_bnb_8bit
|
85 |
- lr_scheduler: cosine
|
86 |
-
- learning_rate: 0.00025
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
- num_epochs: 3
|
84 |
- optimizer: adamw_bnb_8bit
|
85 |
- lr_scheduler: cosine
|
86 |
+
- learning_rate: 0.00025
|
87 |
+
|
88 |
+
#### Evaluation
|
89 |
+
|
90 |
+
| Groups |Version| Filter |n-shot| Metric | Value | |Stderr|
|
91 |
+
|--------------------|-------|----------------|-----:|-----------|------:|---|-----:|
|
92 |
+
|Open LLM Leaderboard|N/A |none | 5|rouge2_acc | 0.1920|± |0.0176|
|
93 |
+
| | |none | 5|bleu_max |15.2292|± |0.6714|
|
94 |
+
| | |flexible-extract| 5|exact_match| 0.0220|± |0.0066|
|
95 |
+
| - truthfulqa_mc1 | 2|none | 0|acc | 0.2440|± |0.0192|
|
96 |
+
| - truthfulqa_mc2 | 2|none | 0|acc | 0.4430|± |0.0195|
|
97 |
+
| - winogrande | 1|none | 5|acc | 0.5120|± |0.0224|
|
98 |
+
| - arc_challenge | 1|none | 25|acc | 0.1760|± |0.0170|
|
99 |
+
| | |none | 25|acc_norm | 0.2320|± |0.0189|
|
100 |
+
| - gsm8k | 3|strict-match | 5|exact_match| 0.0060|± |0.0035|
|
101 |
+
| | |flexible-extract| 5|exact_match| 0.0220|± |0.0066|
|
102 |
+
| - hellaswag | 1|none | 10|acc | 0.3520|± |0.0214|
|
103 |
+
| | |none | 10|acc_norm | 0.4040|± |0.0220|
|
104 |
+
| | |none | 5|rouge2_diff|-3.3178|± |0.9477|
|
105 |
+
| | |none | 5|rougeL_acc | 0.3860|± |0.0218|
|
106 |
+
| | |none | 5|acc_norm | 0.3180|± |0.0145|
|
107 |
+
| | |none | 5|rouge1_diff|-1.5564|± |1.0223|
|
108 |
+
| | |none | 5|bleu_diff |-0.6500|± |0.6421|
|
109 |
+
| | |none | 5|rouge2_max |16.4873|± |1.0172|
|
110 |
+
| | |none | 5|rougeL_diff|-0.7765|± |1.0034|
|
111 |
+
| | |strict-match | 5|exact_match| 0.0060|± |0.0035|
|
112 |
+
| | |none | 5|bleu_acc | 0.4360|± |0.0222|
|
113 |
+
| | |none | 5|rougeL_max |33.8798|± |0.9367|
|
114 |
+
| | |none | 5|rouge1_max |36.3550|± |0.9462|
|
115 |
+
| | |none | 5|rouge1_acc | 0.3700|± |0.0216|
|
116 |
+
| | |none | 5|acc | 0.2664|± |0.0036|
|
117 |
+
| - mmlu |N/A |none | 0|acc | 0.2533|± |0.0039|
|
118 |
+
| - humanities |N/A |none | 5|acc | 0.2408|± |0.0075|
|
119 |
+
| - other |N/A |none | 5|acc | 0.2443|± |0.0080|
|
120 |
+
| - social_sciences |N/A |none | 5|acc | 0.2538|± |0.0081|
|
121 |
+
| - stem |N/A |none | 5|acc | 0.2740|± |0.0079|
|
122 |
+
| - truthfulqa |N/A |none | 0|rouge2_acc | 0.1920|± |0.0176|
|
123 |
+
| | |none | 0|rougeL_diff|-0.7765|± |1.0034|
|
124 |
+
| | |none | 0|bleu_max |15.2292|± |0.6714|
|
125 |
+
| | |none | 0|rouge2_diff|-3.3178|± |0.9477|
|
126 |
+
| | |none | 0|rougeL_acc | 0.3860|± |0.0218|
|
127 |
+
| | |none | 0|bleu_diff |-0.6500|± |0.6421|
|
128 |
+
| | |none | 0|rouge2_max |16.4873|± |1.0172|
|
129 |
+
| | |none | 0|rouge1_diff|-1.5564|± |1.0223|
|
130 |
+
| | |none | 0|acc | 0.3435|± |0.0137|
|
131 |
+
| | |none | 0|bleu_acc | 0.4360|± |0.0222|
|
132 |
+
| | |none | 0|rougeL_max |33.8798|± |0.9367|
|
133 |
+
| | |none | 0|rouge1_max |36.3550|± |0.9462|
|
134 |
+
| | |none | 0|rouge1_acc | 0.3700|± |0.0216|
|