satvik-dixit
commited on
Commit
•
2879958
1
Parent(s):
5c1ff4d
Uploaded Curriculum
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- Curriculum_eval_results_Meta_Llama_3_8B_Vanilla_test.json +0 -0
- Curriculum_eval_results_math_llama3_instruct_MWP2K_Vanilla_test.json +0 -0
- Curriculum_eval_results_math_llama3_instruct_MWP2K_Vanilla_val.json +0 -0
- Curriculum_eval_results_math_llama3_instruct_MWP4K_Vanilla_test.json +0 -0
- Curriculum_eval_results_math_llama3_instruct_MWP4K_Vanilla_val.json +0 -0
- Curriculum_eval_results_math_llama_3_LORA_Arithmetic_10k_Vanilla_test.json +0 -0
- Curriculum_eval_results_math_llama_3_LORA_Arithmetic_10k_Vanilla_val.json +0 -0
- Curriculum_eval_results_math_llama_3_LORA_Arithmetic_2k_Vanilla_test.json +0 -0
- Curriculum_eval_results_math_llama_3_LORA_Arithmetic_2k_Vanilla_val.json +0 -0
- Curriculum_eval_results_math_llama_3_LORA_Arithmetic_4k_Vanilla_test.json +0 -0
- Curriculum_eval_results_math_llama_3_LORA_Arithmetic_4k_Vanilla_val.json +0 -0
- Curriculum_eval_results_math_llama_3_LORA_Arithmetic_6k_Vanilla_test.json +0 -0
- Curriculum_eval_results_math_llama_3_LORA_Arithmetic_6k_Vanilla_val.json +0 -0
- Curriculum_eval_results_math_llama_3_LORA_Arithmetic_8k_MWPMix_10k_Vanilla_test.json +0 -0
- Curriculum_eval_results_math_llama_3_LORA_Arithmetic_8k_MWPMix_10k_Vanilla_val.json +0 -0
- Curriculum_eval_results_math_llama_3_LORA_Arithmetic_8k_MWPMix_6k_Vanilla_test.json +0 -0
- Curriculum_eval_results_math_llama_3_LORA_Arithmetic_8k_MWPMix_6k_Vanilla_val.json +0 -0
- Curriculum_eval_results_math_llama_3_LORA_Arithmetic_8k_MWPMix_8k_Vanilla_test.json +0 -0
- Curriculum_eval_results_math_llama_3_LORA_Arithmetic_8k_MWPMix_8k_Vanilla_val.json +0 -0
- Curriculum_eval_results_math_llama_3_LORA_Arithmetic_8k_Vanilla_test.json +0 -0
- Curriculum_eval_results_math_llama_3_LORA_Arithmetic_8k_Vanilla_val.json +0 -0
- Curriculum_eval_results_math_llama_3_instruct_model_arith_10k_Vanilla_test.json +0 -0
- Curriculum_eval_results_math_llama_3_instruct_model_arith_10k_Vanilla_val.json +0 -0
- Curriculum_eval_results_math_llama_3_instruct_model_arith_2k_Vanilla_test.json +0 -0
- Curriculum_eval_results_math_llama_3_instruct_model_arith_2k_Vanilla_val.json +0 -0
- Curriculum_eval_results_math_llama_3_instruct_model_arith_4k_Vanilla_test.json +0 -0
- Curriculum_eval_results_math_llama_3_instruct_model_arith_4k_Vanilla_val.json +0 -0
- Curriculum_eval_results_math_llama_3_instruct_model_arith_6k_Vanilla_test.json +0 -0
- Curriculum_eval_results_math_llama_3_instruct_model_arith_6k_Vanilla_val.json +0 -0
- Curriculum_eval_results_math_llama_3_instruct_model_arith_8k_Vanilla_test.json +0 -0
- Curriculum_eval_results_math_llama_3_instruct_model_arith_8k_Vanilla_val.json +0 -0
- Curriculum_math_llama3_instruct_MWP2K_Vanilla_test_metrics.json +16 -0
- Curriculum_math_llama3_instruct_MWP2K_Vanilla_val_metrics.json +16 -0
- Curriculum_math_llama3_instruct_MWP4K_Vanilla_test_metrics.json +16 -0
- Curriculum_math_llama3_instruct_MWP4K_Vanilla_val_metrics.json +16 -0
- Curriculum_math_llama_3_LORA_Arithmetic_10k_Vanilla_test_metrics.json +16 -0
- Curriculum_math_llama_3_LORA_Arithmetic_10k_Vanilla_val_metrics.json +16 -0
- Curriculum_math_llama_3_LORA_Arithmetic_2k_Vanilla_test_metrics.json +16 -0
- Curriculum_math_llama_3_LORA_Arithmetic_2k_Vanilla_val_metrics.json +16 -0
- Curriculum_math_llama_3_LORA_Arithmetic_4k_Vanilla_test_metrics.json +16 -0
- Curriculum_math_llama_3_LORA_Arithmetic_4k_Vanilla_val_metrics.json +16 -0
- Curriculum_math_llama_3_LORA_Arithmetic_6k_Vanilla_test_metrics.json +16 -0
- Curriculum_math_llama_3_LORA_Arithmetic_6k_Vanilla_val_metrics.json +16 -0
- Curriculum_math_llama_3_LORA_Arithmetic_8k_MWPMix_10k_Vanilla_test_metrics.json +16 -0
- Curriculum_math_llama_3_LORA_Arithmetic_8k_MWPMix_10k_Vanilla_val_metrics.json +16 -0
- Curriculum_math_llama_3_LORA_Arithmetic_8k_MWPMix_6k_Vanilla_test_metrics.json +16 -0
- Curriculum_math_llama_3_LORA_Arithmetic_8k_MWPMix_6k_Vanilla_val_metrics.json +16 -0
- Curriculum_math_llama_3_LORA_Arithmetic_8k_MWPMix_8k_Vanilla_test_metrics.json +16 -0
- Curriculum_math_llama_3_LORA_Arithmetic_8k_MWPMix_8k_Vanilla_val_metrics.json +16 -0
- Curriculum_math_llama_3_LORA_Arithmetic_8k_Vanilla_test_metrics.json +16 -0
Curriculum_eval_results_Meta_Llama_3_8B_Vanilla_test.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama3_instruct_MWP2K_Vanilla_test.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama3_instruct_MWP2K_Vanilla_val.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama3_instruct_MWP4K_Vanilla_test.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama3_instruct_MWP4K_Vanilla_val.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama_3_LORA_Arithmetic_10k_Vanilla_test.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama_3_LORA_Arithmetic_10k_Vanilla_val.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama_3_LORA_Arithmetic_2k_Vanilla_test.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama_3_LORA_Arithmetic_2k_Vanilla_val.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama_3_LORA_Arithmetic_4k_Vanilla_test.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama_3_LORA_Arithmetic_4k_Vanilla_val.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama_3_LORA_Arithmetic_6k_Vanilla_test.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama_3_LORA_Arithmetic_6k_Vanilla_val.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama_3_LORA_Arithmetic_8k_MWPMix_10k_Vanilla_test.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama_3_LORA_Arithmetic_8k_MWPMix_10k_Vanilla_val.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama_3_LORA_Arithmetic_8k_MWPMix_6k_Vanilla_test.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama_3_LORA_Arithmetic_8k_MWPMix_6k_Vanilla_val.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama_3_LORA_Arithmetic_8k_MWPMix_8k_Vanilla_test.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama_3_LORA_Arithmetic_8k_MWPMix_8k_Vanilla_val.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama_3_LORA_Arithmetic_8k_Vanilla_test.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama_3_LORA_Arithmetic_8k_Vanilla_val.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama_3_instruct_model_arith_10k_Vanilla_test.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama_3_instruct_model_arith_10k_Vanilla_val.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama_3_instruct_model_arith_2k_Vanilla_test.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama_3_instruct_model_arith_2k_Vanilla_val.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama_3_instruct_model_arith_4k_Vanilla_test.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama_3_instruct_model_arith_4k_Vanilla_val.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama_3_instruct_model_arith_6k_Vanilla_test.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama_3_instruct_model_arith_6k_Vanilla_val.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama_3_instruct_model_arith_8k_Vanilla_test.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_eval_results_math_llama_3_instruct_model_arith_8k_Vanilla_val.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Curriculum_math_llama3_instruct_MWP2K_Vanilla_test_metrics.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"total_count": 2160,
|
3 |
+
"none_count": 0,
|
4 |
+
"match_count": 204,
|
5 |
+
"wrong_count": 1956,
|
6 |
+
"acc": 0.09444444444444444,
|
7 |
+
"avg_error": 5.616682130077772e+189,
|
8 |
+
"var_error": Infinity,
|
9 |
+
"mse": Infinity,
|
10 |
+
"vse": Infinity,
|
11 |
+
"mpe": 4.618319750152281e+171,
|
12 |
+
"vpe": Infinity,
|
13 |
+
"pass_at_1": null,
|
14 |
+
"ttr": 0.012804913378527145,
|
15 |
+
"entropy": 7.028671490219006
|
16 |
+
}
|
Curriculum_math_llama3_instruct_MWP2K_Vanilla_val_metrics.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"total_count": 2160,
|
3 |
+
"none_count": 1,
|
4 |
+
"match_count": 652,
|
5 |
+
"wrong_count": 1507,
|
6 |
+
"acc": 0.30185185185185187,
|
7 |
+
"avg_error": 5.146415521589214e+187,
|
8 |
+
"var_error": Infinity,
|
9 |
+
"mse": 8.326508267078813e+304,
|
10 |
+
"vse": Infinity,
|
11 |
+
"mpe": 5.1464155215892134e+190,
|
12 |
+
"vpe": Infinity,
|
13 |
+
"pass_at_1": null,
|
14 |
+
"ttr": 0.008738425925925926,
|
15 |
+
"entropy": 5.190855468404559
|
16 |
+
}
|
Curriculum_math_llama3_instruct_MWP4K_Vanilla_test_metrics.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"total_count": 2160,
|
3 |
+
"none_count": 0,
|
4 |
+
"match_count": 192,
|
5 |
+
"wrong_count": 1968,
|
6 |
+
"acc": 0.08888888888888889,
|
7 |
+
"avg_error": 4.071264763292613e+189,
|
8 |
+
"var_error": Infinity,
|
9 |
+
"mse": Infinity,
|
10 |
+
"vse": Infinity,
|
11 |
+
"mpe": 4.689228636126781e+172,
|
12 |
+
"vpe": Infinity,
|
13 |
+
"pass_at_1": null,
|
14 |
+
"ttr": 0.01227282600825429,
|
15 |
+
"entropy": 6.90205202173977
|
16 |
+
}
|
Curriculum_math_llama3_instruct_MWP4K_Vanilla_val_metrics.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"total_count": 2160,
|
3 |
+
"none_count": 0,
|
4 |
+
"match_count": 627,
|
5 |
+
"wrong_count": 1533,
|
6 |
+
"acc": 0.2902777777777778,
|
7 |
+
"avg_error": 1.2745884773662552e+92,
|
8 |
+
"var_error": 3.507459123344596e+187,
|
9 |
+
"mse": 3.50908369913123e+187,
|
10 |
+
"vse": Infinity,
|
11 |
+
"mpe": -5.982995116305566e+79,
|
12 |
+
"vpe": 7.728406178278855e+162,
|
13 |
+
"pass_at_1": null,
|
14 |
+
"ttr": 0.00861545138888889,
|
15 |
+
"entropy": 5.384155831142046
|
16 |
+
}
|
Curriculum_math_llama_3_LORA_Arithmetic_10k_Vanilla_test_metrics.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"total_count": 2160,
|
3 |
+
"none_count": 0,
|
4 |
+
"match_count": 396,
|
5 |
+
"wrong_count": 1764,
|
6 |
+
"acc": 0.18333333333333332,
|
7 |
+
"avg_error": 1.4032777777777778e+189,
|
8 |
+
"var_error": Infinity,
|
9 |
+
"mse": Infinity,
|
10 |
+
"vse": Infinity,
|
11 |
+
"mpe": 9.915285070575362e+170,
|
12 |
+
"vpe": Infinity,
|
13 |
+
"pass_at_1": null,
|
14 |
+
"ttr": 0.10221021136955892,
|
15 |
+
"entropy": 6.4392100361924545
|
16 |
+
}
|
Curriculum_math_llama_3_LORA_Arithmetic_10k_Vanilla_val_metrics.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"total_count": 2160,
|
3 |
+
"none_count": 0,
|
4 |
+
"match_count": 743,
|
5 |
+
"wrong_count": 1417,
|
6 |
+
"acc": 0.3439814814814815,
|
7 |
+
"avg_error": -140973754299483.12,
|
8 |
+
"var_error": 4.291224315746973e+31,
|
9 |
+
"mse": 4.293211675687104e+31,
|
10 |
+
"vse": 3.979396756529488e+66,
|
11 |
+
"mpe": 6024.610757526451,
|
12 |
+
"vpe": 43895294489.4146,
|
13 |
+
"pass_at_1": null,
|
14 |
+
"ttr": 0.14054153619451096,
|
15 |
+
"entropy": 5.416380862616709
|
16 |
+
}
|
Curriculum_math_llama_3_LORA_Arithmetic_2k_Vanilla_test_metrics.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"total_count": 2160,
|
3 |
+
"none_count": 0,
|
4 |
+
"match_count": 385,
|
5 |
+
"wrong_count": 1775,
|
6 |
+
"acc": 0.17824074074074073,
|
7 |
+
"avg_error": 1.210185185185185e+189,
|
8 |
+
"var_error": Infinity,
|
9 |
+
"mse": Infinity,
|
10 |
+
"vse": Infinity,
|
11 |
+
"mpe": 5.091730293284713e+167,
|
12 |
+
"vpe": Infinity,
|
13 |
+
"pass_at_1": null,
|
14 |
+
"ttr": 0.11367673179396093,
|
15 |
+
"entropy": 6.554592188868327
|
16 |
+
}
|
Curriculum_math_llama_3_LORA_Arithmetic_2k_Vanilla_val_metrics.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"total_count": 2160,
|
3 |
+
"none_count": 0,
|
4 |
+
"match_count": 727,
|
5 |
+
"wrong_count": 1433,
|
6 |
+
"acc": 0.3365740740740741,
|
7 |
+
"avg_error": -140934063106490.0,
|
8 |
+
"var_error": 4.2912259689610905e+31,
|
9 |
+
"mse": 4.293212209975465e+31,
|
10 |
+
"vse": 3.979396756070794e+66,
|
11 |
+
"mpe": 55461.0563656412,
|
12 |
+
"vpe": 6636951408951.518,
|
13 |
+
"pass_at_1": null,
|
14 |
+
"ttr": 0.13636363636363635,
|
15 |
+
"entropy": 5.263913468908822
|
16 |
+
}
|
Curriculum_math_llama_3_LORA_Arithmetic_4k_Vanilla_test_metrics.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"total_count": 2160,
|
3 |
+
"none_count": 0,
|
4 |
+
"match_count": 393,
|
5 |
+
"wrong_count": 1767,
|
6 |
+
"acc": 0.18194444444444444,
|
7 |
+
"avg_error": 1.2497233342592593e+189,
|
8 |
+
"var_error": Infinity,
|
9 |
+
"mse": Infinity,
|
10 |
+
"vse": Infinity,
|
11 |
+
"mpe": 1.9452414325632677e+168,
|
12 |
+
"vpe": Infinity,
|
13 |
+
"pass_at_1": null,
|
14 |
+
"ttr": 0.1115919629057187,
|
15 |
+
"entropy": 6.784684446529436
|
16 |
+
}
|
Curriculum_math_llama_3_LORA_Arithmetic_4k_Vanilla_val_metrics.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"total_count": 2160,
|
3 |
+
"none_count": 0,
|
4 |
+
"match_count": 731,
|
5 |
+
"wrong_count": 1429,
|
6 |
+
"acc": 0.3384259259259259,
|
7 |
+
"avg_error": -140981271425810.9,
|
8 |
+
"var_error": 4.291224090103069e+31,
|
9 |
+
"mse": 4.2932116619923545e+31,
|
10 |
+
"vse": 3.979396756541245e+66,
|
11 |
+
"mpe": 1129.4876436687503,
|
12 |
+
"vpe": 1324937235.6857886,
|
13 |
+
"pass_at_1": null,
|
14 |
+
"ttr": 0.14141978759083287,
|
15 |
+
"entropy": 5.352818129373723
|
16 |
+
}
|
Curriculum_math_llama_3_LORA_Arithmetic_6k_Vanilla_test_metrics.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"total_count": 2160,
|
3 |
+
"none_count": 0,
|
4 |
+
"match_count": 398,
|
5 |
+
"wrong_count": 1762,
|
6 |
+
"acc": 0.18425925925925926,
|
7 |
+
"avg_error": 1.6762690746301857e+189,
|
8 |
+
"var_error": Infinity,
|
9 |
+
"mse": Infinity,
|
10 |
+
"vse": Infinity,
|
11 |
+
"mpe": 4.541545149151995e+162,
|
12 |
+
"vpe": Infinity,
|
13 |
+
"pass_at_1": null,
|
14 |
+
"ttr": 0.10695134061569017,
|
15 |
+
"entropy": 6.73971045647783
|
16 |
+
}
|
Curriculum_math_llama_3_LORA_Arithmetic_6k_Vanilla_val_metrics.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"total_count": 2160,
|
3 |
+
"none_count": 0,
|
4 |
+
"match_count": 734,
|
5 |
+
"wrong_count": 1426,
|
6 |
+
"acc": 0.3398148148148148,
|
7 |
+
"avg_error": -140981026030063.3,
|
8 |
+
"var_error": 4.291224096739589e+31,
|
9 |
+
"mse": 4.293211661709641e+31,
|
10 |
+
"vse": 3.979396756541489e+66,
|
11 |
+
"mpe": 4834.195309575507,
|
12 |
+
"vpe": 40631470456.149704,
|
13 |
+
"pass_at_1": null,
|
14 |
+
"ttr": 0.14211406279026564,
|
15 |
+
"entropy": 5.395096266869767
|
16 |
+
}
|
Curriculum_math_llama_3_LORA_Arithmetic_8k_MWPMix_10k_Vanilla_test_metrics.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"total_count": 2160,
|
3 |
+
"none_count": 0,
|
4 |
+
"match_count": 401,
|
5 |
+
"wrong_count": 1759,
|
6 |
+
"acc": 0.18564814814814815,
|
7 |
+
"avg_error": 3.692215216512346e+189,
|
8 |
+
"var_error": Infinity,
|
9 |
+
"mse": Infinity,
|
10 |
+
"vse": Infinity,
|
11 |
+
"mpe": -1.1574074074074075e+188,
|
12 |
+
"vpe": Infinity,
|
13 |
+
"pass_at_1": null,
|
14 |
+
"ttr": 0.06564885496183206,
|
15 |
+
"entropy": 7.278446099742158
|
16 |
+
}
|
Curriculum_math_llama_3_LORA_Arithmetic_8k_MWPMix_10k_Vanilla_val_metrics.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"total_count": 2160,
|
3 |
+
"none_count": 0,
|
4 |
+
"match_count": 752,
|
5 |
+
"wrong_count": 1408,
|
6 |
+
"acc": 0.34814814814814815,
|
7 |
+
"avg_error": -140414372883907.27,
|
8 |
+
"var_error": 4.291287238301365e+31,
|
9 |
+
"mse": 4.293258857912603e+31,
|
10 |
+
"vse": 3.979396716479524e+66,
|
11 |
+
"mpe": 6917.35793587359,
|
12 |
+
"vpe": 51383846068.49652,
|
13 |
+
"pass_at_1": null,
|
14 |
+
"ttr": 0.06228567865827046,
|
15 |
+
"entropy": 6.543151155783696
|
16 |
+
}
|
Curriculum_math_llama_3_LORA_Arithmetic_8k_MWPMix_6k_Vanilla_test_metrics.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"total_count": 2160,
|
3 |
+
"none_count": 0,
|
4 |
+
"match_count": 406,
|
5 |
+
"wrong_count": 1754,
|
6 |
+
"acc": 0.18796296296296297,
|
7 |
+
"avg_error": 2.961113072993827e+189,
|
8 |
+
"var_error": Infinity,
|
9 |
+
"mse": Infinity,
|
10 |
+
"vse": Infinity,
|
11 |
+
"mpe": -1.2203703703703704e+188,
|
12 |
+
"vpe": Infinity,
|
13 |
+
"pass_at_1": null,
|
14 |
+
"ttr": 0.07898080652711831,
|
15 |
+
"entropy": 7.184033252728473
|
16 |
+
}
|
Curriculum_math_llama_3_LORA_Arithmetic_8k_MWPMix_6k_Vanilla_val_metrics.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"total_count": 2160,
|
3 |
+
"none_count": 0,
|
4 |
+
"match_count": 764,
|
5 |
+
"wrong_count": 1396,
|
6 |
+
"acc": 0.3537037037037037,
|
7 |
+
"avg_error": -140470420031222.45,
|
8 |
+
"var_error": 4.291285215005026e+31,
|
9 |
+
"mse": 4.2932584088954035e+31,
|
10 |
+
"vse": 3.979396716865031e+66,
|
11 |
+
"mpe": 6917.484537931135,
|
12 |
+
"vpe": 51383845229.859825,
|
13 |
+
"pass_at_1": null,
|
14 |
+
"ttr": 0.0746025591314463,
|
15 |
+
"entropy": 6.437822505397191
|
16 |
+
}
|
Curriculum_math_llama_3_LORA_Arithmetic_8k_MWPMix_8k_Vanilla_test_metrics.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"total_count": 2160,
|
3 |
+
"none_count": 0,
|
4 |
+
"match_count": 402,
|
5 |
+
"wrong_count": 1758,
|
6 |
+
"acc": 0.18611111111111112,
|
7 |
+
"avg_error": 3.943071482407408e+189,
|
8 |
+
"var_error": Infinity,
|
9 |
+
"mse": Infinity,
|
10 |
+
"vse": Infinity,
|
11 |
+
"mpe": -1.1574074074074075e+188,
|
12 |
+
"vpe": Infinity,
|
13 |
+
"pass_at_1": null,
|
14 |
+
"ttr": 0.06736594121606462,
|
15 |
+
"entropy": 7.249353734435205
|
16 |
+
}
|
Curriculum_math_llama_3_LORA_Arithmetic_8k_MWPMix_8k_Vanilla_val_metrics.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"total_count": 2160,
|
3 |
+
"none_count": 0,
|
4 |
+
"match_count": 754,
|
5 |
+
"wrong_count": 1406,
|
6 |
+
"acc": 0.3490740740740741,
|
7 |
+
"avg_error": -140469233772431.44,
|
8 |
+
"var_error": 4.2912852490802965e+31,
|
9 |
+
"mse": 4.293258409643957e+31,
|
10 |
+
"vse": 3.979396716864392e+66,
|
11 |
+
"mpe": 6918.673170334994,
|
12 |
+
"vpe": 51383829442.65376,
|
13 |
+
"pass_at_1": null,
|
14 |
+
"ttr": 0.06512508981644784,
|
15 |
+
"entropy": 6.534206821455875
|
16 |
+
}
|
Curriculum_math_llama_3_LORA_Arithmetic_8k_Vanilla_test_metrics.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"total_count": 2160,
|
3 |
+
"none_count": 0,
|
4 |
+
"match_count": 393,
|
5 |
+
"wrong_count": 1767,
|
6 |
+
"acc": 0.18194444444444444,
|
7 |
+
"avg_error": 1.6635089181486113e+189,
|
8 |
+
"var_error": Infinity,
|
9 |
+
"mse": Infinity,
|
10 |
+
"vse": Infinity,
|
11 |
+
"mpe": 1.037829848896136e+171,
|
12 |
+
"vpe": Infinity,
|
13 |
+
"pass_at_1": null,
|
14 |
+
"ttr": 0.10583580613254204,
|
15 |
+
"entropy": 6.574374474108696
|
16 |
+
}
|