Spaces:
Build error
Build error
ready for final run
Browse files- data/Llama3.1-70B-Chinese-Chat_metrics.csv +11 -11
- data/Llama3.1-70B-Chinese-Chat_shots_metrics.csv +5 -5
- data/Llama3.1-8B-Chinese-Chat_metrics.csv +11 -11
- data/Llama3.1-8B-Chinese-Chat_shots_metrics.csv +7 -5
- data/Mistral-7B-v0.3-Chinese-Chat_metrics.csv +11 -11
- data/Mistral-7B-v0.3-Chinese-Chat_shots_metrics.csv +5 -5
- data/Qwen2-72B-Instruct_metrics.csv +11 -11
- data/Qwen2-72B-Instruct_shots_metrics.csv +1 -1
- data/Qwen2-7B-Instruct_metrics.csv +11 -11
- data/Qwen2-7B-Instruct_shots_metrics.csv +2 -2
- data/Qwen2.5-0.5B-Instruct_metrics.csv +11 -11
- data/Qwen2.5-0.5B-Instruct_shots_metrics.csv +7 -7
- data/Qwen2.5-1.5B-Instruct_metrics.csv +11 -11
- data/Qwen2.5-1.5B-Instruct_shots_metrics.csv +7 -7
- data/Qwen2.5-72B-Instruct_metrics.csv +11 -4
- data/Qwen2.5-72B-Instruct_shots_metrics.csv +3 -1
- data/all_model_token_counts.csv +49 -28
- data/best_metrics.csv +15 -13
- data/best_results.csv +0 -0
- data/few-shots_metrics.csv +90 -39
- data/fine-tuning_metrics.csv +121 -77
- data/internlm2_5-20b-chat_metrics.csv +11 -11
- data/internlm2_5-20b-chat_shots_metrics.csv +1 -1
- data/internlm2_5-7b-chat-1m_metrics.csv +11 -11
- data/internlm2_5-7b-chat-1m_shots_metrics.csv +7 -6
- data/internlm2_5-7b-chat_metrics.csv +11 -11
- data/internlm2_5-7b-chat_shots_metrics.csv +7 -5
- data/openai_metrics.csv +28 -25
- datasets/mgtv/train.csv +2 -2
- datasets/mgtv/val.csv +0 -0
- llm_toolkit/logical_reasoning_utils.py +8 -3
- notebooks/00_Data Analysis.ipynb +0 -0
- notebooks/01a_internlm2_5-20b-chat_analysis.ipynb +0 -0
- notebooks/01a_internlm2_5-7b-chat-1m_analysis.ipynb +0 -0
- notebooks/01a_internlm2_5-7b-chat_analysis.ipynb +0 -0
- notebooks/01b_Mistral-7B-v0.3-Chinese-Chat_analysis.ipynb +0 -0
- notebooks/02a_Qwen2-7B-Instruct_analysis.ipynb +0 -0
- notebooks/02b_Qwen2-72B-Instruct_analysis.ipynb +0 -0
- notebooks/02e_Qwen2.5-1.5B-Instruct_analysis.ipynb +0 -0
- notebooks/02f_Qwen2.5-0.5B-Instruct_analysis.ipynb +0 -0
- notebooks/02g_Qwen2.5-72B-Instruct_analysis.ipynb +0 -0
- notebooks/03a_Llama3.1-8B-Chinese-Chat_analysis.ipynb +0 -0
- notebooks/03b_Llama3.1-70B-Chinese-Chat_analysis.ipynb +0 -0
- notebooks/04b_OpenAI-Models_analysis.ipynb +0 -0
- notebooks/06b_Open-Source-Models_analysis.ipynb +0 -0
- scripts/eval-mgtv-qwen2.5_4bit.sh +6 -5
- scripts/eval-mgtv.sh +1 -1
- scripts/eval-shots.sh +2 -7
data/Llama3.1-70B-Chinese-Chat_metrics.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat_torch.bfloat16_4bit_lf,0.
|
3 |
-
0.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-35_torch.bfloat16_4bit_lf,0.
|
4 |
-
0.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-70_torch.bfloat16_4bit_lf,0.
|
5 |
-
0.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-105_torch.bfloat16_4bit_lf,0.
|
6 |
-
0.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-140_torch.bfloat16_4bit_lf,0.
|
7 |
-
1.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-175_torch.bfloat16_4bit_lf,0.
|
8 |
-
1.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-210_torch.bfloat16_4bit_lf,0.
|
9 |
-
1.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-245_torch.bfloat16_4bit_lf,0.
|
10 |
-
1.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-280_torch.bfloat16_4bit_lf,0.
|
11 |
-
1.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-315_torch.bfloat16_4bit_lf,0.
|
12 |
-
2.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-350_torch.bfloat16_4bit_lf,0.
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat_torch.bfloat16_4bit_lf,0.7646666666666667,0.7804609488644828,0.7646666666666667,0.7497548621711109,0.009666666666666667
|
3 |
+
0.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-35_torch.bfloat16_4bit_lf,0.784,0.8105343792887019,0.784,0.7931742141608462,0.9996666666666667
|
4 |
+
0.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-70_torch.bfloat16_4bit_lf,0.7426666666666667,0.8117033235947096,0.7426666666666667,0.7673825750808414,1.0
|
5 |
+
0.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-105_torch.bfloat16_4bit_lf,0.736,0.8227236574891071,0.736,0.7650739090144549,1.0
|
6 |
+
0.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-140_torch.bfloat16_4bit_lf,0.7686666666666667,0.8259659464402258,0.7686666666666667,0.7880870865039342,1.0
|
7 |
+
1.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-175_torch.bfloat16_4bit_lf,0.809,0.8282732906153989,0.809,0.8166997776775797,1.0
|
8 |
+
1.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-210_torch.bfloat16_4bit_lf,0.75,0.8287348768409003,0.75,0.7741734526674708,1.0
|
9 |
+
1.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-245_torch.bfloat16_4bit_lf,0.7703333333333333,0.8271894042316865,0.7703333333333333,0.7907617274354051,1.0
|
10 |
+
1.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-280_torch.bfloat16_4bit_lf,0.776,0.8315436250878178,0.776,0.7959870550088912,1.0
|
11 |
+
1.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-315_torch.bfloat16_4bit_lf,0.7733333333333333,0.8327336470976,0.7733333333333333,0.7947537193805649,1.0
|
12 |
+
2.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7686666666666667,0.8329633784586954,0.7686666666666667,0.7914454794587963,1.0
|
data/Llama3.1-70B-Chinese-Chat_shots_metrics.csv
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-00,0.
|
3 |
-
5,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-05,0.
|
4 |
-
10,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-10,0.
|
5 |
-
20,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-20,0.
|
6 |
-
30,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-30,0.
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-00,0.7646666666666667,0.7804609488644828,0.7646666666666667,0.7497548621711109,0.009666666666666667
|
3 |
+
5,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-05,0.754,0.7675695134276339,0.754,0.7530665717237273,0.79
|
4 |
+
10,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-10,0.756,0.7695738042762151,0.756,0.7563878737797524,0.8326666666666667
|
5 |
+
20,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-20,0.7406666666666667,0.7560876641054418,0.7406666666666667,0.7360011002310723,0.819
|
6 |
+
30,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-30,0.7603333333333333,0.7710641222872985,0.7603333333333333,0.7570501796584528,0.548
|
data/Llama3.1-8B-Chinese-Chat_metrics.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat_torch.float16_lf,0.
|
3 |
-
0.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-35_torch.float16_lf,0.
|
4 |
-
0.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-70_torch.float16_lf,0.
|
5 |
-
0.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-105_torch.float16_lf,0.
|
6 |
-
0.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-140_torch.float16_lf,0.
|
7 |
-
1.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-175_torch.float16_lf,0.
|
8 |
-
1.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-210_torch.float16_lf,0.
|
9 |
-
1.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-245_torch.float16_lf,0.
|
10 |
-
1.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-280_torch.float16_lf,0.
|
11 |
-
1.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-315_torch.float16_lf,0.
|
12 |
-
2.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-350_torch.float16_lf,0.
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat_torch.float16_lf,0.7343333333333333,0.7375752740091942,0.7343333333333333,0.7270283652909943,0.8033333333333333
|
3 |
+
0.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-35_torch.float16_lf,0.717,0.7933072428707201,0.717,0.7447412977676989,1.0
|
4 |
+
0.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-70_torch.float16_lf,0.7226666666666667,0.7983383063141186,0.7226666666666667,0.7489397350174751,0.9993333333333333
|
5 |
+
0.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-105_torch.float16_lf,0.7083333333333334,0.7967030927405547,0.7083333333333334,0.738836849803633,1.0
|
6 |
+
0.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7773333333333333,0.805139129977305,0.7773333333333333,0.7882159693114585,1.0
|
7 |
+
1.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-175_torch.float16_lf,0.7853333333333333,0.8062405645226312,0.7853333333333333,0.7938991590982061,1.0
|
8 |
+
1.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7436666666666667,0.8148316221752646,0.7436666666666667,0.7689773286065246,1.0
|
9 |
+
1.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-245_torch.float16_lf,0.759,0.8080929326806991,0.759,0.7772842274293189,1.0
|
10 |
+
1.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-280_torch.float16_lf,0.745,0.8027959680086005,0.745,0.7666181725503965,1.0
|
11 |
+
1.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-315_torch.float16_lf,0.7303333333333333,0.806805925253305,0.7303333333333333,0.7580841794383364,1.0
|
12 |
+
2.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-350_torch.float16_lf,0.737,0.808786608325944,0.737,0.7629963845364953,1.0
|
data/Llama3.1-8B-Chinese-Chat_shots_metrics.csv
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-00,0.
|
3 |
-
5,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-05,0.7056666666666667,0.
|
4 |
-
10,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-10,0.
|
5 |
-
20,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-20,0.767,0.
|
6 |
-
30,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-30,0.
|
|
|
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-00,0.7343333333333333,0.7375752740091942,0.7343333333333333,0.7270283652909943,0.8033333333333333
|
3 |
+
5,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-05,0.7056666666666667,0.7508515184863084,0.7056666666666667,0.7230574380518462,0.9886666666666667
|
4 |
+
10,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-10,0.6736666666666666,0.7776004745989736,0.6736666666666666,0.7094104807112239,0.9623333333333334
|
5 |
+
20,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-20,0.767,0.764982587229615,0.767,0.7638473265780445,0.979
|
6 |
+
30,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-30,0.7713333333333333,0.7725685630276532,0.7713333333333333,0.7692692690410152,0.7326666666666667
|
7 |
+
40,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-40,0.6873333333333334,0.773294758147205,0.6873333333333334,0.7075877720686631,0.759
|
8 |
+
50,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-50,0.7176666666666667,0.7599215931134234,0.7176666666666667,0.7203550920641806,0.6623333333333333
|
data/Mistral-7B-v0.3-Chinese-Chat_metrics.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat_torch.float16_lf,0.
|
3 |
-
0.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-35_torch.float16_lf,0.
|
4 |
-
0.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-70_torch.float16_lf,0.
|
5 |
-
0.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-105_torch.float16_lf,0.
|
6 |
-
0.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-140_torch.float16_lf,0.
|
7 |
-
1.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-175_torch.float16_lf,0.
|
8 |
-
1.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-210_torch.float16_lf,0.
|
9 |
-
1.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-245_torch.float16_lf,0.
|
10 |
-
1.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-280_torch.float16_lf,0.
|
11 |
-
1.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-315_torch.float16_lf,0.
|
12 |
-
2.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-350_torch.float16_lf,0.
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat_torch.float16_lf,0.6923333333333334,0.7009179792741449,0.6923333333333334,0.6605899639694456,0.011666666666666667
|
3 |
+
0.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-35_torch.float16_lf,0.706,0.7832545046834243,0.706,0.7323466131711432,1.0
|
4 |
+
0.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-70_torch.float16_lf,0.7476666666666667,0.7836120158306894,0.7476666666666667,0.7557791381509955,1.0
|
5 |
+
0.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-105_torch.float16_lf,0.6736666666666666,0.7908140272002406,0.6736666666666666,0.7129951145360993,1.0
|
6 |
+
0.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7293333333333333,0.788387677637057,0.7293333333333333,0.7494137469900564,1.0
|
7 |
+
1.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-175_torch.float16_lf,0.74,0.7833068129490098,0.74,0.7499935485741815,1.0
|
8 |
+
1.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7146666666666667,0.7890760288118991,0.7146666666666667,0.7411240160229633,1.0
|
9 |
+
1.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-245_torch.float16_lf,0.7616666666666667,0.789634957005121,0.7616666666666667,0.7721210086098353,1.0
|
10 |
+
1.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-280_torch.float16_lf,0.7296666666666667,0.7854982015370922,0.7296666666666667,0.7491267995936699,1.0
|
11 |
+
1.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-315_torch.float16_lf,0.7076666666666667,0.7877874532247918,0.7076666666666667,0.7346283562321456,1.0
|
12 |
+
2.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-350_torch.float16_lf,0.713,0.7895690867103055,0.713,0.739013227401175,1.0
|
data/Mistral-7B-v0.3-Chinese-Chat_shots_metrics.csv
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-00,0.
|
3 |
-
5,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-05,0.
|
4 |
-
10,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-10,0.
|
5 |
-
20,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-20,0.
|
6 |
-
30,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-30,0.
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-00,0.6923333333333334,0.7009179792741449,0.6923333333333334,0.6605899639694456,0.011666666666666667
|
3 |
+
5,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-05,0.6546666666666666,0.7415422757067709,0.6546666666666666,0.684189810233595,0.142
|
4 |
+
10,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-10,0.612,0.7259976964524691,0.612,0.6501410678512595,0.10633333333333334
|
5 |
+
20,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-20,0.6336666666666667,0.7315100617022602,0.6336666666666667,0.6683245802083553,0.08266666666666667
|
6 |
+
30,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-30,0.665,0.7374233826761456,0.665,0.6872462947319797,0.07
|
data/Qwen2-72B-Instruct_metrics.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch.bfloat16_4bit_lf,0.
|
3 |
-
0.2,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.
|
4 |
-
0.4,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.
|
5 |
-
0.6,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-105_torch.bfloat16_4bit_lf,0.
|
6 |
-
0.8,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-140_torch.bfloat16_4bit_lf,0.
|
7 |
-
1.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-175_torch.bfloat16_4bit_lf,0.
|
8 |
-
1.2,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-210_torch.bfloat16_4bit_lf,0.
|
9 |
-
1.4,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-245_torch.bfloat16_4bit_lf,0.
|
10 |
-
1.6,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-280_torch.bfloat16_4bit_lf,0.
|
11 |
-
1.8,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-315_torch.bfloat16_4bit_lf,0.
|
12 |
-
2.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-350_torch.bfloat16_4bit_lf,0.
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch.bfloat16_4bit_lf,0.757,0.7973819870472458,0.757,0.7602606947698078,0.9773333333333334
|
3 |
+
0.2,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.772,0.8214192168152544,0.772,0.7910898276003457,1.0
|
4 |
+
0.4,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.757,0.828747966447233,0.757,0.783516715780864,1.0
|
5 |
+
0.6,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-105_torch.bfloat16_4bit_lf,0.772,0.8277697933855978,0.772,0.7932982172336923,1.0
|
6 |
+
0.8,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-140_torch.bfloat16_4bit_lf,0.8036666666666666,0.8277228453985896,0.8036666666666666,0.8136774676398189,1.0
|
7 |
+
1.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-175_torch.bfloat16_4bit_lf,0.753,0.8267761287574541,0.753,0.7793434248302783,1.0
|
8 |
+
1.2,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-210_torch.bfloat16_4bit_lf,0.7793333333333333,0.8358618807490109,0.7793333333333333,0.800734522365308,1.0
|
9 |
+
1.4,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-245_torch.bfloat16_4bit_lf,0.7883333333333333,0.8390667295473608,0.7883333333333333,0.8075446360016978,1.0
|
10 |
+
1.6,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-280_torch.bfloat16_4bit_lf,0.7856666666666666,0.8333912862981965,0.7856666666666666,0.8038536915174684,1.0
|
11 |
+
1.8,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-315_torch.bfloat16_4bit_lf,0.805,0.8442903406198344,0.805,0.8197956174225439,1.0
|
12 |
+
2.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-350_torch.bfloat16_4bit_lf,0.7936666666666666,0.8399561173931658,0.7936666666666666,0.8112524138737499,1.0
|
data/Qwen2-72B-Instruct_shots_metrics.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch/shots-00,0.
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch/shots-00,0.757,0.7973819870472458,0.757,0.7602606947698078,0.9773333333333334
|
data/Qwen2-7B-Instruct_metrics.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct_torch.float16_lf,0.
|
3 |
-
0.2,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-35_torch.float16_lf,0.
|
4 |
-
0.4,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-70_torch.float16_lf,0.
|
5 |
-
0.6,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-105_torch.float16_lf,0.
|
6 |
-
0.8,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-140_torch.float16_lf,0.
|
7 |
-
1.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-175_torch.float16_lf,0.
|
8 |
-
1.2,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-210_torch.float16_lf,0.
|
9 |
-
1.4,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-245_torch.float16_lf,0.
|
10 |
-
1.6,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-280_torch.float16_lf,0.
|
11 |
-
1.8,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-315_torch.float16_lf,0.
|
12 |
-
2.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-350_torch.float16_lf,0.
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct_torch.float16_lf,0.6853333333333333,0.7434931541561965,0.6853333333333333,0.7090778261894969,0.9996666666666667
|
3 |
+
0.2,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-35_torch.float16_lf,0.7313333333333333,0.7782207073448913,0.7313333333333333,0.7498580605712221,0.9996666666666667
|
4 |
+
0.4,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-70_torch.float16_lf,0.767,0.7975691979811874,0.767,0.7784908005204111,1.0
|
5 |
+
0.6,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-105_torch.float16_lf,0.706,0.8028770302127605,0.706,0.7396402026345186,1.0
|
6 |
+
0.8,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-140_torch.float16_lf,0.7313333333333333,0.7899967378450532,0.7313333333333333,0.7491181057755286,1.0
|
7 |
+
1.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-175_torch.float16_lf,0.6853333333333333,0.7776902509375624,0.6853333333333333,0.7122906026955259,1.0
|
8 |
+
1.2,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-210_torch.float16_lf,0.7156666666666667,0.7981854285684257,0.7156666666666667,0.7440952985881264,1.0
|
9 |
+
1.4,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-245_torch.float16_lf,0.743,0.7909260776868464,0.743,0.7603582063225583,0.9996666666666667
|
10 |
+
1.6,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-280_torch.float16_lf,0.7106666666666667,0.7844615294470283,0.7106666666666667,0.7354379359862141,1.0
|
11 |
+
1.8,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-315_torch.float16_lf,0.6926666666666667,0.7852752054045592,0.6926666666666667,0.7234458732476875,1.0
|
12 |
+
2.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-350_torch.float16_lf,0.6996666666666667,0.7892137201429604,0.6996666666666667,0.7296312152658814,1.0
|
data/Qwen2-7B-Instruct_shots_metrics.csv
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-00,0.
|
3 |
-
10,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-10,0.
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-00,0.6853333333333333,0.7434931541561965,0.6853333333333333,0.7090778261894969,0.9996666666666667
|
3 |
+
10,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-10,0.5723333333333334,0.738817429885796,0.5723333333333334,0.6112549880619311,0.9896666666666667
|
data/Qwen2.5-0.5B-Instruct_metrics.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct_torch.float16_lf,0.
|
3 |
-
0.2,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-35_torch.float16_lf,0.
|
4 |
-
0.4,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-70_torch.float16_lf,0.
|
5 |
-
0.6,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-105_torch.float16_lf,0.
|
6 |
-
0.8,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-140_torch.float16_lf,0.
|
7 |
-
1.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-175_torch.float16_lf,0.
|
8 |
-
1.2,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-210_torch.float16_lf,0.
|
9 |
-
1.4,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-245_torch.float16_lf,0.
|
10 |
-
1.6,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-280_torch.float16_lf,0.
|
11 |
-
1.8,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-315_torch.float16_lf,0.
|
12 |
-
2.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-350_torch.float16_lf,0.
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct_torch.float16_lf,0.43833333333333335,0.5292917259914629,0.43833333333333335,0.42286875992486556,0.594
|
3 |
+
0.2,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-35_torch.float16_lf,0.5223333333333333,0.5704911830866488,0.5223333333333333,0.454387436259078,1.0
|
4 |
+
0.4,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-70_torch.float16_lf,0.542,0.6358012674347429,0.542,0.5272438410312219,1.0
|
5 |
+
0.6,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-105_torch.float16_lf,0.44633333333333336,0.6477441598024034,0.44633333333333336,0.4917457459702999,1.0
|
6 |
+
0.8,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-140_torch.float16_lf,0.5053333333333333,0.6438300456580985,0.5053333333333333,0.4995247505211914,1.0
|
7 |
+
1.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-175_torch.float16_lf,0.558,0.6560369730369926,0.558,0.5632487818615118,1.0
|
8 |
+
1.2,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-210_torch.float16_lf,0.5453333333333333,0.6357935773889876,0.5453333333333333,0.5594242895140294,1.0
|
9 |
+
1.4,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-245_torch.float16_lf,0.5903333333333334,0.6503049529377274,0.5903333333333334,0.6094397514027766,1.0
|
10 |
+
1.6,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-280_torch.float16_lf,0.5286666666666666,0.6532851084098983,0.5286666666666666,0.5617239467523474,1.0
|
11 |
+
1.8,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-315_torch.float16_lf,0.5336666666666666,0.6607103736450911,0.5336666666666666,0.5622949959647037,1.0
|
12 |
+
2.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-350_torch.float16_lf,0.5156666666666667,0.652809461208547,0.5156666666666667,0.549955024535151,1.0
|
data/Qwen2.5-0.5B-Instruct_shots_metrics.csv
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-00,0.
|
3 |
-
5,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-05,0.
|
4 |
-
10,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-10,0.
|
5 |
-
20,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-20,0.
|
6 |
-
30,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-30,0.
|
7 |
-
40,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-40,0.
|
8 |
-
50,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-50,0.
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-00,0.43833333333333335,0.5292917259914629,0.43833333333333335,0.42286875992486556,0.594
|
3 |
+
5,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-05,0.17966666666666667,0.47516573853109806,0.17966666666666667,0.214144872117911,0.004
|
4 |
+
10,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-10,0.351,0.5084853117995367,0.351,0.39097839594031075,0.068
|
5 |
+
20,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-20,0.43366666666666664,0.513186330900278,0.43366666666666664,0.463747974034812,0.37266666666666665
|
6 |
+
30,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-30,0.39,0.5367753683204347,0.39,0.4299603249123421,0.07566666666666666
|
7 |
+
40,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-40,0.466,0.5400134144413437,0.466,0.49542975613961904,0.324
|
8 |
+
50,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-50,0.496,0.5465409839032335,0.496,0.5069942984615308,0.24333333333333335
|
data/Qwen2.5-1.5B-Instruct_metrics.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct_torch.float16_lf,0.
|
3 |
-
0.2,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-35_torch.float16_lf,0.
|
4 |
-
0.4,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-70_torch.float16_lf,0.
|
5 |
-
0.6,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-105_torch.float16_lf,0.
|
6 |
-
0.8,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-140_torch.float16_lf,0.
|
7 |
-
1.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-175_torch.float16_lf,0.
|
8 |
-
1.2,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-210_torch.float16_lf,0.
|
9 |
-
1.4,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-245_torch.float16_lf,0.
|
10 |
-
1.6,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-280_torch.float16_lf,0.
|
11 |
-
1.8,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-315_torch.float16_lf,0.
|
12 |
-
2.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-350_torch.float16_lf,0.
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct_torch.float16_lf,0.20166666666666666,0.5269756683734005,0.20166666666666666,0.24069835329504388,0.9223333333333333
|
3 |
+
0.2,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-35_torch.float16_lf,0.48533333333333334,0.654166887199198,0.48533333333333334,0.5381849571995003,0.9996666666666667
|
4 |
+
0.4,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-70_torch.float16_lf,0.573,0.7037737273232145,0.573,0.6131069400231612,0.9996666666666667
|
5 |
+
0.6,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-105_torch.float16_lf,0.539,0.7162869126454278,0.539,0.5961610389687657,1.0
|
6 |
+
0.8,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-140_torch.float16_lf,0.6443333333333333,0.7218750831357578,0.6443333333333333,0.6721473356905486,1.0
|
7 |
+
1.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-175_torch.float16_lf,0.6296666666666667,0.7065049203038848,0.6296666666666667,0.6496809196018393,1.0
|
8 |
+
1.2,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-210_torch.float16_lf,0.5836666666666667,0.7222805944180548,0.5836666666666667,0.6314346830311218,1.0
|
9 |
+
1.4,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-245_torch.float16_lf,0.6493333333333333,0.7440287895607589,0.6493333333333333,0.6815314583590799,1.0
|
10 |
+
1.6,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-280_torch.float16_lf,0.6293333333333333,0.7332138067544355,0.6293333333333333,0.6634330572585689,1.0
|
11 |
+
1.8,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-315_torch.float16_lf,0.599,0.7297954686265763,0.599,0.6396292878324805,1.0
|
12 |
+
2.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-350_torch.float16_lf,0.6056666666666667,0.7305580205770756,0.6056666666666667,0.6426785514786738,1.0
|
data/Qwen2.5-1.5B-Instruct_shots_metrics.csv
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-00,0.
|
3 |
-
5,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-05,0.
|
4 |
-
10,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-10,0.
|
5 |
-
20,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-20,0.
|
6 |
-
30,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-30,0.
|
7 |
-
40,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-40,0.
|
8 |
-
50,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-50,0.
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-00,0.20166666666666666,0.5269756683734005,0.20166666666666666,0.24069835329504388,0.9223333333333333
|
3 |
+
5,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-05,0.3933333333333333,0.578886379886985,0.3933333333333333,0.43554636943558694,0.8283333333333334
|
4 |
+
10,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-10,0.407,0.5820145311822223,0.407,0.459589777544246,0.9156666666666666
|
5 |
+
20,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-20,0.232,0.5282610881631451,0.232,0.3093707499897376,0.676
|
6 |
+
30,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-30,0.23,0.5479545947886839,0.23,0.3064381040560128,0.661
|
7 |
+
40,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-40,0.29233333333333333,0.5608411738006117,0.29233333333333333,0.3751714671158081,0.5206666666666667
|
8 |
+
50,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-50,0.29,0.5646814860840066,0.29,0.36883826526592467,0.4603333333333333
|
data/Qwen2.5-72B-Instruct_metrics.csv
CHANGED
@@ -1,5 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct_torch.
|
3 |
-
0.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-35_torch.
|
4 |
-
0.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-70_torch.
|
5 |
-
0.6,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-105_torch.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct_torch.bfloat16_4bit_lf,0.7956666666666666,0.8098073411161181,0.7956666666666666,0.7771317592221199,0.994
|
3 |
+
0.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.792,0.8180793658647517,0.792,0.80166512366027,1.0
|
4 |
+
0.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.7716666666666666,0.8199569804721152,0.7716666666666666,0.7895879011938259,1.0
|
5 |
+
0.6,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-105_torch.bfloat16_4bit_lf,0.798,0.8379062379534957,0.798,0.812148680520218,1.0
|
6 |
+
0.8,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-140_torch.bfloat16_4bit_lf,0.8213333333333334,0.8447926258362122,0.8213333333333334,0.8299486611547571,1.0
|
7 |
+
1.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-175_torch.bfloat16_4bit_lf,0.7643333333333333,0.8235366724638146,0.7643333333333333,0.7858148913986999,1.0
|
8 |
+
1.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-210_torch.bfloat16_4bit_lf,0.7986666666666666,0.83233218480008,0.7986666666666666,0.8115886421806521,1.0
|
9 |
+
1.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-245_torch.bfloat16_4bit_lf,0.7923333333333333,0.8231874218285514,0.7923333333333333,0.803363661387202,1.0
|
10 |
+
1.6,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-280_torch.bfloat16_4bit_lf,0.7936666666666666,0.8268750473800219,0.7936666666666666,0.8057720333101867,1.0
|
11 |
+
1.8,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-315_torch.bfloat16_4bit_lf,0.801,0.830389411421043,0.801,0.8117656427717702,1.0
|
12 |
+
2.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-350_torch.bfloat16_4bit_lf,0.795,0.8280696193638868,0.795,0.8068114730639832,1.0
|
data/Qwen2.5-72B-Instruct_shots_metrics.csv
CHANGED
@@ -1 +1,3 @@
|
|
1 |
-
shots,model,run,accuracy,precision,recall,f1
|
|
|
|
|
|
1 |
+
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-00,0.7956666666666666,0.8098073411161181,0.7956666666666666,0.7771317592221199,0.994
|
3 |
+
5,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-05,0.819,0.8182324679666184,0.819,0.8095367865845521,0.9416666666666667
|
data/all_model_token_counts.csv
CHANGED
@@ -1,60 +1,81 @@
|
|
1 |
model_name,num_shots,max,min,mean,std
|
|
|
|
|
2 |
Mistral-7B-v0.3-Chinese-Chat,0,928,694,799.354,15.567384660085061
|
3 |
internlm2_5-7b-chat,0,511,426,461.91766666666666,7.767732430462529
|
4 |
internlm2_5-7b-chat-1m,0,511,426,461.91766666666666,7.767732430462529
|
5 |
-
Qwen2-7B-Instruct,0,517,426,465.33866666666665,8.617118029244592
|
6 |
-
Llama3.1-8B-Chinese-Chat,0,652,512,571.091,9.115687078710652
|
7 |
internlm2_5-20b-chat,0,511,426,461.91766666666666,7.767732430462529
|
8 |
-
|
9 |
-
Qwen2-
|
|
|
|
|
|
|
|
|
|
|
10 |
Mistral-7B-v0.3-Chinese-Chat,5,2573,2339,2444.354,15.567384660085061
|
11 |
internlm2_5-7b-chat,5,1351,1266,1301.9176666666667,7.767732430462529
|
12 |
internlm2_5-7b-chat-1m,5,1351,1266,1301.9176666666667,7.767732430462529
|
13 |
-
Qwen2-7B-Instruct,5,1381,1290,1329.3386666666668,8.617118029244592
|
14 |
-
Llama3.1-8B-Chinese-Chat,5,1818,1678,1737.091,9.115687078710652
|
15 |
internlm2_5-20b-chat,5,1351,1266,1301.9176666666667,7.767732430462529
|
16 |
-
|
17 |
-
Qwen2-
|
|
|
|
|
|
|
|
|
|
|
18 |
Mistral-7B-v0.3-Chinese-Chat,10,4119,3885,3990.354,15.567384660085061
|
19 |
internlm2_5-7b-chat,10,2245,2160,2195.9176666666667,7.767732430462529
|
20 |
internlm2_5-7b-chat-1m,10,2245,2160,2195.9176666666667,7.767732430462529
|
21 |
-
Qwen2-7B-Instruct,10,2289,2198,2237.3386666666665,8.617118029244592
|
22 |
-
Llama3.1-8B-Chinese-Chat,10,2914,2774,2833.091,9.115687078710652
|
23 |
internlm2_5-20b-chat,10,2245,2160,2195.9176666666667,7.767732430462529
|
24 |
-
|
25 |
-
Qwen2-
|
|
|
|
|
|
|
|
|
|
|
26 |
Mistral-7B-v0.3-Chinese-Chat,20,7392,7158,7263.354,15.567384660085061
|
27 |
internlm2_5-7b-chat,20,4065,3980,4015.9176666666667,7.767732430462529
|
28 |
internlm2_5-7b-chat-1m,20,4065,3980,4015.9176666666667,7.767732430462529
|
29 |
-
Qwen2-7B-Instruct,20,4176,4085,4124.3386666666665,8.617118029244592
|
30 |
-
Llama3.1-8B-Chinese-Chat,20,5283,5143,5202.091,9.115687078710652
|
31 |
internlm2_5-20b-chat,20,4065,3980,4015.9176666666667,7.767732430462529
|
32 |
-
|
33 |
-
Qwen2-
|
|
|
|
|
|
|
|
|
|
|
34 |
Mistral-7B-v0.3-Chinese-Chat,30,10804,10570,10675.354,15.567384660085061
|
35 |
internlm2_5-7b-chat,30,5903,5818,5853.917666666666,7.767732430462529
|
36 |
internlm2_5-7b-chat-1m,30,5903,5818,5853.917666666666,7.767732430462529
|
37 |
-
Qwen2-7B-Instruct,30,6107,6016,6055.3386666666665,8.617118029244592
|
38 |
-
Llama3.1-8B-Chinese-Chat,30,7768,7628,7687.091,9.115687078710652
|
39 |
internlm2_5-20b-chat,30,5903,5818,5853.917666666666,7.767732430462529
|
40 |
-
|
41 |
-
Qwen2-
|
|
|
|
|
|
|
|
|
|
|
42 |
Mistral-7B-v0.3-Chinese-Chat,40,14152,13918,14023.354,15.567384660085061
|
43 |
internlm2_5-7b-chat,40,7709,7624,7659.917666666666,7.767732430462529
|
44 |
internlm2_5-7b-chat-1m,40,7709,7624,7659.917666666666,7.767732430462529
|
45 |
-
Qwen2-7B-Instruct,40,8010,7919,7958.3386666666665,8.617118029244592
|
46 |
-
Llama3.1-8B-Chinese-Chat,40,10217,10077,10136.091,9.115687078710652
|
47 |
internlm2_5-20b-chat,40,7709,7624,7659.917666666666,7.767732430462529
|
48 |
-
|
49 |
-
Qwen2-
|
|
|
|
|
|
|
|
|
|
|
50 |
Mistral-7B-v0.3-Chinese-Chat,50,17588,17354,17459.354,15.567384660085061
|
51 |
internlm2_5-7b-chat,50,9561,9476,9511.917666666666,7.767732430462529
|
52 |
internlm2_5-7b-chat-1m,50,9561,9476,9511.917666666666,7.767732430462529
|
53 |
-
Qwen2-7B-Instruct,50,9961,9870,9909.338666666667,8.617118029244592
|
54 |
-
Llama3.1-8B-Chinese-Chat,50,12719,12579,12638.091,9.115687078710652
|
55 |
internlm2_5-20b-chat,50,9561,9476,9511.917666666666,7.767732430462529
|
56 |
-
|
57 |
-
Qwen2-
|
|
|
|
|
|
|
58 |
gpt-4o,0,606,464,524.8063333333333,10.057594723695004
|
59 |
gpt-4o-mini,0,606,464,524.8063333333333,10.057594723695004
|
60 |
o1-preview,0,925,682,797.5953333333333,16.41724967580933
|
|
|
1 |
model_name,num_shots,max,min,mean,std
|
2 |
+
Llama3.1-8B-Chinese-Chat,0,652,512,571.091,9.115687078710652
|
3 |
+
Llama3.1-70B-Chinese-Chat,0,652,512,571.091,9.115687078710652
|
4 |
Mistral-7B-v0.3-Chinese-Chat,0,928,694,799.354,15.567384660085061
|
5 |
internlm2_5-7b-chat,0,511,426,461.91766666666666,7.767732430462529
|
6 |
internlm2_5-7b-chat-1m,0,511,426,461.91766666666666,7.767732430462529
|
|
|
|
|
7 |
internlm2_5-20b-chat,0,511,426,461.91766666666666,7.767732430462529
|
8 |
+
Qwen2.5-0.5B-Instruct,0,517,426,465.33866666666665,8.617118029244592
|
9 |
+
Qwen2.5-1.5B-Instruct,0,517,426,465.33866666666665,8.617118029244592
|
10 |
+
Qwen2.5-3B-Instruct,0,517,426,465.33866666666665,8.617118029244592
|
11 |
+
Qwen2.5-7B-Instruct,0,517,426,465.33866666666665,8.617118029244592
|
12 |
+
Qwen2.5-72B-Instruct,0,517,426,465.33866666666665,8.617118029244592
|
13 |
+
Llama3.1-8B-Chinese-Chat,5,1818,1678,1737.091,9.115687078710652
|
14 |
+
Llama3.1-70B-Chinese-Chat,5,1818,1678,1737.091,9.115687078710652
|
15 |
Mistral-7B-v0.3-Chinese-Chat,5,2573,2339,2444.354,15.567384660085061
|
16 |
internlm2_5-7b-chat,5,1351,1266,1301.9176666666667,7.767732430462529
|
17 |
internlm2_5-7b-chat-1m,5,1351,1266,1301.9176666666667,7.767732430462529
|
|
|
|
|
18 |
internlm2_5-20b-chat,5,1351,1266,1301.9176666666667,7.767732430462529
|
19 |
+
Qwen2.5-0.5B-Instruct,5,1381,1290,1329.3386666666668,8.617118029244592
|
20 |
+
Qwen2.5-1.5B-Instruct,5,1381,1290,1329.3386666666668,8.617118029244592
|
21 |
+
Qwen2.5-3B-Instruct,5,1381,1290,1329.3386666666668,8.617118029244592
|
22 |
+
Qwen2.5-7B-Instruct,5,1381,1290,1329.3386666666668,8.617118029244592
|
23 |
+
Qwen2.5-72B-Instruct,5,1381,1290,1329.3386666666668,8.617118029244592
|
24 |
+
Llama3.1-8B-Chinese-Chat,10,2914,2774,2833.091,9.115687078710652
|
25 |
+
Llama3.1-70B-Chinese-Chat,10,2914,2774,2833.091,9.115687078710652
|
26 |
Mistral-7B-v0.3-Chinese-Chat,10,4119,3885,3990.354,15.567384660085061
|
27 |
internlm2_5-7b-chat,10,2245,2160,2195.9176666666667,7.767732430462529
|
28 |
internlm2_5-7b-chat-1m,10,2245,2160,2195.9176666666667,7.767732430462529
|
|
|
|
|
29 |
internlm2_5-20b-chat,10,2245,2160,2195.9176666666667,7.767732430462529
|
30 |
+
Qwen2.5-0.5B-Instruct,10,2289,2198,2237.3386666666665,8.617118029244592
|
31 |
+
Qwen2.5-1.5B-Instruct,10,2289,2198,2237.3386666666665,8.617118029244592
|
32 |
+
Qwen2.5-3B-Instruct,10,2289,2198,2237.3386666666665,8.617118029244592
|
33 |
+
Qwen2.5-7B-Instruct,10,2289,2198,2237.3386666666665,8.617118029244592
|
34 |
+
Qwen2.5-72B-Instruct,10,2289,2198,2237.3386666666665,8.617118029244592
|
35 |
+
Llama3.1-8B-Chinese-Chat,20,5283,5143,5202.091,9.115687078710652
|
36 |
+
Llama3.1-70B-Chinese-Chat,20,5283,5143,5202.091,9.115687078710652
|
37 |
Mistral-7B-v0.3-Chinese-Chat,20,7392,7158,7263.354,15.567384660085061
|
38 |
internlm2_5-7b-chat,20,4065,3980,4015.9176666666667,7.767732430462529
|
39 |
internlm2_5-7b-chat-1m,20,4065,3980,4015.9176666666667,7.767732430462529
|
|
|
|
|
40 |
internlm2_5-20b-chat,20,4065,3980,4015.9176666666667,7.767732430462529
|
41 |
+
Qwen2.5-0.5B-Instruct,20,4176,4085,4124.3386666666665,8.617118029244592
|
42 |
+
Qwen2.5-1.5B-Instruct,20,4176,4085,4124.3386666666665,8.617118029244592
|
43 |
+
Qwen2.5-3B-Instruct,20,4176,4085,4124.3386666666665,8.617118029244592
|
44 |
+
Qwen2.5-7B-Instruct,20,4176,4085,4124.3386666666665,8.617118029244592
|
45 |
+
Qwen2.5-72B-Instruct,20,4176,4085,4124.3386666666665,8.617118029244592
|
46 |
+
Llama3.1-8B-Chinese-Chat,30,7768,7628,7687.091,9.115687078710652
|
47 |
+
Llama3.1-70B-Chinese-Chat,30,7768,7628,7687.091,9.115687078710652
|
48 |
Mistral-7B-v0.3-Chinese-Chat,30,10804,10570,10675.354,15.567384660085061
|
49 |
internlm2_5-7b-chat,30,5903,5818,5853.917666666666,7.767732430462529
|
50 |
internlm2_5-7b-chat-1m,30,5903,5818,5853.917666666666,7.767732430462529
|
|
|
|
|
51 |
internlm2_5-20b-chat,30,5903,5818,5853.917666666666,7.767732430462529
|
52 |
+
Qwen2.5-0.5B-Instruct,30,6107,6016,6055.3386666666665,8.617118029244592
|
53 |
+
Qwen2.5-1.5B-Instruct,30,6107,6016,6055.3386666666665,8.617118029244592
|
54 |
+
Qwen2.5-3B-Instruct,30,6107,6016,6055.3386666666665,8.617118029244592
|
55 |
+
Qwen2.5-7B-Instruct,30,6107,6016,6055.3386666666665,8.617118029244592
|
56 |
+
Qwen2.5-72B-Instruct,30,6107,6016,6055.3386666666665,8.617118029244592
|
57 |
+
Llama3.1-8B-Chinese-Chat,40,10217,10077,10136.091,9.115687078710652
|
58 |
+
Llama3.1-70B-Chinese-Chat,40,10217,10077,10136.091,9.115687078710652
|
59 |
Mistral-7B-v0.3-Chinese-Chat,40,14152,13918,14023.354,15.567384660085061
|
60 |
internlm2_5-7b-chat,40,7709,7624,7659.917666666666,7.767732430462529
|
61 |
internlm2_5-7b-chat-1m,40,7709,7624,7659.917666666666,7.767732430462529
|
|
|
|
|
62 |
internlm2_5-20b-chat,40,7709,7624,7659.917666666666,7.767732430462529
|
63 |
+
Qwen2.5-0.5B-Instruct,40,8010,7919,7958.3386666666665,8.617118029244592
|
64 |
+
Qwen2.5-1.5B-Instruct,40,8010,7919,7958.3386666666665,8.617118029244592
|
65 |
+
Qwen2.5-3B-Instruct,40,8010,7919,7958.3386666666665,8.617118029244592
|
66 |
+
Qwen2.5-7B-Instruct,40,8010,7919,7958.3386666666665,8.617118029244592
|
67 |
+
Qwen2.5-72B-Instruct,40,8010,7919,7958.3386666666665,8.617118029244592
|
68 |
+
Llama3.1-8B-Chinese-Chat,50,12719,12579,12638.091,9.115687078710652
|
69 |
+
Llama3.1-70B-Chinese-Chat,50,12719,12579,12638.091,9.115687078710652
|
70 |
Mistral-7B-v0.3-Chinese-Chat,50,17588,17354,17459.354,15.567384660085061
|
71 |
internlm2_5-7b-chat,50,9561,9476,9511.917666666666,7.767732430462529
|
72 |
internlm2_5-7b-chat-1m,50,9561,9476,9511.917666666666,7.767732430462529
|
|
|
|
|
73 |
internlm2_5-20b-chat,50,9561,9476,9511.917666666666,7.767732430462529
|
74 |
+
Qwen2.5-0.5B-Instruct,50,9961,9870,9909.338666666667,8.617118029244592
|
75 |
+
Qwen2.5-1.5B-Instruct,50,9961,9870,9909.338666666667,8.617118029244592
|
76 |
+
Qwen2.5-3B-Instruct,50,9961,9870,9909.338666666667,8.617118029244592
|
77 |
+
Qwen2.5-7B-Instruct,50,9961,9870,9909.338666666667,8.617118029244592
|
78 |
+
Qwen2.5-72B-Instruct,50,9961,9870,9909.338666666667,8.617118029244592
|
79 |
gpt-4o,0,606,464,524.8063333333333,10.057594723695004
|
80 |
gpt-4o-mini,0,606,464,524.8063333333333,10.057594723695004
|
81 |
o1-preview,0,925,682,797.5953333333333,16.41724967580933
|
data/best_metrics.csv
CHANGED
@@ -1,14 +1,16 @@
|
|
1 |
index,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
1,
|
3 |
-
2,
|
4 |
-
3,
|
5 |
-
4,
|
6 |
-
5,
|
7 |
-
6,
|
8 |
-
7,
|
9 |
-
8,
|
10 |
-
9,
|
11 |
-
10,
|
12 |
-
11,
|
13 |
-
12,
|
14 |
-
13,
|
|
|
|
|
|
1 |
index,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
1,Llama3.1-8B (1.0-epoch),Llama3.1-8B (1.0-epoch),0.7853333333333333,0.8062405645226312,0.7853333333333333,0.7938991590982061,1.0
|
3 |
+
2,Llama3.1-70B (1.0-epoch),Llama3.1-70B (1.0-epoch),0.809,0.8282732906153989,0.809,0.8166997776775797,1.0
|
4 |
+
3,Mistral-7B (1.4-epoch),Mistral-7B (1.4-epoch),0.7616666666666667,0.789634957005121,0.7616666666666667,0.7721210086098353,1.0
|
5 |
+
4,InternLM2.5-7B (1.4-epoch),InternLM2.5-7B (1.4-epoch),0.762,0.8089123492151512,0.762,0.7753217972757948,1.0
|
6 |
+
5,InternLM2.5-7B-1M (0.8-epoch),InternLM2.5-7B-1M (0.8-epoch),0.8076666666666666,0.8048844422436796,0.8076666666666666,0.8049749805997191,1.0
|
7 |
+
6,InternLM2.5-20B (0.8-epoch),InternLM2.5-20B (0.8-epoch),0.8063333333333333,0.8207793607428686,0.8063333333333333,0.811239851005161,1.0
|
8 |
+
7,Qwen2.5-0.5B (1.4-epoch),Qwen2.5-0.5B (1.4-epoch),0.5903333333333334,0.6503049529377274,0.5903333333333334,0.6094397514027766,1.0
|
9 |
+
8,Qwen2.5-1.5B (1.4-epoch),Qwen2.5-1.5B (1.4-epoch),0.6493333333333333,0.7440287895607589,0.6493333333333333,0.6815314583590799,1.0
|
10 |
+
9,Qwen2.5-3B (1.4-epoch),Qwen2.5-3B (1.4-epoch),0.7326666666666667,0.7716369414239331,0.7326666666666667,0.7468182490858526,1.0
|
11 |
+
10,Qwen2.5-7B (1.0-epoch),Qwen2.5-7B (1.0-epoch),0.782,0.8023938029436536,0.782,0.7888740758699296,0.9993333333333333
|
12 |
+
11,Qwen2.5-72B (0.8-epoch),Qwen2.5-72B (0.8-epoch),0.8213333333333334,0.8447926258362122,0.8213333333333334,0.8299486611547571,1.0
|
13 |
+
12,gpt-4o-mini (0-shot),gpt-4o-mini (0-shot),0.7166666666666667,0.7800918028217227,0.7166666666666667,0.7260056154268697,1.0
|
14 |
+
13,gpt-4o (10-shot),gpt-4o (10-shot),0.8013333333333333,0.8246834383036209,0.8013333333333333,0.8098901724387172,0.9996666666666667
|
15 |
+
14,o1-mini (50-shot),o1-mini (50-shot),0.7536666666666667,0.7755130422727871,0.7536666666666667,0.7602241520634903,1.0
|
16 |
+
15,o1-preview (50-shot),o1-preview (50-shot),0.7576666666666667,0.7986597718440941,0.7576666666666667,0.7718331604189232,0.9996666666666667
|
data/best_results.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
data/few-shots_metrics.csv
CHANGED
@@ -1,40 +1,91 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
0,
|
10 |
-
|
11 |
-
10,
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
0,
|
17 |
-
5,
|
18 |
-
10,
|
19 |
-
20,
|
20 |
-
30,
|
21 |
-
40,
|
22 |
-
50,
|
23 |
-
0,o1-
|
24 |
-
5,o1-
|
25 |
-
10,o1-
|
26 |
-
20,o1-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,gpt-4o-mini,gpt-4o-mini/shots-00,0.7166666666666667,0.7800918028217227,0.7166666666666667,0.7260056154268697,0.9916666666666668
|
3 |
+
5,gpt-4o-mini,gpt-4o-mini/shots-05,0.7203333333333334,0.7754800244789168,0.7203333333333334,0.718540502683781,0.9996666666666668
|
4 |
+
10,gpt-4o-mini,gpt-4o-mini/shots-10,0.6836666666666666,0.7701177891593667,0.6836666666666666,0.6932016303210964,0.9983333333333332
|
5 |
+
20,gpt-4o-mini,gpt-4o-mini/shots-20,0.6616666666666666,0.7627467933668375,0.6616666666666666,0.677372757519069,0.998
|
6 |
+
30,gpt-4o-mini,gpt-4o-mini/shots-30,0.6876666666666666,0.7663381611066244,0.6876666666666666,0.6896169854446027,0.999
|
7 |
+
40,gpt-4o-mini,gpt-4o-mini/shots-40,0.6903333333333334,0.7603850760051853,0.6903333333333334,0.688393665975117,0.9986666666666668
|
8 |
+
50,gpt-4o-mini,gpt-4o-mini/shots-50,0.7143333333333334,0.7654214682013311,0.7143333333333334,0.7056961582308003,0.9993333333333332
|
9 |
+
0,gpt-4o,gpt-4o/shots-00,0.792,0.8234582231232066,0.792,0.8022633746318892,0.066
|
10 |
+
5,gpt-4o,gpt-4o/shots-05,0.7973333333333333,0.8251066339666824,0.7973333333333333,0.8066429877716694,0.998
|
11 |
+
10,gpt-4o,gpt-4o/shots-10,0.8013333333333333,0.8246834383036209,0.8013333333333333,0.8098901724387172,0.9996666666666668
|
12 |
+
20,gpt-4o,gpt-4o/shots-20,0.79,0.822098231279132,0.79,0.8020290214439503,0.9993333333333332
|
13 |
+
30,gpt-4o,gpt-4o/shots-30,0.7946666666666666,0.8259436682564079,0.7946666666666666,0.8063113377291872,0.999
|
14 |
+
40,gpt-4o,gpt-4o/shots-40,0.7906666666666666,0.8242154446428003,0.7906666666666666,0.803356987717753,0.9973333333333332
|
15 |
+
50,gpt-4o,gpt-4o/shots-50,0.798,0.8274250231711487,0.798,0.8091066504350897,0.9993333333333332
|
16 |
+
0,o1-mini,o1-mini/shots-00,0.7133333333333334,0.78301872209321,0.7133333333333334,0.7402734333211688,0.999
|
17 |
+
5,o1-mini,o1-mini/shots-05,0.7313333333333333,0.7913577967036569,0.7313333333333333,0.7532525881890013,0.9966666666666668
|
18 |
+
10,o1-mini,o1-mini/shots-10,0.7283333333333334,0.7851844846890333,0.7283333333333334,0.7490987096521479,0.9943333333333332
|
19 |
+
20,o1-mini,o1-mini/shots-20,0.7373333333333333,0.7815727856803751,0.7373333333333333,0.7533353509620383,0.9946666666666668
|
20 |
+
30,o1-mini,o1-mini/shots-30,0.748,0.779168441371953,0.748,0.7583397172973073,0.9976666666666668
|
21 |
+
40,o1-mini,o1-mini/shots-40,0.7496666666666667,0.775765877349714,0.7496666666666667,0.757640226210139,0.9976666666666668
|
22 |
+
50,o1-mini,o1-mini/shots-50,0.7536666666666667,0.7755130422727871,0.7536666666666667,0.7602241520634903,0.9976666666666668
|
23 |
+
0,o1-preview,o1-preview/shots-00,0.725,0.7860443296236067,0.725,0.7471736898827371,0.998
|
24 |
+
5,o1-preview,o1-preview/shots-05,0.736,0.789169445854742,0.736,0.7557068489703724,0.979
|
25 |
+
10,o1-preview,o1-preview/shots-10,0.7513333333333333,0.7947574632958824,0.7513333333333333,0.7673707529850041,0.9873333333333332
|
26 |
+
20,o1-preview,o1-preview/shots-20,0.7483333333333333,0.790639591375103,0.7483333333333333,0.763324860719675,0.9853333333333332
|
27 |
+
30,o1-preview,o1-preview/shots-30,0.7513333333333333,0.792049804996314,0.7513333333333333,0.7654800949250774,0.984
|
28 |
+
40,o1-preview,o1-preview/shots-40,0.7526666666666667,0.795308022968859,0.7526666666666667,0.7672762517397222,0.984
|
29 |
+
50,o1-preview,o1-preview/shots-50,0.7576666666666667,0.7986597718440941,0.7576666666666667,0.7718331604189232,0.9816666666666668
|
30 |
+
0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-00,0.7343333333333333,0.7375752740091942,0.7343333333333333,0.7270283652909943,0.8033333333333333
|
31 |
+
5,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-05,0.7056666666666667,0.7508515184863084,0.7056666666666667,0.7230574380518462,0.9886666666666668
|
32 |
+
10,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-10,0.6736666666666666,0.7776004745989736,0.6736666666666666,0.7094104807112239,0.9623333333333334
|
33 |
+
20,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-20,0.767,0.764982587229615,0.767,0.7638473265780445,0.979
|
34 |
+
30,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-30,0.7713333333333333,0.7725685630276532,0.7713333333333333,0.7692692690410152,0.7326666666666667
|
35 |
+
40,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-40,0.6873333333333334,0.773294758147205,0.6873333333333334,0.7075877720686631,0.759
|
36 |
+
50,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-50,0.7176666666666667,0.7599215931134234,0.7176666666666667,0.7203550920641806,0.6623333333333333
|
37 |
+
0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-00,0.7646666666666667,0.7804609488644828,0.7646666666666667,0.7497548621711109,0.0096666666666666
|
38 |
+
5,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-05,0.754,0.7675695134276339,0.754,0.7530665717237273,0.79
|
39 |
+
10,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-10,0.756,0.7695738042762151,0.756,0.7563878737797524,0.8326666666666667
|
40 |
+
20,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-20,0.7406666666666667,0.7560876641054418,0.7406666666666667,0.7360011002310723,0.819
|
41 |
+
30,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-30,0.7603333333333333,0.7710641222872985,0.7603333333333333,0.7570501796584528,0.548
|
42 |
+
0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-00,0.6923333333333334,0.7009179792741449,0.6923333333333334,0.6605899639694456,0.0116666666666666
|
43 |
+
5,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-05,0.6546666666666666,0.7415422757067709,0.6546666666666666,0.684189810233595,0.142
|
44 |
+
10,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-10,0.612,0.7259976964524691,0.612,0.6501410678512595,0.1063333333333333
|
45 |
+
20,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-20,0.6336666666666667,0.7315100617022602,0.6336666666666667,0.6683245802083553,0.0826666666666666
|
46 |
+
30,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-30,0.665,0.7374233826761456,0.665,0.6872462947319797,0.07
|
47 |
+
0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-00,0.7063333333333334,0.7369785607161373,0.7063333333333334,0.6895815239121195,1.0
|
48 |
+
5,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-05,0.747,0.7433195768374967,0.747,0.7232456014841266,0.999
|
49 |
+
10,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-10,0.559,0.7306434812774306,0.559,0.6287391975839828,0.9883333333333332
|
50 |
+
20,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-20,0.6466666666666666,0.7143354332969056,0.6466666666666666,0.6738164117926014,0.9473333333333334
|
51 |
+
30,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-30,0.626,0.7223442225693745,0.626,0.6494216734706632,0.9403333333333334
|
52 |
+
40,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-40,0.64,0.7020393671564193,0.64,0.611996460461355,0.9813333333333332
|
53 |
+
50,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-50,0.6116666666666667,0.6808793455512054,0.6116666666666667,0.5502581431071487,0.9803333333333332
|
54 |
+
0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-00,0.4923333333333333,0.7570993062022159,0.4923333333333333,0.5279738886353613,0.9986666666666668
|
55 |
+
5,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-05,0.7753333333333333,0.7586378181445387,0.7753333333333333,0.7665405919258307,0.9453333333333334
|
56 |
+
10,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-10,0.654,0.7251381758855274,0.654,0.6681655588675279,0.8866666666666667
|
57 |
+
20,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-20,0.677,0.7296467412730754,0.677,0.6780570012166849,0.8213333333333334
|
58 |
+
30,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-30,0.68,0.7425906069240685,0.68,0.6837924261094331,0.8236666666666667
|
59 |
+
40,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-40,0.726,0.7533750344411337,0.726,0.7132456474026365,0.8336666666666667
|
60 |
+
50,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-50,0.7173333333333334,0.7471186719787132,0.7173333333333334,0.6980283743779222,0.8846666666666667
|
61 |
+
0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/shots-00,0.575,0.7745319004159336,0.575,0.6416875854199033,0.6726666666666666
|
62 |
+
0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-00,0.4383333333333333,0.5292917259914629,0.4383333333333333,0.4228687599248655,0.594
|
63 |
+
5,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-05,0.1796666666666666,0.475165738531098,0.1796666666666666,0.214144872117911,0.004
|
64 |
+
10,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-10,0.351,0.5084853117995367,0.351,0.3909783959403107,0.068
|
65 |
+
20,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-20,0.4336666666666666,0.513186330900278,0.4336666666666666,0.463747974034812,0.3726666666666666
|
66 |
+
30,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-30,0.39,0.5367753683204347,0.39,0.4299603249123421,0.0756666666666666
|
67 |
+
40,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-40,0.466,0.5400134144413437,0.466,0.495429756139619,0.324
|
68 |
+
50,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-50,0.496,0.5465409839032335,0.496,0.5069942984615308,0.2433333333333333
|
69 |
+
0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-00,0.2016666666666666,0.5269756683734005,0.2016666666666666,0.2406983532950438,0.9223333333333332
|
70 |
+
5,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-05,0.3933333333333333,0.578886379886985,0.3933333333333333,0.4355463694355869,0.8283333333333334
|
71 |
+
10,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-10,0.407,0.5820145311822223,0.407,0.459589777544246,0.9156666666666666
|
72 |
+
20,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-20,0.232,0.5282610881631451,0.232,0.3093707499897376,0.676
|
73 |
+
30,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-30,0.23,0.5479545947886839,0.23,0.3064381040560128,0.661
|
74 |
+
40,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-40,0.2923333333333333,0.5608411738006117,0.2923333333333333,0.3751714671158081,0.5206666666666667
|
75 |
+
50,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-50,0.29,0.5646814860840066,0.29,0.3688382652659246,0.4603333333333333
|
76 |
+
0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-00,0.5796666666666667,0.6966500240864278,0.5796666666666667,0.5506370828782681,1.0
|
77 |
+
5,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-05,0.639,0.7226431221398603,0.639,0.641568790114368,0.9973333333333332
|
78 |
+
10,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-10,0.625,0.7164154004131771,0.625,0.6402584852791593,0.995
|
79 |
+
20,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-20,0.4666666666666667,0.6987641430848737,0.4666666666666667,0.5265074036660548,0.9316666666666666
|
80 |
+
30,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-30,0.475,0.6880994914236809,0.475,0.5310948082593374,0.904
|
81 |
+
40,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-40,0.584,0.7065303262365236,0.584,0.6214992664375876,0.7173333333333334
|
82 |
+
50,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-50,0.6093333333333333,0.7120506480394511,0.6093333333333333,0.6451959368825358,0.574
|
83 |
+
0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-00,0.644,0.7200261355300325,0.644,0.6101052277961244,1.0
|
84 |
+
5,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-05,0.6346666666666667,0.7653343185471776,0.6346666666666667,0.6219419633691871,0.998
|
85 |
+
10,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-10,0.678,0.7675951017673515,0.678,0.6790860659550377,0.9796666666666668
|
86 |
+
20,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-20,0.7353333333333333,0.7702034737275962,0.7353333333333333,0.7278047438569933,0.807
|
87 |
+
30,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-30,0.7646666666666667,0.7787918401418651,0.7646666666666667,0.7527649874769439,0.805
|
88 |
+
40,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-40,0.759,0.7736852689131295,0.759,0.7472252604775926,0.8546666666666667
|
89 |
+
50,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-50,0.7586666666666667,0.7640431634617543,0.7586666666666667,0.7414332963557551,0.7563333333333333
|
90 |
+
0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-00,0.7956666666666666,0.8098073411161181,0.7956666666666666,0.7771317592221199,0.994
|
91 |
+
5,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-05,0.819,0.8182324679666184,0.819,0.8095367865845521,0.9416666666666668
|
data/fine-tuning_metrics.csv
CHANGED
@@ -1,78 +1,122 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,
|
3 |
-
0.2,
|
4 |
-
0.4,
|
5 |
-
0.6,
|
6 |
-
0.8,
|
7 |
-
1.0,
|
8 |
-
1.2,
|
9 |
-
1.4,
|
10 |
-
1.6,
|
11 |
-
1.8,
|
12 |
-
2.0,
|
13 |
-
0.0,
|
14 |
-
0.2,
|
15 |
-
0.4,
|
16 |
-
0.6,
|
17 |
-
0.8,
|
18 |
-
1.0,
|
19 |
-
1.2,
|
20 |
-
1.4,
|
21 |
-
1.6,
|
22 |
-
1.8,
|
23 |
-
2.0,
|
24 |
-
0.0,
|
25 |
-
0.2,
|
26 |
-
0.4,
|
27 |
-
0.6,
|
28 |
-
0.8,
|
29 |
-
1.0,
|
30 |
-
1.2,
|
31 |
-
1.4,
|
32 |
-
1.6,
|
33 |
-
1.8,
|
34 |
-
2.0,
|
35 |
-
0.0,
|
36 |
-
0.2,
|
37 |
-
0.4,
|
38 |
-
0.6,
|
39 |
-
0.8,
|
40 |
-
1.0,
|
41 |
-
1.2,
|
42 |
-
1.4,
|
43 |
-
1.6,
|
44 |
-
1.8,
|
45 |
-
2.0,
|
46 |
-
0.0,
|
47 |
-
0.2,
|
48 |
-
0.4,
|
49 |
-
0.6,
|
50 |
-
0.8,
|
51 |
-
1.0,
|
52 |
-
1.2,
|
53 |
-
1.4,
|
54 |
-
1.6,
|
55 |
-
1.8,
|
56 |
-
2.0,
|
57 |
-
0.0,
|
58 |
-
0.2,
|
59 |
-
0.4,
|
60 |
-
0.6,
|
61 |
-
0.8,
|
62 |
-
1.0,
|
63 |
-
1.2,
|
64 |
-
1.4,
|
65 |
-
1.6,
|
66 |
-
1.8,
|
67 |
-
2.0,
|
68 |
-
0.0,
|
69 |
-
0.2,
|
70 |
-
0.4,
|
71 |
-
0.6,
|
72 |
-
0.8,
|
73 |
-
1.0,
|
74 |
-
1.2,
|
75 |
-
1.4,
|
76 |
-
1.6,
|
77 |
-
1.8,
|
78 |
-
2.0,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat_torch.float16_lf,0.7343333333333333,0.7375752740091942,0.7343333333333333,0.7270283652909943,0.8033333333333333
|
3 |
+
0.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-35_torch.float16_lf,0.717,0.7933072428707201,0.717,0.7447412977676989,1.0
|
4 |
+
0.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-70_torch.float16_lf,0.7226666666666667,0.7983383063141186,0.7226666666666667,0.7489397350174751,0.9993333333333332
|
5 |
+
0.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-105_torch.float16_lf,0.7083333333333334,0.7967030927405547,0.7083333333333334,0.738836849803633,1.0
|
6 |
+
0.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7773333333333333,0.805139129977305,0.7773333333333333,0.7882159693114585,1.0
|
7 |
+
1.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-175_torch.float16_lf,0.7853333333333333,0.8062405645226312,0.7853333333333333,0.7938991590982061,1.0
|
8 |
+
1.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7436666666666667,0.8148316221752646,0.7436666666666667,0.7689773286065246,1.0
|
9 |
+
1.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-245_torch.float16_lf,0.759,0.8080929326806991,0.759,0.7772842274293189,1.0
|
10 |
+
1.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-280_torch.float16_lf,0.745,0.8027959680086005,0.745,0.7666181725503965,1.0
|
11 |
+
1.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-315_torch.float16_lf,0.7303333333333333,0.806805925253305,0.7303333333333333,0.7580841794383364,1.0
|
12 |
+
2.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-350_torch.float16_lf,0.737,0.808786608325944,0.737,0.7629963845364953,1.0
|
13 |
+
0.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat_torch.bfloat16_4bit_lf,0.7646666666666667,0.7804609488644828,0.7646666666666667,0.7497548621711109,0.0096666666666666
|
14 |
+
0.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-35_torch.bfloat16_4bit_lf,0.784,0.8105343792887019,0.784,0.7931742141608462,0.9996666666666668
|
15 |
+
0.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-70_torch.bfloat16_4bit_lf,0.7426666666666667,0.8117033235947096,0.7426666666666667,0.7673825750808414,1.0
|
16 |
+
0.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-105_torch.bfloat16_4bit_lf,0.736,0.8227236574891071,0.736,0.7650739090144549,1.0
|
17 |
+
0.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-140_torch.bfloat16_4bit_lf,0.7686666666666667,0.8259659464402258,0.7686666666666667,0.7880870865039342,1.0
|
18 |
+
1.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-175_torch.bfloat16_4bit_lf,0.809,0.8282732906153989,0.809,0.8166997776775797,1.0
|
19 |
+
1.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-210_torch.bfloat16_4bit_lf,0.75,0.8287348768409003,0.75,0.7741734526674708,1.0
|
20 |
+
1.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-245_torch.bfloat16_4bit_lf,0.7703333333333333,0.8271894042316865,0.7703333333333333,0.7907617274354051,1.0
|
21 |
+
1.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-280_torch.bfloat16_4bit_lf,0.776,0.8315436250878178,0.776,0.7959870550088912,1.0
|
22 |
+
1.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-315_torch.bfloat16_4bit_lf,0.7733333333333333,0.8327336470976,0.7733333333333333,0.7947537193805649,1.0
|
23 |
+
2.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7686666666666667,0.8329633784586954,0.7686666666666667,0.7914454794587963,1.0
|
24 |
+
0.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat_torch.float16_lf,0.6923333333333334,0.7009179792741449,0.6923333333333334,0.6605899639694456,0.0116666666666666
|
25 |
+
0.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-35_torch.float16_lf,0.706,0.7832545046834243,0.706,0.7323466131711432,1.0
|
26 |
+
0.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-70_torch.float16_lf,0.7476666666666667,0.7836120158306894,0.7476666666666667,0.7557791381509955,1.0
|
27 |
+
0.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-105_torch.float16_lf,0.6736666666666666,0.7908140272002406,0.6736666666666666,0.7129951145360993,1.0
|
28 |
+
0.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7293333333333333,0.788387677637057,0.7293333333333333,0.7494137469900564,1.0
|
29 |
+
1.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-175_torch.float16_lf,0.74,0.7833068129490098,0.74,0.7499935485741815,1.0
|
30 |
+
1.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7146666666666667,0.7890760288118991,0.7146666666666667,0.7411240160229633,1.0
|
31 |
+
1.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-245_torch.float16_lf,0.7616666666666667,0.789634957005121,0.7616666666666667,0.7721210086098353,1.0
|
32 |
+
1.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-280_torch.float16_lf,0.7296666666666667,0.7854982015370922,0.7296666666666667,0.7491267995936699,1.0
|
33 |
+
1.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-315_torch.float16_lf,0.7076666666666667,0.7877874532247918,0.7076666666666667,0.7346283562321456,1.0
|
34 |
+
2.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-350_torch.float16_lf,0.713,0.7895690867103055,0.713,0.739013227401175,1.0
|
35 |
+
0.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat_torch.bfloat16_lf,0.7063333333333334,0.7369785607161373,0.7063333333333334,0.6895815239121195,1.0
|
36 |
+
0.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-35_torch.bfloat16_lf,0.729,0.7861122408311365,0.729,0.7385163226667387,1.0
|
37 |
+
0.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-70_torch.bfloat16_lf,0.7336666666666667,0.7857703796539939,0.7336666666666667,0.7427841254119673,1.0
|
38 |
+
0.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-105_torch.bfloat16_lf,0.6876666666666666,0.8030976203819039,0.6876666666666666,0.7170750416800897,1.0
|
39 |
+
0.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-140_torch.bfloat16_lf,0.762,0.8063331692665241,0.762,0.7740172985498378,1.0
|
40 |
+
1.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-175_torch.bfloat16_lf,0.7416666666666667,0.812190204769964,0.7416666666666667,0.761129466343473,1.0
|
41 |
+
1.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-210_torch.bfloat16_lf,0.7443333333333333,0.8084922204218251,0.7443333333333333,0.7599422989743019,1.0
|
42 |
+
1.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-245_torch.bfloat16_lf,0.762,0.8089123492151512,0.762,0.7753217972757948,1.0
|
43 |
+
1.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-280_torch.bfloat16_lf,0.733,0.8092774765454144,0.733,0.7535080746086277,1.0
|
44 |
+
1.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-315_torch.bfloat16_lf,0.7156666666666667,0.814456776214162,0.7156666666666667,0.744622807072089,1.0
|
45 |
+
2.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-350_torch.bfloat16_lf,0.725,0.8148156790328904,0.725,0.7509650741005044,1.0
|
46 |
+
0.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m_torch.bfloat16_lf,0.4923333333333333,0.7570993062022159,0.4923333333333333,0.5279738886353613,0.9986666666666668
|
47 |
+
0.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-35_torch.bfloat16_lf,0.7843333333333333,0.7907732469871145,0.7843333333333333,0.7839137508042926,1.0
|
48 |
+
0.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-70_torch.bfloat16_lf,0.7876666666666666,0.7961110449860888,0.7876666666666666,0.790011839264191,1.0
|
49 |
+
0.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-105_torch.bfloat16_lf,0.74,0.818451985781803,0.74,0.7654385146358808,1.0
|
50 |
+
0.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-140_torch.bfloat16_lf,0.8076666666666666,0.8048844422436796,0.8076666666666666,0.8049749805997191,1.0
|
51 |
+
1.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-175_torch.bfloat16_lf,0.7796666666666666,0.8115925869684188,0.7796666666666666,0.7917308842405348,1.0
|
52 |
+
1.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-210_torch.bfloat16_lf,0.7816666666666666,0.8082575556171326,0.7816666666666666,0.7920155623671598,1.0
|
53 |
+
1.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-245_torch.bfloat16_lf,0.7736666666666666,0.8074649930391711,0.7736666666666666,0.7846002379939621,1.0
|
54 |
+
1.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-280_torch.bfloat16_lf,0.771,0.8124579857634519,0.771,0.7859698091956198,1.0
|
55 |
+
1.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-315_torch.bfloat16_lf,0.7646666666666667,0.8211516901334176,0.7646666666666667,0.7848541283802248,1.0
|
56 |
+
2.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-350_torch.bfloat16_lf,0.77,0.8144910397034413,0.77,0.7862970454955438,1.0
|
57 |
+
0.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat_torch.bfloat16_4bit_lf,0.575,0.7745319004159336,0.575,0.6416875854199033,0.6726666666666666
|
58 |
+
0.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-35_torch.bfloat16_4bit_lf,0.7723333333333333,0.8004877872664371,0.7723333333333333,0.7800315047324102,1.0
|
59 |
+
0.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-70_torch.bfloat16_4bit_lf,0.788,0.808878367860496,0.788,0.7952965901503556,1.0
|
60 |
+
0.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-105_torch.bfloat16_4bit_lf,0.7223333333333334,0.8101427633407874,0.7223333333333334,0.7527524454293278,1.0
|
61 |
+
0.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-140_torch.bfloat16_4bit_lf,0.8063333333333333,0.8207793607428686,0.8063333333333333,0.811239851005161,1.0
|
62 |
+
1.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-175_torch.bfloat16_4bit_lf,0.792,0.8244746715585061,0.792,0.8028680300441688,1.0
|
63 |
+
1.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-210_torch.bfloat16_4bit_lf,0.77,0.8305821984199763,0.77,0.7905012003721434,1.0
|
64 |
+
1.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-245_torch.bfloat16_4bit_lf,0.8033333333333333,0.8215999742478901,0.8033333333333333,0.8087445768968825,1.0
|
65 |
+
1.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-280_torch.bfloat16_4bit_lf,0.795,0.8261993807231882,0.795,0.805022820640186,1.0
|
66 |
+
1.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-315_torch.bfloat16_4bit_lf,0.779,0.8256828719565774,0.779,0.7946766547953676,1.0
|
67 |
+
2.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7826666666666666,0.8284951420712369,0.7826666666666666,0.7978785507522372,1.0
|
68 |
+
0.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct_torch.float16_lf,0.4383333333333333,0.5292917259914629,0.4383333333333333,0.4228687599248655,0.594
|
69 |
+
0.2,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-35_torch.float16_lf,0.5223333333333333,0.5704911830866488,0.5223333333333333,0.454387436259078,1.0
|
70 |
+
0.4,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-70_torch.float16_lf,0.542,0.6358012674347429,0.542,0.5272438410312219,1.0
|
71 |
+
0.6,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-105_torch.float16_lf,0.4463333333333333,0.6477441598024034,0.4463333333333333,0.4917457459702999,1.0
|
72 |
+
0.8,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-140_torch.float16_lf,0.5053333333333333,0.6438300456580985,0.5053333333333333,0.4995247505211914,1.0
|
73 |
+
1.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-175_torch.float16_lf,0.558,0.6560369730369926,0.558,0.5632487818615118,1.0
|
74 |
+
1.2,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-210_torch.float16_lf,0.5453333333333333,0.6357935773889876,0.5453333333333333,0.5594242895140294,1.0
|
75 |
+
1.4,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-245_torch.float16_lf,0.5903333333333334,0.6503049529377274,0.5903333333333334,0.6094397514027766,1.0
|
76 |
+
1.6,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-280_torch.float16_lf,0.5286666666666666,0.6532851084098983,0.5286666666666666,0.5617239467523474,1.0
|
77 |
+
1.8,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-315_torch.float16_lf,0.5336666666666666,0.6607103736450911,0.5336666666666666,0.5622949959647037,1.0
|
78 |
+
2.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-350_torch.float16_lf,0.5156666666666667,0.652809461208547,0.5156666666666667,0.549955024535151,1.0
|
79 |
+
0.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct_torch.float16_lf,0.2016666666666666,0.5269756683734005,0.2016666666666666,0.2406983532950438,0.9223333333333332
|
80 |
+
0.2,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-35_torch.float16_lf,0.4853333333333333,0.654166887199198,0.4853333333333333,0.5381849571995003,0.9996666666666668
|
81 |
+
0.4,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-70_torch.float16_lf,0.573,0.7037737273232145,0.573,0.6131069400231612,0.9996666666666668
|
82 |
+
0.6,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-105_torch.float16_lf,0.539,0.7162869126454278,0.539,0.5961610389687657,1.0
|
83 |
+
0.8,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-140_torch.float16_lf,0.6443333333333333,0.7218750831357578,0.6443333333333333,0.6721473356905486,1.0
|
84 |
+
1.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-175_torch.float16_lf,0.6296666666666667,0.7065049203038848,0.6296666666666667,0.6496809196018393,1.0
|
85 |
+
1.2,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-210_torch.float16_lf,0.5836666666666667,0.7222805944180548,0.5836666666666667,0.6314346830311218,1.0
|
86 |
+
1.4,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-245_torch.float16_lf,0.6493333333333333,0.7440287895607589,0.6493333333333333,0.6815314583590799,1.0
|
87 |
+
1.6,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-280_torch.float16_lf,0.6293333333333333,0.7332138067544355,0.6293333333333333,0.6634330572585689,1.0
|
88 |
+
1.8,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-315_torch.float16_lf,0.599,0.7297954686265763,0.599,0.6396292878324805,1.0
|
89 |
+
2.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-350_torch.float16_lf,0.6056666666666667,0.7305580205770756,0.6056666666666667,0.6426785514786738,1.0
|
90 |
+
0.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct_torch.float16_lf,0.5796666666666667,0.6966500240864278,0.5796666666666667,0.5506370828782681,1.0
|
91 |
+
0.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-35_torch.float16_lf,0.689,0.7450174119748659,0.689,0.709114466474576,0.9986666666666668
|
92 |
+
0.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-70_torch.float16_lf,0.6556666666666666,0.7590430811422313,0.6556666666666666,0.6934194398116857,1.0
|
93 |
+
0.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-105_torch.float16_lf,0.6963333333333334,0.7550938479315918,0.6963333333333334,0.71844324172961,1.0
|
94 |
+
0.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-140_torch.float16_lf,0.6853333333333333,0.7542524799326954,0.6853333333333333,0.7128732915785243,1.0
|
95 |
+
1.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-175_torch.float16_lf,0.6846666666666666,0.7564071354272528,0.6846666666666666,0.7125676758538035,1.0
|
96 |
+
1.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-210_torch.float16_lf,0.6896666666666667,0.7690917466956201,0.6896666666666667,0.720231747443145,1.0
|
97 |
+
1.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-245_torch.float16_lf,0.7256666666666667,0.7753705482689578,0.7256666666666667,0.7440390153124937,1.0
|
98 |
+
1.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-280_torch.float16_lf,0.708,0.7659638403826392,0.708,0.7293997518219294,1.0
|
99 |
+
1.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-315_torch.float16_lf,0.7056666666666667,0.7717562122699148,0.7056666666666667,0.729817759784445,1.0
|
100 |
+
2.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-350_torch.float16_lf,0.7003333333333334,0.7698824212888824,0.7003333333333334,0.726563613830647,1.0
|
101 |
+
0.0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct_torch.bfloat16_lf,0.644,0.7200261355300325,0.644,0.6101052277961244,1.0
|
102 |
+
0.2,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-35_torch.bfloat16_lf,0.745,0.7643041174791825,0.745,0.7482828029872421,0.998
|
103 |
+
0.4,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-70_torch.bfloat16_lf,0.7446666666666667,0.7800215227839997,0.7446666666666667,0.7576550061479678,0.9996666666666668
|
104 |
+
0.6,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-105_torch.bfloat16_lf,0.7513333333333333,0.7996792149630704,0.7513333333333333,0.7693730206330721,0.9996666666666668
|
105 |
+
0.8,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-140_torch.bfloat16_lf,0.75,0.7923028105975739,0.75,0.7665531868559959,1.0
|
106 |
+
1.0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-175_torch.bfloat16_lf,0.771,0.8005814962709542,0.771,0.7814602739241332,0.9993333333333332
|
107 |
+
1.2,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-210_torch.bfloat16_lf,0.7443333333333333,0.79978900243777,0.7443333333333333,0.7660506505481828,1.0
|
108 |
+
1.4,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-245_torch.bfloat16_lf,0.7486666666666667,0.7974562319123832,0.7486666666666667,0.7655275916268014,0.9993333333333332
|
109 |
+
1.6,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-280_torch.bfloat16_lf,0.7566666666666667,0.7939852407869384,0.7566666666666667,0.7689495073735431,0.9996666666666668
|
110 |
+
1.8,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-315_torch.bfloat16_lf,0.755,0.7940575522966016,0.755,0.7681326415137147,0.9993333333333332
|
111 |
+
2.0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-350_torch.bfloat16_lf,0.756,0.7982464722401461,0.756,0.7704035278260453,0.9996666666666668
|
112 |
+
0.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct_torch.bfloat16_4bit_lf,0.7956666666666666,0.8098073411161181,0.7956666666666666,0.7771317592221199,0.994
|
113 |
+
0.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.792,0.8180793658647517,0.792,0.80166512366027,1.0
|
114 |
+
0.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.7716666666666666,0.8199569804721152,0.7716666666666666,0.7895879011938259,1.0
|
115 |
+
0.6,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-105_torch.bfloat16_4bit_lf,0.798,0.8379062379534957,0.798,0.812148680520218,1.0
|
116 |
+
0.8,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-140_torch.bfloat16_4bit_lf,0.8213333333333334,0.8447926258362122,0.8213333333333334,0.8299486611547571,1.0
|
117 |
+
1.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-175_torch.bfloat16_4bit_lf,0.7643333333333333,0.8235366724638146,0.7643333333333333,0.7858148913986999,1.0
|
118 |
+
1.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-210_torch.bfloat16_4bit_lf,0.7986666666666666,0.83233218480008,0.7986666666666666,0.8115886421806521,1.0
|
119 |
+
1.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-245_torch.bfloat16_4bit_lf,0.7923333333333333,0.8231874218285514,0.7923333333333333,0.803363661387202,1.0
|
120 |
+
1.6,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-280_torch.bfloat16_4bit_lf,0.7936666666666666,0.8268750473800219,0.7936666666666666,0.8057720333101867,1.0
|
121 |
+
1.8,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-315_torch.bfloat16_4bit_lf,0.801,0.830389411421043,0.801,0.8117656427717702,1.0
|
122 |
+
2.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-350_torch.bfloat16_4bit_lf,0.795,0.8280696193638868,0.795,0.8068114730639832,1.0
|
data/internlm2_5-20b-chat_metrics.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat_torch.bfloat16_4bit_lf,0.
|
3 |
-
0.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-35_torch.bfloat16_4bit_lf,0.
|
4 |
-
0.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-70_torch.bfloat16_4bit_lf,0.
|
5 |
-
0.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-105_torch.bfloat16_4bit_lf,0.
|
6 |
-
0.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-140_torch.bfloat16_4bit_lf,0.
|
7 |
-
1.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-175_torch.bfloat16_4bit_lf,0.
|
8 |
-
1.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-210_torch.bfloat16_4bit_lf,0.
|
9 |
-
1.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-245_torch.bfloat16_4bit_lf,0.
|
10 |
-
1.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-280_torch.bfloat16_4bit_lf,0.
|
11 |
-
1.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-315_torch.bfloat16_4bit_lf,0.
|
12 |
-
2.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-350_torch.bfloat16_4bit_lf,0.
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat_torch.bfloat16_4bit_lf,0.575,0.7745319004159336,0.575,0.6416875854199033,0.6726666666666666
|
3 |
+
0.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-35_torch.bfloat16_4bit_lf,0.7723333333333333,0.8004877872664371,0.7723333333333333,0.7800315047324102,1.0
|
4 |
+
0.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-70_torch.bfloat16_4bit_lf,0.788,0.808878367860496,0.788,0.7952965901503556,1.0
|
5 |
+
0.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-105_torch.bfloat16_4bit_lf,0.7223333333333334,0.8101427633407874,0.7223333333333334,0.7527524454293278,1.0
|
6 |
+
0.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-140_torch.bfloat16_4bit_lf,0.8063333333333333,0.8207793607428686,0.8063333333333333,0.811239851005161,1.0
|
7 |
+
1.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-175_torch.bfloat16_4bit_lf,0.792,0.8244746715585061,0.792,0.8028680300441688,1.0
|
8 |
+
1.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-210_torch.bfloat16_4bit_lf,0.77,0.8305821984199763,0.77,0.7905012003721434,1.0
|
9 |
+
1.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-245_torch.bfloat16_4bit_lf,0.8033333333333333,0.8215999742478901,0.8033333333333333,0.8087445768968825,1.0
|
10 |
+
1.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-280_torch.bfloat16_4bit_lf,0.795,0.8261993807231882,0.795,0.805022820640186,1.0
|
11 |
+
1.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-315_torch.bfloat16_4bit_lf,0.779,0.8256828719565774,0.779,0.7946766547953676,1.0
|
12 |
+
2.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7826666666666666,0.8284951420712369,0.7826666666666666,0.7978785507522372,1.0
|
data/internlm2_5-20b-chat_shots_metrics.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/shots-00,0.
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/shots-00,0.575,0.7745319004159336,0.575,0.6416875854199033,0.6726666666666666
|
data/internlm2_5-7b-chat-1m_metrics.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m_torch.bfloat16_lf,0.
|
3 |
-
0.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-35_torch.bfloat16_lf,0.7843333333333333,0.
|
4 |
-
0.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-70_torch.bfloat16_lf,0.
|
5 |
-
0.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-105_torch.bfloat16_lf,0.
|
6 |
-
0.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-140_torch.bfloat16_lf,0.
|
7 |
-
1.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-175_torch.bfloat16_lf,0.
|
8 |
-
1.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-210_torch.bfloat16_lf,0.
|
9 |
-
1.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-245_torch.bfloat16_lf,0.
|
10 |
-
1.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-280_torch.bfloat16_lf,0.
|
11 |
-
1.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-315_torch.bfloat16_lf,0.
|
12 |
-
2.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-350_torch.bfloat16_lf,0.
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m_torch.bfloat16_lf,0.49233333333333335,0.7570993062022159,0.49233333333333335,0.5279738886353613,0.9986666666666667
|
3 |
+
0.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-35_torch.bfloat16_lf,0.7843333333333333,0.7907732469871145,0.7843333333333333,0.7839137508042926,1.0
|
4 |
+
0.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-70_torch.bfloat16_lf,0.7876666666666666,0.7961110449860888,0.7876666666666666,0.790011839264191,1.0
|
5 |
+
0.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-105_torch.bfloat16_lf,0.74,0.818451985781803,0.74,0.7654385146358808,1.0
|
6 |
+
0.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-140_torch.bfloat16_lf,0.8076666666666666,0.8048844422436796,0.8076666666666666,0.8049749805997191,1.0
|
7 |
+
1.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-175_torch.bfloat16_lf,0.7796666666666666,0.8115925869684188,0.7796666666666666,0.7917308842405348,1.0
|
8 |
+
1.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-210_torch.bfloat16_lf,0.7816666666666666,0.8082575556171326,0.7816666666666666,0.7920155623671598,1.0
|
9 |
+
1.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-245_torch.bfloat16_lf,0.7736666666666666,0.8074649930391711,0.7736666666666666,0.7846002379939621,1.0
|
10 |
+
1.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-280_torch.bfloat16_lf,0.771,0.8124579857634519,0.771,0.7859698091956198,1.0
|
11 |
+
1.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-315_torch.bfloat16_lf,0.7646666666666667,0.8211516901334176,0.7646666666666667,0.7848541283802248,1.0
|
12 |
+
2.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-350_torch.bfloat16_lf,0.77,0.8144910397034413,0.77,0.7862970454955438,1.0
|
data/internlm2_5-7b-chat-1m_shots_metrics.csv
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-00,0.
|
3 |
-
5,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-05,0.
|
4 |
-
10,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-10,0.
|
5 |
-
20,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-20,0.
|
6 |
-
30,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-30,0.
|
7 |
-
40,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-40,0.
|
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-00,0.49233333333333335,0.7570993062022159,0.49233333333333335,0.5279738886353613,0.9986666666666667
|
3 |
+
5,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-05,0.7753333333333333,0.7586378181445387,0.7753333333333333,0.7665405919258307,0.9453333333333334
|
4 |
+
10,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-10,0.654,0.7251381758855274,0.654,0.6681655588675279,0.8866666666666667
|
5 |
+
20,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-20,0.677,0.7296467412730754,0.677,0.6780570012166849,0.8213333333333334
|
6 |
+
30,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-30,0.68,0.7425906069240685,0.68,0.6837924261094331,0.8236666666666667
|
7 |
+
40,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-40,0.726,0.7533750344411337,0.726,0.7132456474026365,0.8336666666666667
|
8 |
+
50,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-50,0.7173333333333334,0.7471186719787132,0.7173333333333334,0.6980283743779222,0.8846666666666667
|
data/internlm2_5-7b-chat_metrics.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat_torch.bfloat16_lf,0.
|
3 |
-
0.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-35_torch.bfloat16_lf,0.
|
4 |
-
0.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-70_torch.bfloat16_lf,0.
|
5 |
-
0.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-105_torch.bfloat16_lf,0.
|
6 |
-
0.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-140_torch.bfloat16_lf,0.
|
7 |
-
1.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-175_torch.bfloat16_lf,0.
|
8 |
-
1.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-210_torch.bfloat16_lf,0.
|
9 |
-
1.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-245_torch.bfloat16_lf,0.
|
10 |
-
1.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-280_torch.bfloat16_lf,0.
|
11 |
-
1.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-315_torch.bfloat16_lf,0.
|
12 |
-
2.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-350_torch.bfloat16_lf,0.
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat_torch.bfloat16_lf,0.7063333333333334,0.7369785607161373,0.7063333333333334,0.6895815239121195,1.0
|
3 |
+
0.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-35_torch.bfloat16_lf,0.729,0.7861122408311365,0.729,0.7385163226667387,1.0
|
4 |
+
0.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-70_torch.bfloat16_lf,0.7336666666666667,0.7857703796539939,0.7336666666666667,0.7427841254119673,1.0
|
5 |
+
0.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-105_torch.bfloat16_lf,0.6876666666666666,0.8030976203819039,0.6876666666666666,0.7170750416800897,1.0
|
6 |
+
0.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-140_torch.bfloat16_lf,0.762,0.8063331692665241,0.762,0.7740172985498378,1.0
|
7 |
+
1.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-175_torch.bfloat16_lf,0.7416666666666667,0.812190204769964,0.7416666666666667,0.761129466343473,1.0
|
8 |
+
1.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-210_torch.bfloat16_lf,0.7443333333333333,0.8084922204218251,0.7443333333333333,0.7599422989743019,1.0
|
9 |
+
1.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-245_torch.bfloat16_lf,0.762,0.8089123492151512,0.762,0.7753217972757948,1.0
|
10 |
+
1.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-280_torch.bfloat16_lf,0.733,0.8092774765454144,0.733,0.7535080746086277,1.0
|
11 |
+
1.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-315_torch.bfloat16_lf,0.7156666666666667,0.814456776214162,0.7156666666666667,0.744622807072089,1.0
|
12 |
+
2.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-350_torch.bfloat16_lf,0.725,0.8148156790328904,0.725,0.7509650741005044,1.0
|
data/internlm2_5-7b-chat_shots_metrics.csv
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-00,0.
|
3 |
-
5,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-05,0.
|
4 |
-
10,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-10,0.
|
5 |
-
20,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-20,0.
|
6 |
-
30,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-30,0.
|
|
|
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-00,0.7063333333333334,0.7369785607161373,0.7063333333333334,0.6895815239121195,1.0
|
3 |
+
5,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-05,0.747,0.7433195768374967,0.747,0.7232456014841266,0.999
|
4 |
+
10,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-10,0.559,0.7306434812774306,0.559,0.6287391975839828,0.9883333333333333
|
5 |
+
20,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-20,0.6466666666666666,0.7143354332969056,0.6466666666666666,0.6738164117926014,0.9473333333333334
|
6 |
+
30,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-30,0.626,0.7223442225693745,0.626,0.6494216734706632,0.9403333333333334
|
7 |
+
40,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-40,0.64,0.7020393671564193,0.64,0.611996460461355,0.9813333333333333
|
8 |
+
50,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-50,0.6116666666666667,0.6808793455512054,0.6116666666666667,0.5502581431071487,0.9803333333333333
|
data/openai_metrics.csv
CHANGED
@@ -1,26 +1,29 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,gpt-4o-mini,gpt-4o-mini/shots-00,0.
|
3 |
-
0,gpt-4o,gpt-4o/shots-00,0.
|
4 |
-
0,o1-mini,o1-mini/shots-00,0.
|
5 |
-
0,o1-preview,o1-preview/shots-00,0.
|
6 |
-
5,gpt-4o-mini,gpt-4o-mini/shots-05,0.
|
7 |
-
5,gpt-4o,gpt-4o/shots-05,0.
|
8 |
-
5,o1-mini,o1-mini/shots-05,0.
|
9 |
-
5,o1-preview,o1-preview/shots-05,0.
|
10 |
-
10,gpt-4o-mini,gpt-4o-mini/shots-10,0.
|
11 |
-
10,gpt-4o,gpt-4o/shots-10,0.
|
12 |
-
10,o1-mini,o1-mini/shots-10,0.
|
13 |
-
10,o1-preview,o1-preview/shots-10,0.
|
14 |
-
20,gpt-4o-mini,gpt-4o-mini/shots-20,0.
|
15 |
-
20,gpt-4o,gpt-4o/shots-20,0.
|
16 |
-
20,o1-mini,o1-mini/shots-20,0.
|
17 |
-
20,o1-preview,o1-preview/shots-20,0.
|
18 |
-
30,gpt-4o-mini,gpt-4o-mini/shots-30,0.
|
19 |
-
30,gpt-4o,gpt-4o/shots-30,0.
|
20 |
-
30,o1-
|
21 |
-
|
22 |
-
40,gpt-4o,gpt-4o/shots-40,0.
|
23 |
-
40,
|
24 |
-
|
25 |
-
|
26 |
-
50,
|
|
|
|
|
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,gpt-4o-mini,gpt-4o-mini/shots-00,0.7166666666666667,0.7800918028217227,0.7166666666666667,0.7260056154268697,0.9916666666666667
|
3 |
+
0,gpt-4o,gpt-4o/shots-00,0.792,0.8234582231232066,0.792,0.8022633746318892,0.066
|
4 |
+
0,o1-mini,o1-mini/shots-00,0.7133333333333334,0.78301872209321,0.7133333333333334,0.7402734333211688,0.999
|
5 |
+
0,o1-preview,o1-preview/shots-00,0.725,0.7860443296236067,0.725,0.7471736898827371,0.998
|
6 |
+
5,gpt-4o-mini,gpt-4o-mini/shots-05,0.7203333333333334,0.7754800244789168,0.7203333333333334,0.718540502683781,0.9996666666666667
|
7 |
+
5,gpt-4o,gpt-4o/shots-05,0.7973333333333333,0.8251066339666824,0.7973333333333333,0.8066429877716694,0.998
|
8 |
+
5,o1-mini,o1-mini/shots-05,0.7313333333333333,0.7913577967036569,0.7313333333333333,0.7532525881890013,0.9966666666666667
|
9 |
+
5,o1-preview,o1-preview/shots-05,0.736,0.789169445854742,0.736,0.7557068489703724,0.979
|
10 |
+
10,gpt-4o-mini,gpt-4o-mini/shots-10,0.6836666666666666,0.7701177891593667,0.6836666666666666,0.6932016303210964,0.9983333333333333
|
11 |
+
10,gpt-4o,gpt-4o/shots-10,0.8013333333333333,0.8246834383036209,0.8013333333333333,0.8098901724387172,0.9996666666666667
|
12 |
+
10,o1-mini,o1-mini/shots-10,0.7283333333333334,0.7851844846890333,0.7283333333333334,0.7490987096521479,0.9943333333333333
|
13 |
+
10,o1-preview,o1-preview/shots-10,0.7513333333333333,0.7947574632958824,0.7513333333333333,0.7673707529850041,0.9873333333333333
|
14 |
+
20,gpt-4o-mini,gpt-4o-mini/shots-20,0.6616666666666666,0.7627467933668375,0.6616666666666666,0.677372757519069,0.998
|
15 |
+
20,gpt-4o,gpt-4o/shots-20,0.79,0.822098231279132,0.79,0.8020290214439503,0.9993333333333333
|
16 |
+
20,o1-mini,o1-mini/shots-20,0.7373333333333333,0.7815727856803751,0.7373333333333333,0.7533353509620383,0.9946666666666667
|
17 |
+
20,o1-preview,o1-preview/shots-20,0.7483333333333333,0.790639591375103,0.7483333333333333,0.763324860719675,0.9853333333333333
|
18 |
+
30,gpt-4o-mini,gpt-4o-mini/shots-30,0.6876666666666666,0.7663381611066244,0.6876666666666666,0.6896169854446027,0.999
|
19 |
+
30,gpt-4o,gpt-4o/shots-30,0.7946666666666666,0.8259436682564079,0.7946666666666666,0.8063113377291872,0.999
|
20 |
+
30,o1-mini,o1-mini/shots-30,0.748,0.779168441371953,0.748,0.7583397172973073,0.9976666666666667
|
21 |
+
30,o1-preview,o1-preview/shots-30,0.7513333333333333,0.792049804996314,0.7513333333333333,0.7654800949250774,0.984
|
22 |
+
40,gpt-4o-mini,gpt-4o-mini/shots-40,0.6903333333333334,0.7603850760051853,0.6903333333333334,0.688393665975117,0.9986666666666667
|
23 |
+
40,gpt-4o,gpt-4o/shots-40,0.7906666666666666,0.8242154446428003,0.7906666666666666,0.803356987717753,0.9973333333333333
|
24 |
+
40,o1-mini,o1-mini/shots-40,0.7496666666666667,0.775765877349714,0.7496666666666667,0.757640226210139,0.9976666666666667
|
25 |
+
40,o1-preview,o1-preview/shots-40,0.7526666666666667,0.795308022968859,0.7526666666666667,0.7672762517397222,0.984
|
26 |
+
50,gpt-4o-mini,gpt-4o-mini/shots-50,0.7143333333333334,0.7654214682013311,0.7143333333333334,0.7056961582308003,0.9993333333333333
|
27 |
+
50,gpt-4o,gpt-4o/shots-50,0.798,0.8274250231711487,0.798,0.8091066504350897,0.9993333333333333
|
28 |
+
50,o1-mini,o1-mini/shots-50,0.7536666666666667,0.7755130422727871,0.7536666666666667,0.7602241520634903,0.9976666666666667
|
29 |
+
50,o1-preview,o1-preview/shots-50,0.7576666666666667,0.7986597718440941,0.7576666666666667,0.7718331604189232,0.9816666666666667
|
datasets/mgtv/train.csv
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a7703f495679958e64d334d8c16ae22e30de50a7b3b6dfd98c9966dae731688
|
3 |
+
size 20697987
|
datasets/mgtv/val.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
llm_toolkit/logical_reasoning_utils.py
CHANGED
@@ -208,6 +208,9 @@ def extract_answer(text, debug=False):
|
|
208 |
return ""
|
209 |
|
210 |
def extract_answer_from_text(text, question):
|
|
|
|
|
|
|
211 |
labels = ['不是', '是', '不重要', '回答正确', '问法错误']
|
212 |
original_text = text
|
213 |
text = text.split("回答:")[-1]
|
@@ -302,7 +305,7 @@ def load_logical_reasoning_dataset(
|
|
302 |
):
|
303 |
postfix = "" if chinese_prompt else "_en"
|
304 |
train_data_file = data_path + f"/train{postfix}.csv"
|
305 |
-
test_data_file = data_path + f"/{test_data if test_data else '
|
306 |
|
307 |
print("loading train/test data files")
|
308 |
datasets = load_dataset(
|
@@ -424,7 +427,7 @@ def load_alpaca_data(data_path, using_p1=True, use_english_datasets=False):
|
|
424 |
return df_alpaca
|
425 |
|
426 |
|
427 |
-
def plot_value_counts(df, column_name, offset=0.1, title=None, preprocess_func=None):
|
428 |
# font_family = rcParams["font.family"]
|
429 |
# # Set the font to SimHei to support Chinese characters
|
430 |
# rcParams["font.family"] = "SimHei"
|
@@ -440,6 +443,8 @@ def plot_value_counts(df, column_name, offset=0.1, title=None, preprocess_func=N
|
|
440 |
plt.figure(figsize=(8, 4))
|
441 |
|
442 |
value_counts = df[column_name].value_counts()
|
|
|
|
|
443 |
value_counts = value_counts.rename(index=translation_dict)
|
444 |
value_counts.plot(kind="bar")
|
445 |
|
@@ -558,7 +563,7 @@ def plot_metrics(perf_df, model_name, variant="epoch", offset=0.01):
|
|
558 |
ax.set_ylabel("Accuracy and F1 Score")
|
559 |
|
560 |
ax.xaxis.set_major_locator(MultipleLocator(0.2 if variant == "epoch" else 5))
|
561 |
-
ax.set_title(f"Performance Analysis Across Checkpoints for the {model_name} Model")
|
562 |
|
563 |
# Rotate x labels
|
564 |
plt.xticks(rotation=0)
|
|
|
208 |
return ""
|
209 |
|
210 |
def extract_answer_from_text(text, question):
|
211 |
+
if True:
|
212 |
+
return extract_answer(text)
|
213 |
+
|
214 |
labels = ['不是', '是', '不重要', '回答正确', '问法错误']
|
215 |
original_text = text
|
216 |
text = text.split("回答:")[-1]
|
|
|
305 |
):
|
306 |
postfix = "" if chinese_prompt else "_en"
|
307 |
train_data_file = data_path + f"/train{postfix}.csv"
|
308 |
+
test_data_file = data_path + f"/{test_data if test_data else 'val'}{postfix}.csv"
|
309 |
|
310 |
print("loading train/test data files")
|
311 |
datasets = load_dataset(
|
|
|
427 |
return df_alpaca
|
428 |
|
429 |
|
430 |
+
def plot_value_counts(df, column_name, offset=0.1, title=None, preprocess_func=None, debug=False):
|
431 |
# font_family = rcParams["font.family"]
|
432 |
# # Set the font to SimHei to support Chinese characters
|
433 |
# rcParams["font.family"] = "SimHei"
|
|
|
443 |
plt.figure(figsize=(8, 4))
|
444 |
|
445 |
value_counts = df[column_name].value_counts()
|
446 |
+
if debug:
|
447 |
+
print(value_counts)
|
448 |
value_counts = value_counts.rename(index=translation_dict)
|
449 |
value_counts.plot(kind="bar")
|
450 |
|
|
|
563 |
ax.set_ylabel("Accuracy and F1 Score")
|
564 |
|
565 |
ax.xaxis.set_major_locator(MultipleLocator(0.2 if variant == "epoch" else 5))
|
566 |
+
ax.set_title(f"Performance Analysis Across {'Checkpoints' if variant == 'epoch' else 'Shots'} for the {model_name} Model")
|
567 |
|
568 |
# Rotate x labels
|
569 |
plt.xticks(rotation=0)
|
notebooks/00_Data Analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/01a_internlm2_5-20b-chat_analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/01a_internlm2_5-7b-chat-1m_analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/01a_internlm2_5-7b-chat_analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/01b_Mistral-7B-v0.3-Chinese-Chat_analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/02a_Qwen2-7B-Instruct_analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/02b_Qwen2-72B-Instruct_analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/02e_Qwen2.5-1.5B-Instruct_analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/02f_Qwen2.5-0.5B-Instruct_analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/02g_Qwen2.5-72B-Instruct_analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/03a_Llama3.1-8B-Chinese-Chat_analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/03b_Llama3.1-70B-Chinese-Chat_analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/04b_OpenAI-Models_analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/06b_Open-Source-Models_analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
scripts/eval-mgtv-qwen2.5_4bit.sh
CHANGED
@@ -14,11 +14,12 @@ lscpu
|
|
14 |
grep MemTotal /proc/meminfo
|
15 |
|
16 |
export USING_LLAMA_FACTORY=true
|
17 |
-
export LOAD_IN_4BIT=true
|
18 |
-
$BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-72B-Instruct
|
19 |
|
20 |
-
export START_NUM_SHOTS=
|
|
|
21 |
$BASEDIR/scripts/eval-shots_4bit.sh Qwen Qwen2.5-72B-Instruct
|
22 |
|
23 |
-
export START_NUM_SHOTS=
|
24 |
-
$BASEDIR/scripts/eval-shots_4bit.sh
|
|
|
14 |
grep MemTotal /proc/meminfo
|
15 |
|
16 |
export USING_LLAMA_FACTORY=true
|
17 |
+
# export LOAD_IN_4BIT=true
|
18 |
+
# $BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-72B-Instruct
|
19 |
|
20 |
+
export START_NUM_SHOTS=10
|
21 |
+
export END_NUM_SHOTS=20
|
22 |
$BASEDIR/scripts/eval-shots_4bit.sh Qwen Qwen2.5-72B-Instruct
|
23 |
|
24 |
+
export START_NUM_SHOTS=5
|
25 |
+
$BASEDIR/scripts/eval-shots_4bit.sh internlm internlm2_5-20b-chat
|
scripts/eval-mgtv.sh
CHANGED
@@ -1 +1 @@
|
|
1 |
-
eval-mgtv-qwen2.
|
|
|
1 |
+
eval-mgtv-qwen2.5_4bit.sh
|
scripts/eval-shots.sh
CHANGED
@@ -5,6 +5,8 @@ cd $BASEDIR/..
|
|
5 |
echo Current Directory:
|
6 |
pwd
|
7 |
|
|
|
|
|
8 |
export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
|
9 |
export RESIZE_TOKEN_EMBEDDINGS=true
|
10 |
# export USING_LLAMA_FACTORY=true
|
@@ -25,10 +27,3 @@ fi
|
|
25 |
|
26 |
echo Evaluating $MODEL_NAME with few-shot learning
|
27 |
python llm_toolkit/eval_shots.py
|
28 |
-
|
29 |
-
if [ "$MODEL" == "internlm2_5-7b-chat-1m" ];
|
30 |
-
then
|
31 |
-
export LOAD_IN_4BIT=true
|
32 |
-
export START_NUM_SHOTS=5
|
33 |
-
$BASEDIR/scripts/eval-shots.sh internlm internlm2_5-20b-chat
|
34 |
-
fi
|
|
|
5 |
echo Current Directory:
|
6 |
pwd
|
7 |
|
8 |
+
BASEDIR=`pwd`
|
9 |
+
|
10 |
export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
|
11 |
export RESIZE_TOKEN_EMBEDDINGS=true
|
12 |
# export USING_LLAMA_FACTORY=true
|
|
|
27 |
|
28 |
echo Evaluating $MODEL_NAME with few-shot learning
|
29 |
python llm_toolkit/eval_shots.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|