dh-mc committed on
Commit
d5ab5d2
·
1 Parent(s): 06dfa32

ready for qwen2.5

Browse files
data/Llama3.1-70B-Chinese-Chat_shots_metrics.csv CHANGED
@@ -1,2 +1,5 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
  0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-00,0.7636666666666667,0.7806653325131986,0.7636666666666667,0.7525813484548423,0.009666666666666667
 
 
 
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
  0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-00,0.7636666666666667,0.7806653325131986,0.7636666666666667,0.7525813484548423,0.009666666666666667
3
+ 5,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-05,0.7536666666666667,0.772126097633354,0.7536666666666667,0.7545029613768596,0.79
4
+ 10,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-10,0.754,0.7729477984842943,0.754,0.756682017266956,0.8326666666666667
5
+ 20,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-20,0.738,0.7566938786102072,0.738,0.7348961489952073,0.819
data/Mistral-7B-v0.3-Chinese-Chat_metrics.csv CHANGED
@@ -1,12 +1,12 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
  0.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat_torch.float16_lf,0.6946666666666667,0.701136267898111,0.6946666666666667,0.6634078645357937,0.011666666666666667
3
- 0.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-105_torch.float16_lf,0.6596666666666666,0.7923396753604393,0.6596666666666666,0.7067542301676931,1.0
4
- 0.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7146666666666667,0.7861341885687435,0.7146666666666667,0.7404677278137267,1.0
5
- 0.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-175_torch.float16_lf,0.7326666666666667,0.7876867721932461,0.7326666666666667,0.7471869515031995,1.0
6
- 0.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7016666666666667,0.7903119228393193,0.7016666666666667,0.7348708822385348,1.0
7
- 1.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-245_torch.float16_lf,0.75,0.7885868317699068,0.75,0.7648234347578796,1.0
8
- 1.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-280_torch.float16_lf,0.7156666666666667,0.7846106674095725,0.7156666666666667,0.7410042005708856,1.0
9
- 1.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-315_torch.float16_lf,0.6916666666666667,0.7864256994491394,0.6916666666666667,0.7257499426487266,1.0
10
- 1.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-35_torch.float16_lf,0.702,0.7932731014186957,0.702,0.7342714734731689,1.0
11
- 1.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-350_torch.float16_lf,0.6976666666666667,0.7889443494370009,0.6976666666666667,0.7307996137659796,1.0
12
- 2.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-70_torch.float16_lf,0.742,0.78982949223512,0.742,0.7536681109811127,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
  0.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat_torch.float16_lf,0.6946666666666667,0.701136267898111,0.6946666666666667,0.6634078645357937,0.011666666666666667
3
+ 0.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-35_torch.float16_lf,0.702,0.7932731014186957,0.702,0.7342714734731689,1.0
4
+ 0.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-70_torch.float16_lf,0.742,0.78982949223512,0.742,0.7536681109811127,1.0
5
+ 0.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-105_torch.float16_lf,0.6596666666666666,0.7923396753604393,0.6596666666666666,0.7067542301676931,1.0
6
+ 0.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7146666666666667,0.7861341885687435,0.7146666666666667,0.7404677278137267,1.0
7
+ 1.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-175_torch.float16_lf,0.7326666666666667,0.7876867721932461,0.7326666666666667,0.7471869515031995,1.0
8
+ 1.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7016666666666667,0.7903119228393193,0.7016666666666667,0.7348708822385348,1.0
9
+ 1.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-245_torch.float16_lf,0.75,0.7885868317699068,0.75,0.7648234347578796,1.0
10
+ 1.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-280_torch.float16_lf,0.7156666666666667,0.7846106674095725,0.7156666666666667,0.7410042005708856,1.0
11
+ 1.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-315_torch.float16_lf,0.6916666666666667,0.7864256994491394,0.6916666666666667,0.7257499426487266,1.0
12
+ 2.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-350_torch.float16_lf,0.6976666666666667,0.7889443494370009,0.6976666666666667,0.7307996137659796,1.0
data/Mistral-7B-v0.3-Chinese-Chat_shots_metrics.csv CHANGED
@@ -1,3 +1,5 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
  0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-00,0.6946666666666667,0.701136267898111,0.6946666666666667,0.6634078645357937,0.011666666666666667
 
3
  10,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-10,0.6036666666666667,0.7334913867282189,0.6036666666666667,0.6493185547247415,0.10633333333333334
 
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
  0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-00,0.6946666666666667,0.701136267898111,0.6946666666666667,0.6634078645357937,0.011666666666666667
3
+ 5,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-05,0.6446666666666667,0.7451807329096397,0.6446666666666667,0.681030628954011,0.142
4
  10,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-10,0.6036666666666667,0.7334913867282189,0.6036666666666667,0.6493185547247415,0.10633333333333334
5
+ 20,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-20,0.6276666666666667,0.7398894455389585,0.6276666666666667,0.6690543758928521,0.08266666666666667
data/Qwen2.5-3B-Instruct_metrics.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,Qwen2.5-3B-Instruct,qwen/Qwen2.5-3B-Instruct/checkpoint-35_torch.bfloat16_lf,0.7033333333333334,0.7493686353899274,0.7033333333333334,0.7196581245915875,1.0
3
+ 0.2,Qwen2.5-3B-Instruct,qwen/Qwen2.5-3B-Instruct/checkpoint-70_torch.bfloat16_lf,0.664,0.7490874767990094,0.664,0.6954540806463714,1.0
4
+ 0.4,Qwen2.5-3B-Instruct,qwen/Qwen2.5-3B-Instruct/checkpoint-88_torch.bfloat16_lf,0.6743333333333333,0.7591682267298503,0.6743333333333333,0.7069378240575964,1.0
data/Qwen2.5-3B-Instruct_shots_metrics.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-00,0.5796666666666667,0.6966500240864278,0.5796666666666667,0.5506370828782681,1.0
data/best_metrics.csv CHANGED
@@ -1,13 +1,14 @@
1
  index,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 1,internlm2_5-7b-chat,internlm2_5-7b-chat,0.7496666666666667,0.8041871978859686,0.7496666666666667,0.7660159670998776,1.0
3
- 2,internlm2_5-7b-chat-1m,internlm2_5-7b-chat-1m,0.803,0.8031411888150441,0.803,0.8028064320197301,1.0
4
- 3,Mistral-7B-v0.3-Chinese-Chat,Mistral-7B-v0.3-Chinese-Chat,0.75,0.7885868317699068,0.75,0.7648234347578796,1.0
5
- 4,Qwen2-7B-Instruct,Qwen2-7B-Instruct,0.759,0.8005303465799652,0.759,0.7748745026535183,1.0
6
- 5,Llama3.1-8B-Chinese-Chat,Llama3.1-8B-Chinese-Chat,0.78,0.810582723471486,0.78,0.7924651054056209,1.0
7
- 6,Llama3.1-70B-Chinese-Chat,Llama3.1-70B-Chinese-Chat,0.7963333333333333,0.8248972880055918,0.7963333333333333,0.8076868978089201,1.0
8
- 7,Qwen2-72B-Instruct,Qwen2-72B-Instruct,0.784,0.8354349234761956,0.784,0.804194683154365,1.0
9
- 8,Ensemble Model,Ensemble Model,0.8193333333333334,0.8407464756633664,0.8193333333333334,0.828054127213081,1.0
10
- 9,gpt-4o-mini (0-shot),gpt-4o-mini (0-shot),0.7176666666666667,0.785706730193659,0.7176666666666667,0.7296061848734905,1.0
11
- 10,o1-mini (10-shot),o1-mini (10-shot),0.725,0.7892485648334764,0.725,0.7485623974683336,1.0
12
- 11,gpt-4o (10-shot),gpt-4o (10-shot),0.7916666666666666,0.8227707658360168,0.7916666666666666,0.803614688453356,0.9996666666666667
13
- 12,o1-preview (10-shot),o1-preview (10-shot),0.749,0.7964482186234537,0.749,0.7677316493549238,1.0
 
 
1
  index,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 1,internlm2_5-7b-chat (0.8-epoch),internlm2_5-7b-chat (0.8-epoch),0.7496666666666667,0.8041871978859686,0.7496666666666667,0.7660159670998776,1.0
3
+ 2,internlm2_5-7b-chat-1m (0.8-epoch),internlm2_5-7b-chat-1m (0.8-epoch),0.803,0.8031411888150441,0.803,0.8028064320197301,1.0
4
+ 3,internlm2_5-20b-chat (0.8-epoch),internlm2_5-20b-chat (0.8-epoch),0.795,0.817457691710893,0.795,0.8027552955647029,1.0
5
+ 4,Qwen2-7B-Instruct (0.4-epoch),Qwen2-7B-Instruct (0.4-epoch),0.759,0.8005303465799652,0.759,0.7748745026535183,1.0
6
+ 5,Qwen2-72B-Instruct (1.8-epoch),Qwen2-72B-Instruct (1.8-epoch),0.784,0.8354349234761956,0.784,0.804194683154365,1.0
7
+ 6,Llama3.1-8B-Chinese-Chat (1.0-epoch),Llama3.1-8B-Chinese-Chat (1.0-epoch),0.78,0.810582723471486,0.78,0.7924651054056209,1.0
8
+ 7,Llama3.1-70B-Chinese-Chat (1.0-epoch),Llama3.1-70B-Chinese-Chat (1.0-epoch),0.7963333333333333,0.8248972880055918,0.7963333333333333,0.8076868978089201,1.0
9
+ 8,gpt-4o-mini (0-shot),gpt-4o-mini (0-shot),0.7176666666666667,0.785706730193659,0.7176666666666667,0.7296061848734905,1.0
10
+ 9,o1-mini (20-shot),o1-mini (20-shot),0.7343333333333333,0.786101455887261,0.7343333333333333,0.7535300565051624,0.999
11
+ 10,gpt-4o (10-shot),gpt-4o (10-shot),0.7916666666666666,0.8227707658360168,0.7916666666666666,0.803614688453356,0.9996666666666667
12
+ 11,o1-preview (50-shot),o1-preview (50-shot),0.7546666666666667,0.7979981023789272,0.7546666666666667,0.7708181822112403,0.9996666666666667
13
+ 12,Ensemble Model (Open Source),Ensemble Model (Open Source),0.8193333333333334,0.8407464756633664,0.8193333333333334,0.828054127213081,1.0
14
+ 13,Ensemble Model (OpenAI),Ensemble Model (OpenAI),0.7986666666666666,0.8223071972084313,0.7986666666666666,0.8080230503376233,1.0
data/best_results.csv CHANGED
The diff for this file is too large to render. See raw diff
 
data/few-shots_metrics.csv ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-00,0.705,0.7398041613378253,0.705,0.6906357423169466,1.0
3
+ 10,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-10,0.5533333333333333,0.7301739373336078,0.5533333333333333,0.625097481985829,0.9883333333333332
4
+ 0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-00,0.4813333333333333,0.7605248207587668,0.4813333333333333,0.5244515621126862,0.9986666666666668
5
+ 10,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-10,0.6473333333333333,0.7282065610714444,0.6473333333333333,0.665824871588245,0.8866666666666667
6
+ 0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/shots-00,0.564,0.7745256693833624,0.564,0.6352190975436365,0.6726666666666666
7
+ 0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-00,0.683,0.7493103872717293,0.683,0.710140098232232,0.9996666666666668
8
+ 10,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-10,0.5646666666666667,0.7391197908117386,0.5646666666666667,0.6064049121095652,0.9896666666666668
9
+ 0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch/shots-00,0.7516666666666667,0.7949378981748352,0.7516666666666667,0.7572499605227642,0.9773333333333334
10
+ 0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-00,0.742,0.7477056799746837,0.742,0.7371050181385632,0.8033333333333333
11
+ 10,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-10,0.6676666666666666,0.7834080522821993,0.6676666666666666,0.7082605860921491,0.9623333333333334
12
+ 0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-00,0.7636666666666667,0.7806653325131986,0.7636666666666667,0.7525813484548423,0.009666666666666667
13
+ 5,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-05,0.7536666666666667,0.772126097633354,0.7536666666666667,0.7545029613768596,0.79
14
+ 10,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-10,0.754,0.7729477984842943,0.754,0.756682017266956,0.8326666666666667
15
+ 20,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-20,0.738,0.7566938786102072,0.738,0.7348961489952073,0.819
16
+ 0,gpt-4o-mini,gpt-4o-mini/shots-00,0.7176666666666667,0.785706730193659,0.7176666666666667,0.7296061848734905,0.9916666666666668
17
+ 5,gpt-4o-mini,gpt-4o-mini/shots-05,0.7176666666666667,0.7767294185987051,0.7176666666666667,0.7181068311028772,0.9996666666666668
18
+ 10,gpt-4o-mini,gpt-4o-mini/shots-10,0.6793333333333333,0.7728086050218999,0.6793333333333333,0.6916749681933937,0.9983333333333332
19
+ 20,gpt-4o-mini,gpt-4o-mini/shots-20,0.6623333333333333,0.7686706009175459,0.6623333333333333,0.6798015109939115,0.998
20
+ 30,gpt-4o-mini,gpt-4o-mini/shots-30,0.6873333333333334,0.7684209723431035,0.6873333333333334,0.6913018667081989,0.999
21
+ 40,gpt-4o-mini,gpt-4o-mini/shots-40,0.6923333333333334,0.7639874967862498,0.6923333333333334,0.6924934068935911,0.9986666666666668
22
+ 50,gpt-4o-mini,gpt-4o-mini/shots-50,0.717,0.7692638634416518,0.717,0.7105227254860433,0.9993333333333332
23
+ 0,o1-mini,o1-mini/shots-00,0.7083333333333334,0.7848098266888749,0.7083333333333334,0.7377068425566796,0.999
24
+ 5,o1-mini,o1-mini/shots-05,0.724,0.7905045610386181,0.724,0.7482963122126776,0.9966666666666668
25
+ 10,o1-mini,o1-mini/shots-10,0.725,0.7892485648334764,0.725,0.7485623974683336,0.9943333333333332
26
+ 20,o1-mini,o1-mini/shots-20,0.7343333333333333,0.786101455887261,0.7343333333333333,0.7535300565051624,0.9946666666666668
27
+ 0,gpt-4o,gpt-4o/shots-00,0.782,0.8204048322982596,0.782,0.7953019682198627,0.066
28
+ 5,gpt-4o,gpt-4o/shots-05,0.7873333333333333,0.8230974205170392,0.7873333333333333,0.8000290527498529,0.998
29
+ 10,gpt-4o,gpt-4o/shots-10,0.7916666666666666,0.8227707658360168,0.7916666666666666,0.803614688453356,0.9996666666666668
30
+ 20,gpt-4o,gpt-4o/shots-20,0.7816666666666666,0.8204541793856629,0.7816666666666666,0.7967017169880498,0.9993333333333332
31
+ 30,gpt-4o,gpt-4o/shots-30,0.7886666666666666,0.8260847852316618,0.7886666666666666,0.8030949295928699,0.999
32
+ 40,gpt-4o,gpt-4o/shots-40,0.784,0.8233509309291644,0.784,0.7993336791122846,0.9973333333333332
33
+ 50,gpt-4o,gpt-4o/shots-50,0.787,0.8234800466218334,0.787,0.8013530974301947,0.9993333333333332
34
+ 0,o1-preview,o1-preview/shots-00,0.721,0.7849371317342158,0.721,0.7451207069815194,0.998
35
+ 5,o1-preview,o1-preview/shots-05,0.7313333333333333,0.7878283093765627,0.7313333333333333,0.7535489719321234,0.979
36
+ 10,o1-preview,o1-preview/shots-10,0.749,0.7964482186234537,0.749,0.7677316493549238,0.9873333333333332
37
+ 20,o1-preview,o1-preview/shots-20,0.7443333333333333,0.7911442834260676,0.7443333333333333,0.7625144090816939,0.9853333333333332
38
+ 30,o1-preview,o1-preview/shots-30,0.7473333333333333,0.7920604378746952,0.7473333333333333,0.7643977099599287,0.984
39
+ 40,o1-preview,o1-preview/shots-40,0.7506666666666667,0.7964679024468982,0.7506666666666667,0.7674109766459014,0.984
40
+ 50,o1-preview,o1-preview/shots-50,0.7546666666666667,0.7979981023789272,0.7546666666666667,0.7708181822112403,0.9816666666666668
data/fine-tuning_metrics.csv ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat_torch.bfloat16_lf,0.705,0.7398041613378253,0.705,0.6906357423169466,1.0
3
+ 0.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-35_torch.bfloat16_lf,0.7193333333333334,0.7863486093365692,0.7193333333333334,0.7330498811142795,1.0
4
+ 0.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-70_torch.bfloat16_lf,0.726,0.7900250828103491,0.726,0.7396583495246526,1.0
5
+ 0.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-105_torch.bfloat16_lf,0.6736666666666666,0.8044565554629858,0.6736666666666666,0.7104123104529902,1.0
6
+ 0.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-140_torch.bfloat16_lf,0.7496666666666667,0.8041871978859686,0.7496666666666667,0.7660159670998776,1.0
7
+ 1.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-175_torch.bfloat16_lf,0.726,0.8094634420846424,0.726,0.751394838822856,1.0
8
+ 1.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-210_torch.bfloat16_lf,0.7276666666666667,0.8039673699820601,0.7276666666666667,0.7488653386949028,1.0
9
+ 1.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-245_torch.bfloat16_lf,0.747,0.8055537753403307,0.747,0.76527383722639,1.0
10
+ 1.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-280_torch.bfloat16_lf,0.7166666666666667,0.8059535682746547,0.7166666666666667,0.7432427946178835,1.0
11
+ 1.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-315_torch.bfloat16_lf,0.6983333333333334,0.8119110469658597,0.6983333333333334,0.7347246872892312,1.0
12
+ 2.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-350_torch.bfloat16_lf,0.7076666666666667,0.8120132783051135,0.7076666666666667,0.7408145046817652,1.0
13
+ 0.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m_torch.bfloat16_lf,0.4813333333333333,0.7605248207587668,0.4813333333333333,0.5244515621126862,0.9986666666666668
14
+ 0.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-35_torch.bfloat16_lf,0.7843333333333333,0.7977648302848388,0.7843333333333333,0.7864944570659659,1.0
15
+ 0.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-70_torch.bfloat16_lf,0.7836666666666666,0.7996977262947886,0.7836666666666666,0.7886881726841081,1.0
16
+ 0.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-105_torch.bfloat16_lf,0.7243333333333334,0.8171172705912051,0.7243333333333334,0.7565804830382912,1.0
17
+ 0.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-140_torch.bfloat16_lf,0.803,0.8031411888150441,0.803,0.8028064320197301,1.0
18
+ 1.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-175_torch.bfloat16_lf,0.7676666666666667,0.8108441731715863,0.7676666666666667,0.7843187816704813,1.0
19
+ 1.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-210_torch.bfloat16_lf,0.7736666666666666,0.8091671780923799,0.7736666666666666,0.7876874850235454,1.0
20
+ 1.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-245_torch.bfloat16_lf,0.7623333333333333,0.8062291602218205,0.7623333333333333,0.777669094563925,1.0
21
+ 1.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-280_torch.bfloat16_lf,0.7553333333333333,0.8086197936829652,0.7553333333333333,0.7755588811428297,1.0
22
+ 1.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-315_torch.bfloat16_lf,0.748,0.8171996792797457,0.748,0.773990849396903,1.0
23
+ 2.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-350_torch.bfloat16_lf,0.756,0.8126875394266148,0.756,0.7777812522863184,1.0
24
+ 0.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat_torch.bfloat16_4bit_lf,0.564,0.7745256693833624,0.564,0.6352190975436365,0.6726666666666666
25
+ 0.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-35_torch.bfloat16_4bit_lf,0.7576666666666667,0.7960640143421251,0.7576666666666667,0.769346697622254,1.0
26
+ 0.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-70_torch.bfloat16_4bit_lf,0.7743333333333333,0.8042791719587958,0.7743333333333333,0.7849233169481004,1.0
27
+ 0.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-105_torch.bfloat16_4bit_lf,0.7053333333333334,0.8070587351344375,0.7053333333333334,0.7421985241641746,1.0
28
+ 0.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-140_torch.bfloat16_4bit_lf,0.795,0.817457691710893,0.795,0.8027552955647029,1.0
29
+ 1.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-175_torch.bfloat16_4bit_lf,0.7786666666666666,0.8220512342362645,0.7786666666666666,0.7938353741035283,1.0
30
+ 1.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-210_torch.bfloat16_4bit_lf,0.7516666666666667,0.8264680853251051,0.7516666666666667,0.7787088167337303,1.0
31
+ 1.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-245_torch.bfloat16_4bit_lf,0.7876666666666666,0.8154190698395475,0.7876666666666666,0.7965399224841393,1.0
32
+ 1.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-280_torch.bfloat16_4bit_lf,0.7753333333333333,0.8181125383376948,0.7753333333333333,0.7899794199099057,1.0
33
+ 1.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-315_torch.bfloat16_4bit_lf,0.7583333333333333,0.8179523170315577,0.7583333333333333,0.7795358413482081,1.0
34
+ 2.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7616666666666667,0.8208475549648238,0.7616666666666667,0.7826736174247095,1.0
35
+ 0.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct_torch.float16_lf,0.683,0.7493103872717293,0.683,0.710140098232232,0.9996666666666668
36
+ 0.2,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-35_torch.float16_lf,0.725,0.7840171468707405,0.725,0.748994536667058,0.9996666666666668
37
+ 0.4,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-70_torch.float16_lf,0.759,0.8005303465799652,0.759,0.7748745026535183,1.0
38
+ 0.6,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-105_torch.float16_lf,0.6926666666666667,0.8039176975550218,0.6926666666666667,0.7332481528585848,1.0
39
+ 0.8,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-140_torch.float16_lf,0.725,0.7952719247171957,0.725,0.7476238017654298,1.0
40
+ 1.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-175_torch.float16_lf,0.6756666666666666,0.7810148934939715,0.6756666666666666,0.708653993277772,1.0
41
+ 1.2,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-210_torch.float16_lf,0.7013333333333334,0.7969562600853992,0.7013333333333334,0.7362679665494508,1.0
42
+ 1.4,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-245_torch.float16_lf,0.7326666666666667,0.7922538479314682,0.7326666666666667,0.755402136631717,0.9996666666666668
43
+ 1.6,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-280_torch.float16_lf,0.6983333333333334,0.785127298428753,0.6983333333333334,0.7292251109166867,1.0
44
+ 1.8,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-315_torch.float16_lf,0.6783333333333333,0.785390767631834,0.6783333333333333,0.7164131321837346,1.0
45
+ 2.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-350_torch.float16_lf,0.689,0.7929715746898984,0.689,0.7259993126510194,1.0
46
+ 0.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch.bfloat16_4bit_lf,0.7516666666666667,0.7949378981748352,0.7516666666666667,0.7572499605227642,0.9773333333333334
47
+ 0.2,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.7583333333333333,0.8199928526815756,0.7583333333333333,0.782751089787442,1.0
48
+ 0.4,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.7366666666666667,0.8224865755517643,0.7366666666666667,0.7700627366337021,1.0
49
+ 0.6,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-105_torch.bfloat16_4bit_lf,0.757,0.8253824826209251,0.757,0.784000409833628,1.0
50
+ 0.8,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-140_torch.bfloat16_4bit_lf,0.7893333333333333,0.8229104753645825,0.7893333333333333,0.8033124955993173,1.0
51
+ 1.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-175_torch.bfloat16_4bit_lf,0.7376666666666667,0.8243654864769323,0.7376666666666667,0.7699617360961548,1.0
52
+ 1.2,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-210_torch.bfloat16_4bit_lf,0.763,0.8318882808702871,0.763,0.7901075708186186,1.0
53
+ 1.4,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-245_torch.bfloat16_4bit_lf,0.7656666666666667,0.8288272203240518,0.7656666666666667,0.790627109330698,1.0
54
+ 1.6,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-280_torch.bfloat16_4bit_lf,0.7693333333333333,0.8292798021666021,0.7693333333333333,0.7930169589012503,1.0
55
+ 1.8,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-315_torch.bfloat16_4bit_lf,0.784,0.8354349234761956,0.784,0.804194683154365,1.0
56
+ 2.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-350_torch.bfloat16_4bit_lf,0.7736666666666666,0.8330147983140184,0.7736666666666666,0.7973657072550873,1.0
57
+ 0.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat_torch.float16_lf,0.742,0.7477056799746837,0.742,0.7371050181385632,0.8033333333333333
58
+ 0.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-35_torch.float16_lf,0.709,0.7987219597893886,0.709,0.7427961200958145,1.0
59
+ 0.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-70_torch.float16_lf,0.7163333333333334,0.8058657875960304,0.7163333333333334,0.7487811196109319,0.9993333333333332
60
+ 0.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-105_torch.float16_lf,0.6996666666666667,0.802722482275839,0.6996666666666667,0.7370938556711591,1.0
61
+ 0.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7716666666666666,0.8092193821623755,0.7716666666666666,0.7864287269398251,1.0
62
+ 1.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-175_torch.float16_lf,0.78,0.810582723471486,0.78,0.7924651054056209,1.0
63
+ 1.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7313333333333333,0.8157783263996798,0.7313333333333333,0.7628807622782868,1.0
64
+ 1.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-245_torch.float16_lf,0.751,0.8125856808988221,0.751,0.7745416635653988,1.0
65
+ 1.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-280_torch.float16_lf,0.739,0.8097375095673094,0.739,0.7662329023371559,1.0
66
+ 1.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-315_torch.float16_lf,0.7236666666666667,0.8145530585912838,0.7236666666666667,0.7580428816095297,1.0
67
+ 2.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-350_torch.float16_lf,0.7293333333333333,0.8151184301713545,0.7293333333333333,0.7616699266814145,1.0
68
+ 0.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat_torch.bfloat16_4bit_lf,0.7636666666666667,0.7806653325131986,0.7636666666666667,0.7525813484548423,0.009666666666666667
69
+ 0.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-35_torch.bfloat16_4bit_lf,0.778,0.8148707737020212,0.778,0.7910805488003003,0.9996666666666668
70
+ 0.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-70_torch.bfloat16_4bit_lf,0.7306666666666667,0.8145782271710159,0.7306666666666667,0.7624724104697406,1.0
71
+ 0.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-105_torch.bfloat16_4bit_lf,0.7193333333333334,0.8213567226911125,0.7193333333333334,0.7560702640626931,1.0
72
+ 0.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-140_torch.bfloat16_4bit_lf,0.7563333333333333,0.826789897753756,0.7563333333333333,0.7815164366677209,1.0
73
+ 1.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-175_torch.bfloat16_4bit_lf,0.7963333333333333,0.8248972880055918,0.7963333333333333,0.8076868978089201,1.0
74
+ 1.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-210_torch.bfloat16_4bit_lf,0.7326666666666667,0.8265345821998035,0.7326666666666667,0.7644418492070342,1.0
75
+ 1.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-245_torch.bfloat16_4bit_lf,0.7556666666666667,0.8258994609525315,0.7556666666666667,0.7820405339757727,1.0
76
+ 1.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-280_torch.bfloat16_4bit_lf,0.757,0.8264461657684251,0.757,0.7834496144681513,1.0
77
+ 1.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-315_torch.bfloat16_4bit_lf,0.7546666666666667,0.8277723752096544,0.7546666666666667,0.7823584779069335,1.0
78
+ 2.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7496666666666667,0.8282310230333227,0.7496666666666667,0.7791947625361637,1.0
data/internlm2_5-20b-chat_metrics.csv CHANGED
@@ -1,12 +1,12 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
  0.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat_torch.bfloat16_4bit_lf,0.564,0.7745256693833624,0.564,0.6352190975436365,0.6726666666666666
3
- 0.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-105_torch.bfloat16_4bit_lf,0.7053333333333334,0.8070587351344375,0.7053333333333334,0.7421985241641746,1.0
4
- 0.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-140_torch.bfloat16_4bit_lf,0.795,0.817457691710893,0.795,0.8027552955647029,1.0
5
- 0.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-175_torch.bfloat16_4bit_lf,0.7786666666666666,0.8220512342362645,0.7786666666666666,0.7938353741035283,1.0
6
- 0.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-210_torch.bfloat16_4bit_lf,0.7516666666666667,0.8264680853251051,0.7516666666666667,0.7787088167337303,1.0
7
- 1.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-245_torch.bfloat16_4bit_lf,0.7876666666666666,0.8154190698395475,0.7876666666666666,0.7965399224841393,1.0
8
- 1.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-280_torch.bfloat16_4bit_lf,0.7753333333333333,0.8181125383376948,0.7753333333333333,0.7899794199099057,1.0
9
- 1.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-315_torch.bfloat16_4bit_lf,0.7583333333333333,0.8179523170315577,0.7583333333333333,0.7795358413482081,1.0
10
- 1.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-35_torch.bfloat16_4bit_lf,0.7576666666666667,0.7960640143421251,0.7576666666666667,0.769346697622254,1.0
11
- 1.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7616666666666667,0.8208475549648238,0.7616666666666667,0.7826736174247095,1.0
12
- 2.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-70_torch.bfloat16_4bit_lf,0.7743333333333333,0.8042791719587958,0.7743333333333333,0.7849233169481004,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
  0.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat_torch.bfloat16_4bit_lf,0.564,0.7745256693833624,0.564,0.6352190975436365,0.6726666666666666
3
+ 0.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-35_torch.bfloat16_4bit_lf,0.7576666666666667,0.7960640143421251,0.7576666666666667,0.769346697622254,1.0
4
+ 0.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-70_torch.bfloat16_4bit_lf,0.7743333333333333,0.8042791719587958,0.7743333333333333,0.7849233169481004,1.0
5
+ 0.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-105_torch.bfloat16_4bit_lf,0.7053333333333334,0.8070587351344375,0.7053333333333334,0.7421985241641746,1.0
6
+ 0.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-140_torch.bfloat16_4bit_lf,0.795,0.817457691710893,0.795,0.8027552955647029,1.0
7
+ 1.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-175_torch.bfloat16_4bit_lf,0.7786666666666666,0.8220512342362645,0.7786666666666666,0.7938353741035283,1.0
8
+ 1.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-210_torch.bfloat16_4bit_lf,0.7516666666666667,0.8264680853251051,0.7516666666666667,0.7787088167337303,1.0
9
+ 1.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-245_torch.bfloat16_4bit_lf,0.7876666666666666,0.8154190698395475,0.7876666666666666,0.7965399224841393,1.0
10
+ 1.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-280_torch.bfloat16_4bit_lf,0.7753333333333333,0.8181125383376948,0.7753333333333333,0.7899794199099057,1.0
11
+ 1.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-315_torch.bfloat16_4bit_lf,0.7583333333333333,0.8179523170315577,0.7583333333333333,0.7795358413482081,1.0
12
+ 2.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7616666666666667,0.8208475549648238,0.7616666666666667,0.7826736174247095,1.0
data/internlm2_5-7b-chat_metrics.csv CHANGED
@@ -1,12 +1,12 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
  0.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat_torch.bfloat16_lf,0.705,0.7398041613378253,0.705,0.6906357423169466,1.0
3
- 0.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-105_torch.bfloat16_lf,0.6736666666666666,0.8044565554629858,0.6736666666666666,0.7104123104529902,1.0
4
- 0.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-140_torch.bfloat16_lf,0.7496666666666667,0.8041871978859686,0.7496666666666667,0.7660159670998776,1.0
5
- 0.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-175_torch.bfloat16_lf,0.726,0.8094634420846424,0.726,0.751394838822856,1.0
6
- 0.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-210_torch.bfloat16_lf,0.7276666666666667,0.8039673699820601,0.7276666666666667,0.7488653386949028,1.0
7
- 1.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-245_torch.bfloat16_lf,0.747,0.8055537753403307,0.747,0.76527383722639,1.0
8
- 1.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-280_torch.bfloat16_lf,0.7166666666666667,0.8059535682746547,0.7166666666666667,0.7432427946178835,1.0
9
- 1.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-315_torch.bfloat16_lf,0.6983333333333334,0.8119110469658597,0.6983333333333334,0.7347246872892312,1.0
10
- 1.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-35_torch.bfloat16_lf,0.7193333333333334,0.7863486093365692,0.7193333333333334,0.7330498811142795,1.0
11
- 1.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-350_torch.bfloat16_lf,0.7076666666666667,0.8120132783051135,0.7076666666666667,0.7408145046817652,1.0
12
- 2.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-70_torch.bfloat16_lf,0.726,0.7900250828103491,0.726,0.7396583495246526,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
  0.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat_torch.bfloat16_lf,0.705,0.7398041613378253,0.705,0.6906357423169466,1.0
3
+ 0.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-35_torch.bfloat16_lf,0.7193333333333334,0.7863486093365692,0.7193333333333334,0.7330498811142795,1.0
4
+ 0.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-70_torch.bfloat16_lf,0.726,0.7900250828103491,0.726,0.7396583495246526,1.0
5
+ 0.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-105_torch.bfloat16_lf,0.6736666666666666,0.8044565554629858,0.6736666666666666,0.7104123104529902,1.0
6
+ 0.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-140_torch.bfloat16_lf,0.7496666666666667,0.8041871978859686,0.7496666666666667,0.7660159670998776,1.0
7
+ 1.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-175_torch.bfloat16_lf,0.726,0.8094634420846424,0.726,0.751394838822856,1.0
8
+ 1.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-210_torch.bfloat16_lf,0.7276666666666667,0.8039673699820601,0.7276666666666667,0.7488653386949028,1.0
9
+ 1.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-245_torch.bfloat16_lf,0.747,0.8055537753403307,0.747,0.76527383722639,1.0
10
+ 1.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-280_torch.bfloat16_lf,0.7166666666666667,0.8059535682746547,0.7166666666666667,0.7432427946178835,1.0
11
+ 1.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-315_torch.bfloat16_lf,0.6983333333333334,0.8119110469658597,0.6983333333333334,0.7347246872892312,1.0
12
+ 2.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-350_torch.bfloat16_lf,0.7076666666666667,0.8120132783051135,0.7076666666666667,0.7408145046817652,1.0
data/openai_metrics.csv CHANGED
@@ -1,23 +1,26 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
  0,gpt-4o-mini,gpt-4o-mini/shots-00,0.7176666666666667,0.785706730193659,0.7176666666666667,0.7296061848734905,0.9916666666666667
3
- 5,gpt-4o-mini,gpt-4o-mini/shots-05,0.7176666666666667,0.7767294185987051,0.7176666666666667,0.7181068311028772,0.9996666666666667
4
- 10,gpt-4o-mini,gpt-4o-mini/shots-10,0.6793333333333333,0.7728086050218999,0.6793333333333333,0.6916749681933937,0.9983333333333333
5
- 20,gpt-4o-mini,gpt-4o-mini/shots-20,0.6623333333333333,0.7686706009175459,0.6623333333333333,0.6798015109939115,0.998
6
- 30,gpt-4o-mini,gpt-4o-mini/shots-30,0.6873333333333334,0.7684209723431035,0.6873333333333334,0.6913018667081989,0.999
7
- 40,gpt-4o-mini,gpt-4o-mini/shots-40,0.6923333333333334,0.7639874967862498,0.6923333333333334,0.6924934068935911,0.9986666666666667
8
- 50,gpt-4o-mini,gpt-4o-mini/shots-50,0.717,0.7692638634416518,0.717,0.7105227254860433,0.9993333333333333
9
  0,gpt-4o,gpt-4o/shots-00,0.782,0.8204048322982596,0.782,0.7953019682198627,0.066
 
 
 
10
  5,gpt-4o,gpt-4o/shots-05,0.7873333333333333,0.8230974205170392,0.7873333333333333,0.8000290527498529,0.998
 
 
 
11
  10,gpt-4o,gpt-4o/shots-10,0.7916666666666666,0.8227707658360168,0.7916666666666666,0.803614688453356,0.9996666666666667
 
 
 
12
  20,gpt-4o,gpt-4o/shots-20,0.7816666666666666,0.8204541793856629,0.7816666666666666,0.7967017169880498,0.9993333333333333
 
 
 
13
  30,gpt-4o,gpt-4o/shots-30,0.7886666666666666,0.8260847852316618,0.7886666666666666,0.8030949295928699,0.999
 
 
14
  40,gpt-4o,gpt-4o/shots-40,0.784,0.8233509309291644,0.784,0.7993336791122846,0.9973333333333333
 
 
15
  50,gpt-4o,gpt-4o/shots-50,0.787,0.8234800466218334,0.787,0.8013530974301947,0.9993333333333333
16
- 0,o1-mini,o1-mini/shots-00,0.7083333333333334,0.7848098266888749,0.7083333333333334,0.7377068425566796,0.999
17
- 5,o1-mini,o1-mini/shots-05,0.724,0.7905045610386181,0.724,0.7482963122126776,0.9966666666666667
18
- 10,o1-mini,o1-mini/shots-10,0.725,0.7892485648334764,0.725,0.7485623974683336,0.9943333333333333
19
- 20,o1-mini,o1-mini/shots-20,0.7343333333333333,0.786101455887261,0.7343333333333333,0.7535300565051624,0.9946666666666667
20
- 0,o1-preview,o1-preview/shots-00,0.721,0.7849371317342158,0.721,0.7451207069815194,0.998
21
- 5,o1-preview,o1-preview/shots-05,0.7313333333333333,0.7878283093765627,0.7313333333333333,0.7535489719321234,0.979
22
- 10,o1-preview,o1-preview/shots-10,0.749,0.7964482186234537,0.749,0.7677316493549238,0.9873333333333333
23
- 20,o1-preview,o1-preview/shots-20,0.7443333333333333,0.7911442834260676,0.7443333333333333,0.7625144090816939,0.9853333333333333
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
  0,gpt-4o-mini,gpt-4o-mini/shots-00,0.7176666666666667,0.785706730193659,0.7176666666666667,0.7296061848734905,0.9916666666666667
 
 
 
 
 
 
3
  0,gpt-4o,gpt-4o/shots-00,0.782,0.8204048322982596,0.782,0.7953019682198627,0.066
4
+ 0,o1-mini,o1-mini/shots-00,0.7083333333333334,0.7848098266888749,0.7083333333333334,0.7377068425566796,0.999
5
+ 0,o1-preview,o1-preview/shots-00,0.721,0.7849371317342158,0.721,0.7451207069815194,0.998
6
+ 5,gpt-4o-mini,gpt-4o-mini/shots-05,0.7176666666666667,0.7767294185987051,0.7176666666666667,0.7181068311028772,0.9996666666666667
7
  5,gpt-4o,gpt-4o/shots-05,0.7873333333333333,0.8230974205170392,0.7873333333333333,0.8000290527498529,0.998
8
+ 5,o1-mini,o1-mini/shots-05,0.724,0.7905045610386181,0.724,0.7482963122126776,0.9966666666666667
9
+ 5,o1-preview,o1-preview/shots-05,0.7313333333333333,0.7878283093765627,0.7313333333333333,0.7535489719321234,0.979
10
+ 10,gpt-4o-mini,gpt-4o-mini/shots-10,0.6793333333333333,0.7728086050218999,0.6793333333333333,0.6916749681933937,0.9983333333333333
11
  10,gpt-4o,gpt-4o/shots-10,0.7916666666666666,0.8227707658360168,0.7916666666666666,0.803614688453356,0.9996666666666667
12
+ 10,o1-mini,o1-mini/shots-10,0.725,0.7892485648334764,0.725,0.7485623974683336,0.9943333333333333
13
+ 10,o1-preview,o1-preview/shots-10,0.749,0.7964482186234537,0.749,0.7677316493549238,0.9873333333333333
14
+ 20,gpt-4o-mini,gpt-4o-mini/shots-20,0.6623333333333333,0.7686706009175459,0.6623333333333333,0.6798015109939115,0.998
15
  20,gpt-4o,gpt-4o/shots-20,0.7816666666666666,0.8204541793856629,0.7816666666666666,0.7967017169880498,0.9993333333333333
16
+ 20,o1-mini,o1-mini/shots-20,0.7343333333333333,0.786101455887261,0.7343333333333333,0.7535300565051624,0.9946666666666667
17
+ 20,o1-preview,o1-preview/shots-20,0.7443333333333333,0.7911442834260676,0.7443333333333333,0.7625144090816939,0.9853333333333333
18
+ 30,gpt-4o-mini,gpt-4o-mini/shots-30,0.6873333333333334,0.7684209723431035,0.6873333333333334,0.6913018667081989,0.999
19
  30,gpt-4o,gpt-4o/shots-30,0.7886666666666666,0.8260847852316618,0.7886666666666666,0.8030949295928699,0.999
20
+ 30,o1-preview,o1-preview/shots-30,0.7473333333333333,0.7920604378746952,0.7473333333333333,0.7643977099599287,0.984
21
+ 40,gpt-4o-mini,gpt-4o-mini/shots-40,0.6923333333333334,0.7639874967862498,0.6923333333333334,0.6924934068935911,0.9986666666666667
22
  40,gpt-4o,gpt-4o/shots-40,0.784,0.8233509309291644,0.784,0.7993336791122846,0.9973333333333333
23
+ 40,o1-preview,o1-preview/shots-40,0.7506666666666667,0.7964679024468982,0.7506666666666667,0.7674109766459014,0.984
24
+ 50,gpt-4o-mini,gpt-4o-mini/shots-50,0.717,0.7692638634416518,0.717,0.7105227254860433,0.9993333333333333
25
  50,gpt-4o,gpt-4o/shots-50,0.787,0.8234800466218334,0.787,0.8013530974301947,0.9993333333333333
26
+ 50,o1-preview,o1-preview/shots-50,0.7546666666666667,0.7979981023789272,0.7546666666666667,0.7708181822112403,0.9816666666666667
 
 
 
 
 
 
 
datasets/mgtv/unique_translations.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e84d985ddb2974831e549de74def6a16bff41e6eed39b38d8d78529a6615289
3
- size 1619169
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e99225113dcee9de7882586a73e8e741ccd90d55b12341bf4625811a7324ec58
3
+ size 1619162
llm_toolkit/logical_reasoning_utils.py CHANGED
@@ -116,6 +116,9 @@ P2_few_shot = """你是一个情景猜谜游戏的主持人。游戏规则如下
116
  回答:
117
  """
118
 
 
 
 
119
 
120
  def get_prompt_template(using_p1=True, chinese_prompt=True):
121
  if using_p1:
@@ -399,27 +402,31 @@ def load_alpaca_data(data_path, using_p1=True, use_english_datasets=False):
399
 
400
 
401
  def plot_value_counts(df, column_name, offset=0.1, title=None, preprocess_func=None):
402
- font_family = rcParams["font.family"]
403
- # Set the font to SimHei to support Chinese characters
404
- rcParams["font.family"] = "STHeiti"
405
- rcParams["axes.unicode_minus"] = (
406
- False # This is to support the minus sign in Chinese.
407
- )
408
  if preprocess_func:
409
  df["backup"] = df[column_name]
410
  df[column_name] = df[column_name].apply(preprocess_func)
411
 
412
  plt.figure(figsize=(8, 4))
413
- df[column_name].value_counts().plot(kind="bar")
 
 
 
 
414
  # add values on top of bars
415
- for i, v in enumerate(df[column_name].value_counts()):
416
  plt.text(i, v + offset, str(v), ha="center")
417
 
418
  plt.xlabel(title or column_name)
419
 
420
  plt.show()
421
 
422
- rcParams["font.family"] = font_family
423
 
424
  if preprocess_func:
425
  plot_confusion_matrix(df["label"], df[column_name])
@@ -432,12 +439,12 @@ def calc_metrics_for_col(df, col):
432
  return metrics["accuracy"], metrics["precision"], metrics["recall"], metrics["f1"]
433
 
434
 
435
- def get_metrics_df(df, variant="epoch"):
436
  perf_df = pd.DataFrame(
437
  columns=[variant, "model", "run", "accuracy", "precision", "recall", "f1"]
438
  )
439
  columns = (
440
- df.columns[5:]
441
  if variant == "index"
442
  else [
443
  col
@@ -445,6 +452,8 @@ def get_metrics_df(df, variant="epoch"):
445
  if variant in col or variant == "epoch" and "_torch." in col
446
  ]
447
  )
 
 
448
  print("columns:", columns)
449
  for i, col in enumerate(columns):
450
  metrics = calc_metrics(df["label"], df[col], debug=False)
@@ -592,21 +601,22 @@ def eval_openai(
592
 
593
 
594
  def plot_confusion_matrix(y_true, y_pred, title="Confusion Matrix"):
595
- font_family = rcParams["font.family"]
596
- # Set the font to SimHei to support Chinese characters
597
- rcParams["font.family"] = "STHeiti"
598
- rcParams["axes.unicode_minus"] = (
599
- False # This is to support the minus sign in Chinese.
600
- )
601
 
602
  labels = np.unique(y_true)
603
-
604
  y_pred = [extract_answer(text) for text in y_pred]
605
 
606
- cm = confusion_matrix(y_true, y_pred)
607
  cm = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]
608
 
609
  fig, ax = plt.subplots(figsize=(8, 8))
 
 
610
  sns.heatmap(
611
  cm,
612
  annot=True,
@@ -620,7 +630,7 @@ def plot_confusion_matrix(y_true, y_pred, title="Confusion Matrix"):
620
  ax.set_ylabel("True labels")
621
  plt.show()
622
 
623
- rcParams["font.family"] = font_family
624
 
625
 
626
  def majority_vote(r1, r2, r3):
 
116
  回答:
117
  """
118
 
119
+ df_translation = pd.read_csv("datasets/mgtv/unique_translations.csv")
120
+ translation_dict = df_translation.set_index("chinese").to_dict(orient="index")
121
+ translation_dict = {k: v["english"] for k, v in translation_dict.items()}
122
 
123
  def get_prompt_template(using_p1=True, chinese_prompt=True):
124
  if using_p1:
 
402
 
403
 
404
  def plot_value_counts(df, column_name, offset=0.1, title=None, preprocess_func=None):
405
+ # font_family = rcParams["font.family"]
406
+ # # Set the font to SimHei to support Chinese characters
407
+ # rcParams["font.family"] = "SimHei"
408
+ # rcParams["axes.unicode_minus"] = (
409
+ # False # This is to support the minus sign in Chinese.
410
+ # )
411
  if preprocess_func:
412
  df["backup"] = df[column_name]
413
  df[column_name] = df[column_name].apply(preprocess_func)
414
 
415
  plt.figure(figsize=(8, 4))
416
+
417
+ value_counts = df[column_name].value_counts()
418
+ value_counts = value_counts.rename(index=translation_dict)
419
+ value_counts.plot(kind="bar")
420
+
421
  # add values on top of bars
422
+ for i, v in enumerate(value_counts):
423
  plt.text(i, v + offset, str(v), ha="center")
424
 
425
  plt.xlabel(title or column_name)
426
 
427
  plt.show()
428
 
429
+ # rcParams["font.family"] = font_family
430
 
431
  if preprocess_func:
432
  plot_confusion_matrix(df["label"], df[column_name])
 
439
  return metrics["accuracy"], metrics["precision"], metrics["recall"], metrics["f1"]
440
 
441
 
442
+ def get_metrics_df(df, variant="epoch", sort_columns=True):
443
  perf_df = pd.DataFrame(
444
  columns=[variant, "model", "run", "accuracy", "precision", "recall", "f1"]
445
  )
446
  columns = (
447
+ df.columns[5:].tolist()
448
  if variant == "index"
449
  else [
450
  col
 
452
  if variant in col or variant == "epoch" and "_torch." in col
453
  ]
454
  )
455
+ if sort_columns:
456
+ columns = sorted(columns, key=lambda x: int(x.lower().replace("-1m", "").replace("chat", "0").replace("instruct", "0").split("-")[-1].split("_")[0]))
457
  print("columns:", columns)
458
  for i, col in enumerate(columns):
459
  metrics = calc_metrics(df["label"], df[col], debug=False)
 
601
 
602
 
603
  def plot_confusion_matrix(y_true, y_pred, title="Confusion Matrix"):
604
+ # font_family = rcParams["font.family"]
605
+ # # Set the font to SimHei to support Chinese characters
606
+ # rcParams["font.family"] = "SimHei"
607
+ # rcParams["axes.unicode_minus"] = (
608
+ # False # This is to support the minus sign in Chinese.
609
+ # )
610
 
611
  labels = np.unique(y_true)
 
612
  y_pred = [extract_answer(text) for text in y_pred]
613
 
614
+ cm = confusion_matrix(y_true, y_pred, labels=labels)
615
  cm = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]
616
 
617
  fig, ax = plt.subplots(figsize=(8, 8))
618
+ labels = [translation_dict[x] for x in labels]
619
+
620
  sns.heatmap(
621
  cm,
622
  annot=True,
 
630
  ax.set_ylabel("True labels")
631
  plt.show()
632
 
633
+ # rcParams["font.family"] = font_family
634
 
635
 
636
  def majority_vote(r1, r2, r3):
notebooks/00_Data Analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/01a_internlm2_5-20b-chat_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/01a_internlm2_5-7b-chat-1m_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/01a_internlm2_5-7b-chat_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/01b_Mistral-7B-v0.3-Chinese-Chat_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/02a_Qwen2-7B-Instruct_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/02b_Qwen2-72B-Instruct_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/02c_Qwen2.5-3B-Instruct_analysis.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/03a_Llama3.1-8B-Chinese-Chat_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/03b_Llama3.1-70B-Chinese-Chat_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/04b_OpenAI-Models_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
scripts/eval-mgtv-qwen2.5.sh CHANGED
@@ -13,12 +13,18 @@ cat /etc/os-release
13
  lscpu
14
  grep MemTotal /proc/meminfo
15
 
 
 
 
 
16
  $BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-3B-Instruct
17
 
18
- $BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-1.5B-Instruct
 
 
19
 
20
- $BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-1.5B-Instruct
21
 
22
- $BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-0.5B-Instruct
23
 
24
- $BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-0.5B-Instruct
 
13
  lscpu
14
  grep MemTotal /proc/meminfo
15
 
16
+ $BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-7B-Instruct
17
+
18
+ $BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-7B-Instruct
19
+
20
  $BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-3B-Instruct
21
 
22
+ $BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-3B-Instruct
23
+
24
+ # $BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-1.5B-Instruct
25
 
26
+ # $BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-1.5B-Instruct
27
 
28
+ # $BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-0.5B-Instruct
29
 
30
+ # $BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-0.5B-Instruct
scripts/tune-mgtv-bf16.sh CHANGED
@@ -25,10 +25,12 @@ export LOGICAL_REASONING_RESULTS_PATH=results/mgtv-results_bf16.csv
25
 
26
  # $BASEDIR/scripts/tune-lf_v2.sh internlm internlm2_5-7b-chat intern2
27
 
28
- $BASEDIR/scripts/tune-lf_v2.sh Qwen Qwen2.5-7B-Instruct qwen
29
 
30
- #$BASEDIR/scripts/tune-lf_v2.sh Qwen Qwen2.5-3B-Instruct qwen
31
 
32
  #$BASEDIR/scripts/tune-lf_v2.sh Qwen Qwen2.5-1.5B-Instruct qwen
33
 
34
  #$BASEDIR/scripts/tune-lf_v2.sh Qwen Qwen2.5-0.5B-Instruct qwen
 
 
 
25
 
26
  # $BASEDIR/scripts/tune-lf_v2.sh internlm internlm2_5-7b-chat intern2
27
 
28
+ # $BASEDIR/scripts/tune-lf_v2.sh Qwen Qwen2.5-7B-Instruct qwen
29
 
30
+ $BASEDIR/scripts/tune-lf_v2.sh Qwen Qwen2.5-3B-Instruct qwen
31
 
32
  #$BASEDIR/scripts/tune-lf_v2.sh Qwen Qwen2.5-1.5B-Instruct qwen
33
 
34
  #$BASEDIR/scripts/tune-lf_v2.sh Qwen Qwen2.5-0.5B-Instruct qwen
35
+
36
+ $BASEDIR/scripts/eval-mgtv-shots-qwen2.5.sh