Spaces:
Build error
Build error
ready for qwen2.5
Browse files- data/Llama3.1-70B-Chinese-Chat_shots_metrics.csv +3 -0
- data/Mistral-7B-v0.3-Chinese-Chat_metrics.csv +10 -10
- data/Mistral-7B-v0.3-Chinese-Chat_shots_metrics.csv +2 -0
- data/Qwen2.5-3B-Instruct_metrics.csv +4 -0
- data/Qwen2.5-3B-Instruct_shots_metrics.csv +2 -0
- data/best_metrics.csv +13 -12
- data/best_results.csv +0 -0
- data/few-shots_metrics.csv +40 -0
- data/fine-tuning_metrics.csv +78 -0
- data/internlm2_5-20b-chat_metrics.csv +10 -10
- data/internlm2_5-7b-chat_metrics.csv +10 -10
- data/openai_metrics.csv +17 -14
- datasets/mgtv/unique_translations.csv +2 -2
- llm_toolkit/logical_reasoning_utils.py +30 -20
- notebooks/00_Data Analysis.ipynb +0 -0
- notebooks/01a_internlm2_5-20b-chat_analysis.ipynb +0 -0
- notebooks/01a_internlm2_5-7b-chat-1m_analysis.ipynb +0 -0
- notebooks/01a_internlm2_5-7b-chat_analysis.ipynb +0 -0
- notebooks/01b_Mistral-7B-v0.3-Chinese-Chat_analysis.ipynb +0 -0
- notebooks/02a_Qwen2-7B-Instruct_analysis.ipynb +0 -0
- notebooks/02b_Qwen2-72B-Instruct_analysis.ipynb +0 -0
- notebooks/02c_Qwen2.5-3B-Instruct_analysis.ipynb +0 -0
- notebooks/03a_Llama3.1-8B-Chinese-Chat_analysis.ipynb +0 -0
- notebooks/03b_Llama3.1-70B-Chinese-Chat_analysis.ipynb +0 -0
- notebooks/04b_OpenAI-Models_analysis.ipynb +0 -0
- scripts/eval-mgtv-qwen2.5.sh +10 -4
- scripts/tune-mgtv-bf16.sh +4 -2
data/Llama3.1-70B-Chinese-Chat_shots_metrics.csv
CHANGED
@@ -1,2 +1,5 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-00,0.7636666666666667,0.7806653325131986,0.7636666666666667,0.7525813484548423,0.009666666666666667
|
|
|
|
|
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-00,0.7636666666666667,0.7806653325131986,0.7636666666666667,0.7525813484548423,0.009666666666666667
|
3 |
+
5,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-05,0.7536666666666667,0.772126097633354,0.7536666666666667,0.7545029613768596,0.79
|
4 |
+
10,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-10,0.754,0.7729477984842943,0.754,0.756682017266956,0.8326666666666667
|
5 |
+
20,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-20,0.738,0.7566938786102072,0.738,0.7348961489952073,0.819
|
data/Mistral-7B-v0.3-Chinese-Chat_metrics.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
0.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat_torch.float16_lf,0.6946666666666667,0.701136267898111,0.6946666666666667,0.6634078645357937,0.011666666666666667
|
3 |
-
0.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-
|
4 |
-
0.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-
|
5 |
-
0.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-
|
6 |
-
0.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-
|
7 |
-
1.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-
|
8 |
-
1.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-
|
9 |
-
1.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-
|
10 |
-
1.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-
|
11 |
-
1.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-
|
12 |
-
2.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
0.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat_torch.float16_lf,0.6946666666666667,0.701136267898111,0.6946666666666667,0.6634078645357937,0.011666666666666667
|
3 |
+
0.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-35_torch.float16_lf,0.702,0.7932731014186957,0.702,0.7342714734731689,1.0
|
4 |
+
0.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-70_torch.float16_lf,0.742,0.78982949223512,0.742,0.7536681109811127,1.0
|
5 |
+
0.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-105_torch.float16_lf,0.6596666666666666,0.7923396753604393,0.6596666666666666,0.7067542301676931,1.0
|
6 |
+
0.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7146666666666667,0.7861341885687435,0.7146666666666667,0.7404677278137267,1.0
|
7 |
+
1.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-175_torch.float16_lf,0.7326666666666667,0.7876867721932461,0.7326666666666667,0.7471869515031995,1.0
|
8 |
+
1.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7016666666666667,0.7903119228393193,0.7016666666666667,0.7348708822385348,1.0
|
9 |
+
1.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-245_torch.float16_lf,0.75,0.7885868317699068,0.75,0.7648234347578796,1.0
|
10 |
+
1.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-280_torch.float16_lf,0.7156666666666667,0.7846106674095725,0.7156666666666667,0.7410042005708856,1.0
|
11 |
+
1.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-315_torch.float16_lf,0.6916666666666667,0.7864256994491394,0.6916666666666667,0.7257499426487266,1.0
|
12 |
+
2.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-350_torch.float16_lf,0.6976666666666667,0.7889443494370009,0.6976666666666667,0.7307996137659796,1.0
|
data/Mistral-7B-v0.3-Chinese-Chat_shots_metrics.csv
CHANGED
@@ -1,3 +1,5 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-00,0.6946666666666667,0.701136267898111,0.6946666666666667,0.6634078645357937,0.011666666666666667
|
|
|
3 |
10,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-10,0.6036666666666667,0.7334913867282189,0.6036666666666667,0.6493185547247415,0.10633333333333334
|
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-00,0.6946666666666667,0.701136267898111,0.6946666666666667,0.6634078645357937,0.011666666666666667
|
3 |
+
5,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-05,0.6446666666666667,0.7451807329096397,0.6446666666666667,0.681030628954011,0.142
|
4 |
10,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-10,0.6036666666666667,0.7334913867282189,0.6036666666666667,0.6493185547247415,0.10633333333333334
|
5 |
+
20,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-20,0.6276666666666667,0.7398894455389585,0.6276666666666667,0.6690543758928521,0.08266666666666667
|
data/Qwen2.5-3B-Instruct_metrics.csv
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,Qwen2.5-3B-Instruct,qwen/Qwen2.5-3B-Instruct/checkpoint-35_torch.bfloat16_lf,0.7033333333333334,0.7493686353899274,0.7033333333333334,0.7196581245915875,1.0
|
3 |
+
0.2,Qwen2.5-3B-Instruct,qwen/Qwen2.5-3B-Instruct/checkpoint-70_torch.bfloat16_lf,0.664,0.7490874767990094,0.664,0.6954540806463714,1.0
|
4 |
+
0.4,Qwen2.5-3B-Instruct,qwen/Qwen2.5-3B-Instruct/checkpoint-88_torch.bfloat16_lf,0.6743333333333333,0.7591682267298503,0.6743333333333333,0.7069378240575964,1.0
|
data/Qwen2.5-3B-Instruct_shots_metrics.csv
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-00,0.5796666666666667,0.6966500240864278,0.5796666666666667,0.5506370828782681,1.0
|
data/best_metrics.csv
CHANGED
@@ -1,13 +1,14 @@
|
|
1 |
index,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
1,internlm2_5-7b-chat,internlm2_5-7b-chat,0.7496666666666667,0.8041871978859686,0.7496666666666667,0.7660159670998776,1.0
|
3 |
-
2,internlm2_5-7b-chat-1m,internlm2_5-7b-chat-1m,0.803,0.8031411888150441,0.803,0.8028064320197301,1.0
|
4 |
-
3,
|
5 |
-
4,Qwen2-7B-Instruct,Qwen2-7B-Instruct,0.759,0.8005303465799652,0.759,0.7748745026535183,1.0
|
6 |
-
5,
|
7 |
-
6,Llama3.1-
|
8 |
-
7,
|
9 |
-
8,
|
10 |
-
9,
|
11 |
-
10,
|
12 |
-
11,
|
13 |
-
12,
|
|
|
|
1 |
index,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
1,internlm2_5-7b-chat (0.8-epoch),internlm2_5-7b-chat (0.8-epoch),0.7496666666666667,0.8041871978859686,0.7496666666666667,0.7660159670998776,1.0
|
3 |
+
2,internlm2_5-7b-chat-1m (0.8-epoch),internlm2_5-7b-chat-1m (0.8-epoch),0.803,0.8031411888150441,0.803,0.8028064320197301,1.0
|
4 |
+
3,internlm2_5-20b-chat (0.8-epoch),internlm2_5-20b-chat (0.8-epoch),0.795,0.817457691710893,0.795,0.8027552955647029,1.0
|
5 |
+
4,Qwen2-7B-Instruct (0.4-epoch),Qwen2-7B-Instruct (0.4-epoch),0.759,0.8005303465799652,0.759,0.7748745026535183,1.0
|
6 |
+
5,Qwen2-72B-Instruct (1.8-epoch),Qwen2-72B-Instruct (1.8-epoch),0.784,0.8354349234761956,0.784,0.804194683154365,1.0
|
7 |
+
6,Llama3.1-8B-Chinese-Chat (1.0-epoch),Llama3.1-8B-Chinese-Chat (1.0-epoch),0.78,0.810582723471486,0.78,0.7924651054056209,1.0
|
8 |
+
7,Llama3.1-70B-Chinese-Chat (1.0-epoch),Llama3.1-70B-Chinese-Chat (1.0-epoch),0.7963333333333333,0.8248972880055918,0.7963333333333333,0.8076868978089201,1.0
|
9 |
+
8,gpt-4o-mini (0-shot),gpt-4o-mini (0-shot),0.7176666666666667,0.785706730193659,0.7176666666666667,0.7296061848734905,1.0
|
10 |
+
9,o1-mini (20-shot),o1-mini (20-shot),0.7343333333333333,0.786101455887261,0.7343333333333333,0.7535300565051624,0.999
|
11 |
+
10,gpt-4o (10-shot),gpt-4o (10-shot),0.7916666666666666,0.8227707658360168,0.7916666666666666,0.803614688453356,0.9996666666666667
|
12 |
+
11,o1-preview (50-shot),o1-preview (50-shot),0.7546666666666667,0.7979981023789272,0.7546666666666667,0.7708181822112403,0.9996666666666667
|
13 |
+
12,Ensemble Model (Open Source),Ensemble Model (Open Source),0.8193333333333334,0.8407464756633664,0.8193333333333334,0.828054127213081,1.0
|
14 |
+
13,Ensemble Model (OpenAI),Ensemble Model (OpenAI),0.7986666666666666,0.8223071972084313,0.7986666666666666,0.8080230503376233,1.0
|
data/best_results.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
data/few-shots_metrics.csv
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-00,0.705,0.7398041613378253,0.705,0.6906357423169466,1.0
|
3 |
+
10,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-10,0.5533333333333333,0.7301739373336078,0.5533333333333333,0.625097481985829,0.9883333333333332
|
4 |
+
0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-00,0.4813333333333333,0.7605248207587668,0.4813333333333333,0.5244515621126862,0.9986666666666668
|
5 |
+
10,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-10,0.6473333333333333,0.7282065610714444,0.6473333333333333,0.665824871588245,0.8866666666666667
|
6 |
+
0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/shots-00,0.564,0.7745256693833624,0.564,0.6352190975436365,0.6726666666666666
|
7 |
+
0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-00,0.683,0.7493103872717293,0.683,0.710140098232232,0.9996666666666668
|
8 |
+
10,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-10,0.5646666666666667,0.7391197908117386,0.5646666666666667,0.6064049121095652,0.9896666666666668
|
9 |
+
0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch/shots-00,0.7516666666666667,0.7949378981748352,0.7516666666666667,0.7572499605227642,0.9773333333333334
|
10 |
+
0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-00,0.742,0.7477056799746837,0.742,0.7371050181385632,0.8033333333333333
|
11 |
+
10,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-10,0.6676666666666666,0.7834080522821993,0.6676666666666666,0.7082605860921491,0.9623333333333334
|
12 |
+
0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-00,0.7636666666666667,0.7806653325131986,0.7636666666666667,0.7525813484548423,0.0096666666666666
|
13 |
+
5,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-05,0.7536666666666667,0.772126097633354,0.7536666666666667,0.7545029613768596,0.79
|
14 |
+
10,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-10,0.754,0.7729477984842943,0.754,0.756682017266956,0.8326666666666667
|
15 |
+
20,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-20,0.738,0.7566938786102072,0.738,0.7348961489952073,0.819
|
16 |
+
0,gpt-4o-mini,gpt-4o-mini/shots-00,0.7176666666666667,0.785706730193659,0.7176666666666667,0.7296061848734905,0.9916666666666668
|
17 |
+
5,gpt-4o-mini,gpt-4o-mini/shots-05,0.7176666666666667,0.7767294185987051,0.7176666666666667,0.7181068311028772,0.9996666666666668
|
18 |
+
10,gpt-4o-mini,gpt-4o-mini/shots-10,0.6793333333333333,0.7728086050218999,0.6793333333333333,0.6916749681933937,0.9983333333333332
|
19 |
+
20,gpt-4o-mini,gpt-4o-mini/shots-20,0.6623333333333333,0.7686706009175459,0.6623333333333333,0.6798015109939115,0.998
|
20 |
+
30,gpt-4o-mini,gpt-4o-mini/shots-30,0.6873333333333334,0.7684209723431035,0.6873333333333334,0.6913018667081989,0.999
|
21 |
+
40,gpt-4o-mini,gpt-4o-mini/shots-40,0.6923333333333334,0.7639874967862498,0.6923333333333334,0.6924934068935911,0.9986666666666668
|
22 |
+
50,gpt-4o-mini,gpt-4o-mini/shots-50,0.717,0.7692638634416518,0.717,0.7105227254860433,0.9993333333333332
|
23 |
+
0,o1-mini,o1-mini/shots-00,0.7083333333333334,0.7848098266888749,0.7083333333333334,0.7377068425566796,0.999
|
24 |
+
5,o1-mini,o1-mini/shots-05,0.724,0.7905045610386181,0.724,0.7482963122126776,0.9966666666666668
|
25 |
+
10,o1-mini,o1-mini/shots-10,0.725,0.7892485648334764,0.725,0.7485623974683336,0.9943333333333332
|
26 |
+
20,o1-mini,o1-mini/shots-20,0.7343333333333333,0.786101455887261,0.7343333333333333,0.7535300565051624,0.9946666666666668
|
27 |
+
0,gpt-4o,gpt-4o/shots-00,0.782,0.8204048322982596,0.782,0.7953019682198627,0.066
|
28 |
+
5,gpt-4o,gpt-4o/shots-05,0.7873333333333333,0.8230974205170392,0.7873333333333333,0.8000290527498529,0.998
|
29 |
+
10,gpt-4o,gpt-4o/shots-10,0.7916666666666666,0.8227707658360168,0.7916666666666666,0.803614688453356,0.9996666666666668
|
30 |
+
20,gpt-4o,gpt-4o/shots-20,0.7816666666666666,0.8204541793856629,0.7816666666666666,0.7967017169880498,0.9993333333333332
|
31 |
+
30,gpt-4o,gpt-4o/shots-30,0.7886666666666666,0.8260847852316618,0.7886666666666666,0.8030949295928699,0.999
|
32 |
+
40,gpt-4o,gpt-4o/shots-40,0.784,0.8233509309291644,0.784,0.7993336791122846,0.9973333333333332
|
33 |
+
50,gpt-4o,gpt-4o/shots-50,0.787,0.8234800466218334,0.787,0.8013530974301947,0.9993333333333332
|
34 |
+
0,o1-preview,o1-preview/shots-00,0.721,0.7849371317342158,0.721,0.7451207069815194,0.998
|
35 |
+
5,o1-preview,o1-preview/shots-05,0.7313333333333333,0.7878283093765627,0.7313333333333333,0.7535489719321234,0.979
|
36 |
+
10,o1-preview,o1-preview/shots-10,0.749,0.7964482186234537,0.749,0.7677316493549238,0.9873333333333332
|
37 |
+
20,o1-preview,o1-preview/shots-20,0.7443333333333333,0.7911442834260676,0.7443333333333333,0.7625144090816939,0.9853333333333332
|
38 |
+
30,o1-preview,o1-preview/shots-30,0.7473333333333333,0.7920604378746952,0.7473333333333333,0.7643977099599287,0.984
|
39 |
+
40,o1-preview,o1-preview/shots-40,0.7506666666666667,0.7964679024468982,0.7506666666666667,0.7674109766459014,0.984
|
40 |
+
50,o1-preview,o1-preview/shots-50,0.7546666666666667,0.7979981023789272,0.7546666666666667,0.7708181822112403,0.9816666666666668
|
data/fine-tuning_metrics.csv
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat_torch.bfloat16_lf,0.705,0.7398041613378253,0.705,0.6906357423169466,1.0
|
3 |
+
0.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-35_torch.bfloat16_lf,0.7193333333333334,0.7863486093365692,0.7193333333333334,0.7330498811142795,1.0
|
4 |
+
0.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-70_torch.bfloat16_lf,0.726,0.7900250828103491,0.726,0.7396583495246526,1.0
|
5 |
+
0.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-105_torch.bfloat16_lf,0.6736666666666666,0.8044565554629858,0.6736666666666666,0.7104123104529902,1.0
|
6 |
+
0.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-140_torch.bfloat16_lf,0.7496666666666667,0.8041871978859686,0.7496666666666667,0.7660159670998776,1.0
|
7 |
+
1.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-175_torch.bfloat16_lf,0.726,0.8094634420846424,0.726,0.751394838822856,1.0
|
8 |
+
1.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-210_torch.bfloat16_lf,0.7276666666666667,0.8039673699820601,0.7276666666666667,0.7488653386949028,1.0
|
9 |
+
1.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-245_torch.bfloat16_lf,0.747,0.8055537753403307,0.747,0.76527383722639,1.0
|
10 |
+
1.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-280_torch.bfloat16_lf,0.7166666666666667,0.8059535682746547,0.7166666666666667,0.7432427946178835,1.0
|
11 |
+
1.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-315_torch.bfloat16_lf,0.6983333333333334,0.8119110469658597,0.6983333333333334,0.7347246872892312,1.0
|
12 |
+
2.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-350_torch.bfloat16_lf,0.7076666666666667,0.8120132783051135,0.7076666666666667,0.7408145046817652,1.0
|
13 |
+
0.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m_torch.bfloat16_lf,0.4813333333333333,0.7605248207587668,0.4813333333333333,0.5244515621126862,0.9986666666666668
|
14 |
+
0.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-35_torch.bfloat16_lf,0.7843333333333333,0.7977648302848388,0.7843333333333333,0.7864944570659659,1.0
|
15 |
+
0.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-70_torch.bfloat16_lf,0.7836666666666666,0.7996977262947886,0.7836666666666666,0.7886881726841081,1.0
|
16 |
+
0.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-105_torch.bfloat16_lf,0.7243333333333334,0.8171172705912051,0.7243333333333334,0.7565804830382912,1.0
|
17 |
+
0.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-140_torch.bfloat16_lf,0.803,0.8031411888150441,0.803,0.8028064320197301,1.0
|
18 |
+
1.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-175_torch.bfloat16_lf,0.7676666666666667,0.8108441731715863,0.7676666666666667,0.7843187816704813,1.0
|
19 |
+
1.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-210_torch.bfloat16_lf,0.7736666666666666,0.8091671780923799,0.7736666666666666,0.7876874850235454,1.0
|
20 |
+
1.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-245_torch.bfloat16_lf,0.7623333333333333,0.8062291602218205,0.7623333333333333,0.777669094563925,1.0
|
21 |
+
1.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-280_torch.bfloat16_lf,0.7553333333333333,0.8086197936829652,0.7553333333333333,0.7755588811428297,1.0
|
22 |
+
1.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-315_torch.bfloat16_lf,0.748,0.8171996792797457,0.748,0.773990849396903,1.0
|
23 |
+
2.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-350_torch.bfloat16_lf,0.756,0.8126875394266148,0.756,0.7777812522863184,1.0
|
24 |
+
0.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat_torch.bfloat16_4bit_lf,0.564,0.7745256693833624,0.564,0.6352190975436365,0.6726666666666666
|
25 |
+
0.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-35_torch.bfloat16_4bit_lf,0.7576666666666667,0.7960640143421251,0.7576666666666667,0.769346697622254,1.0
|
26 |
+
0.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-70_torch.bfloat16_4bit_lf,0.7743333333333333,0.8042791719587958,0.7743333333333333,0.7849233169481004,1.0
|
27 |
+
0.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-105_torch.bfloat16_4bit_lf,0.7053333333333334,0.8070587351344375,0.7053333333333334,0.7421985241641746,1.0
|
28 |
+
0.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-140_torch.bfloat16_4bit_lf,0.795,0.817457691710893,0.795,0.8027552955647029,1.0
|
29 |
+
1.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-175_torch.bfloat16_4bit_lf,0.7786666666666666,0.8220512342362645,0.7786666666666666,0.7938353741035283,1.0
|
30 |
+
1.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-210_torch.bfloat16_4bit_lf,0.7516666666666667,0.8264680853251051,0.7516666666666667,0.7787088167337303,1.0
|
31 |
+
1.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-245_torch.bfloat16_4bit_lf,0.7876666666666666,0.8154190698395475,0.7876666666666666,0.7965399224841393,1.0
|
32 |
+
1.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-280_torch.bfloat16_4bit_lf,0.7753333333333333,0.8181125383376948,0.7753333333333333,0.7899794199099057,1.0
|
33 |
+
1.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-315_torch.bfloat16_4bit_lf,0.7583333333333333,0.8179523170315577,0.7583333333333333,0.7795358413482081,1.0
|
34 |
+
2.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7616666666666667,0.8208475549648238,0.7616666666666667,0.7826736174247095,1.0
|
35 |
+
0.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct_torch.float16_lf,0.683,0.7493103872717293,0.683,0.710140098232232,0.9996666666666668
|
36 |
+
0.2,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-35_torch.float16_lf,0.725,0.7840171468707405,0.725,0.748994536667058,0.9996666666666668
|
37 |
+
0.4,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-70_torch.float16_lf,0.759,0.8005303465799652,0.759,0.7748745026535183,1.0
|
38 |
+
0.6,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-105_torch.float16_lf,0.6926666666666667,0.8039176975550218,0.6926666666666667,0.7332481528585848,1.0
|
39 |
+
0.8,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-140_torch.float16_lf,0.725,0.7952719247171957,0.725,0.7476238017654298,1.0
|
40 |
+
1.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-175_torch.float16_lf,0.6756666666666666,0.7810148934939715,0.6756666666666666,0.708653993277772,1.0
|
41 |
+
1.2,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-210_torch.float16_lf,0.7013333333333334,0.7969562600853992,0.7013333333333334,0.7362679665494508,1.0
|
42 |
+
1.4,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-245_torch.float16_lf,0.7326666666666667,0.7922538479314682,0.7326666666666667,0.755402136631717,0.9996666666666668
|
43 |
+
1.6,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-280_torch.float16_lf,0.6983333333333334,0.785127298428753,0.6983333333333334,0.7292251109166867,1.0
|
44 |
+
1.8,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-315_torch.float16_lf,0.6783333333333333,0.785390767631834,0.6783333333333333,0.7164131321837346,1.0
|
45 |
+
2.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-350_torch.float16_lf,0.689,0.7929715746898984,0.689,0.7259993126510194,1.0
|
46 |
+
0.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch.bfloat16_4bit_lf,0.7516666666666667,0.7949378981748352,0.7516666666666667,0.7572499605227642,0.9773333333333334
|
47 |
+
0.2,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.7583333333333333,0.8199928526815756,0.7583333333333333,0.782751089787442,1.0
|
48 |
+
0.4,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.7366666666666667,0.8224865755517643,0.7366666666666667,0.7700627366337021,1.0
|
49 |
+
0.6,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-105_torch.bfloat16_4bit_lf,0.757,0.8253824826209251,0.757,0.784000409833628,1.0
|
50 |
+
0.8,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-140_torch.bfloat16_4bit_lf,0.7893333333333333,0.8229104753645825,0.7893333333333333,0.8033124955993173,1.0
|
51 |
+
1.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-175_torch.bfloat16_4bit_lf,0.7376666666666667,0.8243654864769323,0.7376666666666667,0.7699617360961548,1.0
|
52 |
+
1.2,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-210_torch.bfloat16_4bit_lf,0.763,0.8318882808702871,0.763,0.7901075708186186,1.0
|
53 |
+
1.4,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-245_torch.bfloat16_4bit_lf,0.7656666666666667,0.8288272203240518,0.7656666666666667,0.790627109330698,1.0
|
54 |
+
1.6,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-280_torch.bfloat16_4bit_lf,0.7693333333333333,0.8292798021666021,0.7693333333333333,0.7930169589012503,1.0
|
55 |
+
1.8,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-315_torch.bfloat16_4bit_lf,0.784,0.8354349234761956,0.784,0.804194683154365,1.0
|
56 |
+
2.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-350_torch.bfloat16_4bit_lf,0.7736666666666666,0.8330147983140184,0.7736666666666666,0.7973657072550873,1.0
|
57 |
+
0.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat_torch.float16_lf,0.742,0.7477056799746837,0.742,0.7371050181385632,0.8033333333333333
|
58 |
+
0.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-35_torch.float16_lf,0.709,0.7987219597893886,0.709,0.7427961200958145,1.0
|
59 |
+
0.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-70_torch.float16_lf,0.7163333333333334,0.8058657875960304,0.7163333333333334,0.7487811196109319,0.9993333333333332
|
60 |
+
0.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-105_torch.float16_lf,0.6996666666666667,0.802722482275839,0.6996666666666667,0.7370938556711591,1.0
|
61 |
+
0.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7716666666666666,0.8092193821623755,0.7716666666666666,0.7864287269398251,1.0
|
62 |
+
1.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-175_torch.float16_lf,0.78,0.810582723471486,0.78,0.7924651054056209,1.0
|
63 |
+
1.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7313333333333333,0.8157783263996798,0.7313333333333333,0.7628807622782868,1.0
|
64 |
+
1.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-245_torch.float16_lf,0.751,0.8125856808988221,0.751,0.7745416635653988,1.0
|
65 |
+
1.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-280_torch.float16_lf,0.739,0.8097375095673094,0.739,0.7662329023371559,1.0
|
66 |
+
1.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-315_torch.float16_lf,0.7236666666666667,0.8145530585912838,0.7236666666666667,0.7580428816095297,1.0
|
67 |
+
2.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-350_torch.float16_lf,0.7293333333333333,0.8151184301713545,0.7293333333333333,0.7616699266814145,1.0
|
68 |
+
0.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat_torch.bfloat16_4bit_lf,0.7636666666666667,0.7806653325131986,0.7636666666666667,0.7525813484548423,0.0096666666666666
|
69 |
+
0.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-35_torch.bfloat16_4bit_lf,0.778,0.8148707737020212,0.778,0.7910805488003003,0.9996666666666668
|
70 |
+
0.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-70_torch.bfloat16_4bit_lf,0.7306666666666667,0.8145782271710159,0.7306666666666667,0.7624724104697406,1.0
|
71 |
+
0.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-105_torch.bfloat16_4bit_lf,0.7193333333333334,0.8213567226911125,0.7193333333333334,0.7560702640626931,1.0
|
72 |
+
0.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-140_torch.bfloat16_4bit_lf,0.7563333333333333,0.826789897753756,0.7563333333333333,0.7815164366677209,1.0
|
73 |
+
1.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-175_torch.bfloat16_4bit_lf,0.7963333333333333,0.8248972880055918,0.7963333333333333,0.8076868978089201,1.0
|
74 |
+
1.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-210_torch.bfloat16_4bit_lf,0.7326666666666667,0.8265345821998035,0.7326666666666667,0.7644418492070342,1.0
|
75 |
+
1.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-245_torch.bfloat16_4bit_lf,0.7556666666666667,0.8258994609525315,0.7556666666666667,0.7820405339757727,1.0
|
76 |
+
1.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-280_torch.bfloat16_4bit_lf,0.757,0.8264461657684251,0.757,0.7834496144681513,1.0
|
77 |
+
1.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-315_torch.bfloat16_4bit_lf,0.7546666666666667,0.8277723752096544,0.7546666666666667,0.7823584779069335,1.0
|
78 |
+
2.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7496666666666667,0.8282310230333227,0.7496666666666667,0.7791947625361637,1.0
|
data/internlm2_5-20b-chat_metrics.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
0.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat_torch.bfloat16_4bit_lf,0.564,0.7745256693833624,0.564,0.6352190975436365,0.6726666666666666
|
3 |
-
0.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-
|
4 |
-
0.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-
|
5 |
-
0.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-
|
6 |
-
0.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-
|
7 |
-
1.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-
|
8 |
-
1.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-
|
9 |
-
1.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-
|
10 |
-
1.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-
|
11 |
-
1.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-
|
12 |
-
2.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
0.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat_torch.bfloat16_4bit_lf,0.564,0.7745256693833624,0.564,0.6352190975436365,0.6726666666666666
|
3 |
+
0.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-35_torch.bfloat16_4bit_lf,0.7576666666666667,0.7960640143421251,0.7576666666666667,0.769346697622254,1.0
|
4 |
+
0.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-70_torch.bfloat16_4bit_lf,0.7743333333333333,0.8042791719587958,0.7743333333333333,0.7849233169481004,1.0
|
5 |
+
0.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-105_torch.bfloat16_4bit_lf,0.7053333333333334,0.8070587351344375,0.7053333333333334,0.7421985241641746,1.0
|
6 |
+
0.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-140_torch.bfloat16_4bit_lf,0.795,0.817457691710893,0.795,0.8027552955647029,1.0
|
7 |
+
1.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-175_torch.bfloat16_4bit_lf,0.7786666666666666,0.8220512342362645,0.7786666666666666,0.7938353741035283,1.0
|
8 |
+
1.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-210_torch.bfloat16_4bit_lf,0.7516666666666667,0.8264680853251051,0.7516666666666667,0.7787088167337303,1.0
|
9 |
+
1.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-245_torch.bfloat16_4bit_lf,0.7876666666666666,0.8154190698395475,0.7876666666666666,0.7965399224841393,1.0
|
10 |
+
1.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-280_torch.bfloat16_4bit_lf,0.7753333333333333,0.8181125383376948,0.7753333333333333,0.7899794199099057,1.0
|
11 |
+
1.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-315_torch.bfloat16_4bit_lf,0.7583333333333333,0.8179523170315577,0.7583333333333333,0.7795358413482081,1.0
|
12 |
+
2.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7616666666666667,0.8208475549648238,0.7616666666666667,0.7826736174247095,1.0
|
data/internlm2_5-7b-chat_metrics.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
0.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat_torch.bfloat16_lf,0.705,0.7398041613378253,0.705,0.6906357423169466,1.0
|
3 |
-
0.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-
|
4 |
-
0.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-
|
5 |
-
0.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-
|
6 |
-
0.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-
|
7 |
-
1.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-
|
8 |
-
1.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-
|
9 |
-
1.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-
|
10 |
-
1.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-
|
11 |
-
1.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-
|
12 |
-
2.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
0.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat_torch.bfloat16_lf,0.705,0.7398041613378253,0.705,0.6906357423169466,1.0
|
3 |
+
0.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-35_torch.bfloat16_lf,0.7193333333333334,0.7863486093365692,0.7193333333333334,0.7330498811142795,1.0
|
4 |
+
0.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-70_torch.bfloat16_lf,0.726,0.7900250828103491,0.726,0.7396583495246526,1.0
|
5 |
+
0.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-105_torch.bfloat16_lf,0.6736666666666666,0.8044565554629858,0.6736666666666666,0.7104123104529902,1.0
|
6 |
+
0.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-140_torch.bfloat16_lf,0.7496666666666667,0.8041871978859686,0.7496666666666667,0.7660159670998776,1.0
|
7 |
+
1.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-175_torch.bfloat16_lf,0.726,0.8094634420846424,0.726,0.751394838822856,1.0
|
8 |
+
1.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-210_torch.bfloat16_lf,0.7276666666666667,0.8039673699820601,0.7276666666666667,0.7488653386949028,1.0
|
9 |
+
1.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-245_torch.bfloat16_lf,0.747,0.8055537753403307,0.747,0.76527383722639,1.0
|
10 |
+
1.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-280_torch.bfloat16_lf,0.7166666666666667,0.8059535682746547,0.7166666666666667,0.7432427946178835,1.0
|
11 |
+
1.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-315_torch.bfloat16_lf,0.6983333333333334,0.8119110469658597,0.6983333333333334,0.7347246872892312,1.0
|
12 |
+
2.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-350_torch.bfloat16_lf,0.7076666666666667,0.8120132783051135,0.7076666666666667,0.7408145046817652,1.0
|
data/openai_metrics.csv
CHANGED
@@ -1,23 +1,26 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
0,gpt-4o-mini,gpt-4o-mini/shots-00,0.7176666666666667,0.785706730193659,0.7176666666666667,0.7296061848734905,0.9916666666666667
|
3 |
-
5,gpt-4o-mini,gpt-4o-mini/shots-05,0.7176666666666667,0.7767294185987051,0.7176666666666667,0.7181068311028772,0.9996666666666667
|
4 |
-
10,gpt-4o-mini,gpt-4o-mini/shots-10,0.6793333333333333,0.7728086050218999,0.6793333333333333,0.6916749681933937,0.9983333333333333
|
5 |
-
20,gpt-4o-mini,gpt-4o-mini/shots-20,0.6623333333333333,0.7686706009175459,0.6623333333333333,0.6798015109939115,0.998
|
6 |
-
30,gpt-4o-mini,gpt-4o-mini/shots-30,0.6873333333333334,0.7684209723431035,0.6873333333333334,0.6913018667081989,0.999
|
7 |
-
40,gpt-4o-mini,gpt-4o-mini/shots-40,0.6923333333333334,0.7639874967862498,0.6923333333333334,0.6924934068935911,0.9986666666666667
|
8 |
-
50,gpt-4o-mini,gpt-4o-mini/shots-50,0.717,0.7692638634416518,0.717,0.7105227254860433,0.9993333333333333
|
9 |
0,gpt-4o,gpt-4o/shots-00,0.782,0.8204048322982596,0.782,0.7953019682198627,0.066
|
|
|
|
|
|
|
10 |
5,gpt-4o,gpt-4o/shots-05,0.7873333333333333,0.8230974205170392,0.7873333333333333,0.8000290527498529,0.998
|
|
|
|
|
|
|
11 |
10,gpt-4o,gpt-4o/shots-10,0.7916666666666666,0.8227707658360168,0.7916666666666666,0.803614688453356,0.9996666666666667
|
|
|
|
|
|
|
12 |
20,gpt-4o,gpt-4o/shots-20,0.7816666666666666,0.8204541793856629,0.7816666666666666,0.7967017169880498,0.9993333333333333
|
|
|
|
|
|
|
13 |
30,gpt-4o,gpt-4o/shots-30,0.7886666666666666,0.8260847852316618,0.7886666666666666,0.8030949295928699,0.999
|
|
|
|
|
14 |
40,gpt-4o,gpt-4o/shots-40,0.784,0.8233509309291644,0.784,0.7993336791122846,0.9973333333333333
|
|
|
|
|
15 |
50,gpt-4o,gpt-4o/shots-50,0.787,0.8234800466218334,0.787,0.8013530974301947,0.9993333333333333
|
16 |
-
|
17 |
-
5,o1-mini,o1-mini/shots-05,0.724,0.7905045610386181,0.724,0.7482963122126776,0.9966666666666667
|
18 |
-
10,o1-mini,o1-mini/shots-10,0.725,0.7892485648334764,0.725,0.7485623974683336,0.9943333333333333
|
19 |
-
20,o1-mini,o1-mini/shots-20,0.7343333333333333,0.786101455887261,0.7343333333333333,0.7535300565051624,0.9946666666666667
|
20 |
-
0,o1-preview,o1-preview/shots-00,0.721,0.7849371317342158,0.721,0.7451207069815194,0.998
|
21 |
-
5,o1-preview,o1-preview/shots-05,0.7313333333333333,0.7878283093765627,0.7313333333333333,0.7535489719321234,0.979
|
22 |
-
10,o1-preview,o1-preview/shots-10,0.749,0.7964482186234537,0.749,0.7677316493549238,0.9873333333333333
|
23 |
-
20,o1-preview,o1-preview/shots-20,0.7443333333333333,0.7911442834260676,0.7443333333333333,0.7625144090816939,0.9853333333333333
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
0,gpt-4o-mini,gpt-4o-mini/shots-00,0.7176666666666667,0.785706730193659,0.7176666666666667,0.7296061848734905,0.9916666666666667
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
0,gpt-4o,gpt-4o/shots-00,0.782,0.8204048322982596,0.782,0.7953019682198627,0.066
|
4 |
+
0,o1-mini,o1-mini/shots-00,0.7083333333333334,0.7848098266888749,0.7083333333333334,0.7377068425566796,0.999
|
5 |
+
0,o1-preview,o1-preview/shots-00,0.721,0.7849371317342158,0.721,0.7451207069815194,0.998
|
6 |
+
5,gpt-4o-mini,gpt-4o-mini/shots-05,0.7176666666666667,0.7767294185987051,0.7176666666666667,0.7181068311028772,0.9996666666666667
|
7 |
5,gpt-4o,gpt-4o/shots-05,0.7873333333333333,0.8230974205170392,0.7873333333333333,0.8000290527498529,0.998
|
8 |
+
5,o1-mini,o1-mini/shots-05,0.724,0.7905045610386181,0.724,0.7482963122126776,0.9966666666666667
|
9 |
+
5,o1-preview,o1-preview/shots-05,0.7313333333333333,0.7878283093765627,0.7313333333333333,0.7535489719321234,0.979
|
10 |
+
10,gpt-4o-mini,gpt-4o-mini/shots-10,0.6793333333333333,0.7728086050218999,0.6793333333333333,0.6916749681933937,0.9983333333333333
|
11 |
10,gpt-4o,gpt-4o/shots-10,0.7916666666666666,0.8227707658360168,0.7916666666666666,0.803614688453356,0.9996666666666667
|
12 |
+
10,o1-mini,o1-mini/shots-10,0.725,0.7892485648334764,0.725,0.7485623974683336,0.9943333333333333
|
13 |
+
10,o1-preview,o1-preview/shots-10,0.749,0.7964482186234537,0.749,0.7677316493549238,0.9873333333333333
|
14 |
+
20,gpt-4o-mini,gpt-4o-mini/shots-20,0.6623333333333333,0.7686706009175459,0.6623333333333333,0.6798015109939115,0.998
|
15 |
20,gpt-4o,gpt-4o/shots-20,0.7816666666666666,0.8204541793856629,0.7816666666666666,0.7967017169880498,0.9993333333333333
|
16 |
+
20,o1-mini,o1-mini/shots-20,0.7343333333333333,0.786101455887261,0.7343333333333333,0.7535300565051624,0.9946666666666667
|
17 |
+
20,o1-preview,o1-preview/shots-20,0.7443333333333333,0.7911442834260676,0.7443333333333333,0.7625144090816939,0.9853333333333333
|
18 |
+
30,gpt-4o-mini,gpt-4o-mini/shots-30,0.6873333333333334,0.7684209723431035,0.6873333333333334,0.6913018667081989,0.999
|
19 |
30,gpt-4o,gpt-4o/shots-30,0.7886666666666666,0.8260847852316618,0.7886666666666666,0.8030949295928699,0.999
|
20 |
+
30,o1-preview,o1-preview/shots-30,0.7473333333333333,0.7920604378746952,0.7473333333333333,0.7643977099599287,0.984
|
21 |
+
40,gpt-4o-mini,gpt-4o-mini/shots-40,0.6923333333333334,0.7639874967862498,0.6923333333333334,0.6924934068935911,0.9986666666666667
|
22 |
40,gpt-4o,gpt-4o/shots-40,0.784,0.8233509309291644,0.784,0.7993336791122846,0.9973333333333333
|
23 |
+
40,o1-preview,o1-preview/shots-40,0.7506666666666667,0.7964679024468982,0.7506666666666667,0.7674109766459014,0.984
|
24 |
+
50,gpt-4o-mini,gpt-4o-mini/shots-50,0.717,0.7692638634416518,0.717,0.7105227254860433,0.9993333333333333
|
25 |
50,gpt-4o,gpt-4o/shots-50,0.787,0.8234800466218334,0.787,0.8013530974301947,0.9993333333333333
|
26 |
+
50,o1-preview,o1-preview/shots-50,0.7546666666666667,0.7979981023789272,0.7546666666666667,0.7708181822112403,0.9816666666666667
|
|
|
|
|
|
|
|
|
|
|
|
|
|
datasets/mgtv/unique_translations.csv
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e99225113dcee9de7882586a73e8e741ccd90d55b12341bf4625811a7324ec58
|
3 |
+
size 1619162
|
llm_toolkit/logical_reasoning_utils.py
CHANGED
@@ -116,6 +116,9 @@ P2_few_shot = """你是一个情景猜谜游戏的主持人。游戏规则如下
|
|
116 |
回答:
|
117 |
"""
|
118 |
|
|
|
|
|
|
|
119 |
|
120 |
def get_prompt_template(using_p1=True, chinese_prompt=True):
|
121 |
if using_p1:
|
@@ -399,27 +402,31 @@ def load_alpaca_data(data_path, using_p1=True, use_english_datasets=False):
|
|
399 |
|
400 |
|
401 |
def plot_value_counts(df, column_name, offset=0.1, title=None, preprocess_func=None):
|
402 |
-
font_family = rcParams["font.family"]
|
403 |
-
# Set the font to SimHei to support Chinese characters
|
404 |
-
rcParams["font.family"] = "
|
405 |
-
rcParams["axes.unicode_minus"] = (
|
406 |
-
|
407 |
-
)
|
408 |
if preprocess_func:
|
409 |
df["backup"] = df[column_name]
|
410 |
df[column_name] = df[column_name].apply(preprocess_func)
|
411 |
|
412 |
plt.figure(figsize=(8, 4))
|
413 |
-
|
|
|
|
|
|
|
|
|
414 |
# add values on top of bars
|
415 |
-
for i, v in enumerate(
|
416 |
plt.text(i, v + offset, str(v), ha="center")
|
417 |
|
418 |
plt.xlabel(title or column_name)
|
419 |
|
420 |
plt.show()
|
421 |
|
422 |
-
rcParams["font.family"] = font_family
|
423 |
|
424 |
if preprocess_func:
|
425 |
plot_confusion_matrix(df["label"], df[column_name])
|
@@ -432,12 +439,12 @@ def calc_metrics_for_col(df, col):
|
|
432 |
return metrics["accuracy"], metrics["precision"], metrics["recall"], metrics["f1"]
|
433 |
|
434 |
|
435 |
-
def get_metrics_df(df, variant="epoch"):
|
436 |
perf_df = pd.DataFrame(
|
437 |
columns=[variant, "model", "run", "accuracy", "precision", "recall", "f1"]
|
438 |
)
|
439 |
columns = (
|
440 |
-
df.columns[5:]
|
441 |
if variant == "index"
|
442 |
else [
|
443 |
col
|
@@ -445,6 +452,8 @@ def get_metrics_df(df, variant="epoch"):
|
|
445 |
if variant in col or variant == "epoch" and "_torch." in col
|
446 |
]
|
447 |
)
|
|
|
|
|
448 |
print("columns:", columns)
|
449 |
for i, col in enumerate(columns):
|
450 |
metrics = calc_metrics(df["label"], df[col], debug=False)
|
@@ -592,21 +601,22 @@ def eval_openai(
|
|
592 |
|
593 |
|
594 |
def plot_confusion_matrix(y_true, y_pred, title="Confusion Matrix"):
|
595 |
-
font_family = rcParams["font.family"]
|
596 |
-
# Set the font to SimHei to support Chinese characters
|
597 |
-
rcParams["font.family"] = "
|
598 |
-
rcParams["axes.unicode_minus"] = (
|
599 |
-
|
600 |
-
)
|
601 |
|
602 |
labels = np.unique(y_true)
|
603 |
-
|
604 |
y_pred = [extract_answer(text) for text in y_pred]
|
605 |
|
606 |
-
cm = confusion_matrix(y_true, y_pred)
|
607 |
cm = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]
|
608 |
|
609 |
fig, ax = plt.subplots(figsize=(8, 8))
|
|
|
|
|
610 |
sns.heatmap(
|
611 |
cm,
|
612 |
annot=True,
|
@@ -620,7 +630,7 @@ def plot_confusion_matrix(y_true, y_pred, title="Confusion Matrix"):
|
|
620 |
ax.set_ylabel("True labels")
|
621 |
plt.show()
|
622 |
|
623 |
-
rcParams["font.family"] = font_family
|
624 |
|
625 |
|
626 |
def majority_vote(r1, r2, r3):
|
|
|
116 |
回答:
|
117 |
"""
|
118 |
|
119 |
+
df_translation = pd.read_csv("datasets/mgtv/unique_translations.csv")
|
120 |
+
translation_dict = df_translation.set_index("chinese").to_dict(orient="index")
|
121 |
+
translation_dict = {k: v["english"] for k, v in translation_dict.items()}
|
122 |
|
123 |
def get_prompt_template(using_p1=True, chinese_prompt=True):
|
124 |
if using_p1:
|
|
|
402 |
|
403 |
|
404 |
def plot_value_counts(df, column_name, offset=0.1, title=None, preprocess_func=None):
|
405 |
+
# font_family = rcParams["font.family"]
|
406 |
+
# # Set the font to SimHei to support Chinese characters
|
407 |
+
# rcParams["font.family"] = "SimHei"
|
408 |
+
# rcParams["axes.unicode_minus"] = (
|
409 |
+
# False # This is to support the minus sign in Chinese.
|
410 |
+
# )
|
411 |
if preprocess_func:
|
412 |
df["backup"] = df[column_name]
|
413 |
df[column_name] = df[column_name].apply(preprocess_func)
|
414 |
|
415 |
plt.figure(figsize=(8, 4))
|
416 |
+
|
417 |
+
value_counts = df[column_name].value_counts()
|
418 |
+
value_counts = value_counts.rename(index=translation_dict)
|
419 |
+
value_counts.plot(kind="bar")
|
420 |
+
|
421 |
# add values on top of bars
|
422 |
+
for i, v in enumerate(value_counts):
|
423 |
plt.text(i, v + offset, str(v), ha="center")
|
424 |
|
425 |
plt.xlabel(title or column_name)
|
426 |
|
427 |
plt.show()
|
428 |
|
429 |
+
# rcParams["font.family"] = font_family
|
430 |
|
431 |
if preprocess_func:
|
432 |
plot_confusion_matrix(df["label"], df[column_name])
|
|
|
439 |
return metrics["accuracy"], metrics["precision"], metrics["recall"], metrics["f1"]
|
440 |
|
441 |
|
442 |
+
def get_metrics_df(df, variant="epoch", sort_columns=True):
|
443 |
perf_df = pd.DataFrame(
|
444 |
columns=[variant, "model", "run", "accuracy", "precision", "recall", "f1"]
|
445 |
)
|
446 |
columns = (
|
447 |
+
df.columns[5:].tolist()
|
448 |
if variant == "index"
|
449 |
else [
|
450 |
col
|
|
|
452 |
if variant in col or variant == "epoch" and "_torch." in col
|
453 |
]
|
454 |
)
|
455 |
+
if sort_columns:
|
456 |
+
columns = sorted(columns, key=lambda x: int(x.lower().replace("-1m", "").replace("chat", "0").replace("instruct", "0").split("-")[-1].split("_")[0]))
|
457 |
print("columns:", columns)
|
458 |
for i, col in enumerate(columns):
|
459 |
metrics = calc_metrics(df["label"], df[col], debug=False)
|
|
|
601 |
|
602 |
|
603 |
def plot_confusion_matrix(y_true, y_pred, title="Confusion Matrix"):
|
604 |
+
# font_family = rcParams["font.family"]
|
605 |
+
# # Set the font to SimHei to support Chinese characters
|
606 |
+
# rcParams["font.family"] = "SimHei"
|
607 |
+
# rcParams["axes.unicode_minus"] = (
|
608 |
+
# False # This is to support the minus sign in Chinese.
|
609 |
+
# )
|
610 |
|
611 |
labels = np.unique(y_true)
|
|
|
612 |
y_pred = [extract_answer(text) for text in y_pred]
|
613 |
|
614 |
+
cm = confusion_matrix(y_true, y_pred, labels=labels)
|
615 |
cm = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]
|
616 |
|
617 |
fig, ax = plt.subplots(figsize=(8, 8))
|
618 |
+
labels = [translation_dict[x] for x in labels]
|
619 |
+
|
620 |
sns.heatmap(
|
621 |
cm,
|
622 |
annot=True,
|
|
|
630 |
ax.set_ylabel("True labels")
|
631 |
plt.show()
|
632 |
|
633 |
+
# rcParams["font.family"] = font_family
|
634 |
|
635 |
|
636 |
def majority_vote(r1, r2, r3):
|
notebooks/00_Data Analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/01a_internlm2_5-20b-chat_analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/01a_internlm2_5-7b-chat-1m_analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/01a_internlm2_5-7b-chat_analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/01b_Mistral-7B-v0.3-Chinese-Chat_analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/02a_Qwen2-7B-Instruct_analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/02b_Qwen2-72B-Instruct_analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/02c_Qwen2.5-3B-Instruct_analysis.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/03a_Llama3.1-8B-Chinese-Chat_analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/03b_Llama3.1-70B-Chinese-Chat_analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/04b_OpenAI-Models_analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
scripts/eval-mgtv-qwen2.5.sh
CHANGED
@@ -13,12 +13,18 @@ cat /etc/os-release
|
|
13 |
lscpu
|
14 |
grep MemTotal /proc/meminfo
|
15 |
|
|
|
|
|
|
|
|
|
16 |
$BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-3B-Instruct
|
17 |
|
18 |
-
$BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-
|
|
|
|
|
19 |
|
20 |
-
$BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-1.5B-Instruct
|
21 |
|
22 |
-
$BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-0.5B-Instruct
|
23 |
|
24 |
-
$BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-0.5B-Instruct
|
|
|
13 |
lscpu
|
14 |
grep MemTotal /proc/meminfo
|
15 |
|
16 |
+
$BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-7B-Instruct
|
17 |
+
|
18 |
+
$BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-7B-Instruct
|
19 |
+
|
20 |
$BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-3B-Instruct
|
21 |
|
22 |
+
$BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-3B-Instruct
|
23 |
+
|
24 |
+
# $BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-1.5B-Instruct
|
25 |
|
26 |
+
# $BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-1.5B-Instruct
|
27 |
|
28 |
+
# $BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-0.5B-Instruct
|
29 |
|
30 |
+
# $BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-0.5B-Instruct
|
scripts/tune-mgtv-bf16.sh
CHANGED
@@ -25,10 +25,12 @@ export LOGICAL_REASONING_RESULTS_PATH=results/mgtv-results_bf16.csv
|
|
25 |
|
26 |
# $BASEDIR/scripts/tune-lf_v2.sh internlm internlm2_5-7b-chat intern2
|
27 |
|
28 |
-
$BASEDIR/scripts/tune-lf_v2.sh Qwen Qwen2.5-7B-Instruct qwen
|
29 |
|
30 |
-
|
31 |
|
32 |
#$BASEDIR/scripts/tune-lf_v2.sh Qwen Qwen2.5-1.5B-Instruct qwen
|
33 |
|
34 |
#$BASEDIR/scripts/tune-lf_v2.sh Qwen Qwen2.5-0.5B-Instruct qwen
|
|
|
|
|
|
25 |
|
26 |
# $BASEDIR/scripts/tune-lf_v2.sh internlm internlm2_5-7b-chat intern2
|
27 |
|
28 |
+
# $BASEDIR/scripts/tune-lf_v2.sh Qwen Qwen2.5-7B-Instruct qwen
|
29 |
|
30 |
+
$BASEDIR/scripts/tune-lf_v2.sh Qwen Qwen2.5-3B-Instruct qwen
|
31 |
|
32 |
#$BASEDIR/scripts/tune-lf_v2.sh Qwen Qwen2.5-1.5B-Instruct qwen
|
33 |
|
34 |
#$BASEDIR/scripts/tune-lf_v2.sh Qwen Qwen2.5-0.5B-Instruct qwen
|
35 |
+
|
36 |
+
$BASEDIR/scripts/eval-mgtv-shots-qwen2.5.sh
|