dh-mc commited on
Commit
8157c36
1 Parent(s): 9a8598f

ready for final run

Browse files
Files changed (48) hide show
  1. data/Llama3.1-70B-Chinese-Chat_metrics.csv +11 -11
  2. data/Llama3.1-70B-Chinese-Chat_shots_metrics.csv +5 -5
  3. data/Llama3.1-8B-Chinese-Chat_metrics.csv +11 -11
  4. data/Llama3.1-8B-Chinese-Chat_shots_metrics.csv +7 -5
  5. data/Mistral-7B-v0.3-Chinese-Chat_metrics.csv +11 -11
  6. data/Mistral-7B-v0.3-Chinese-Chat_shots_metrics.csv +5 -5
  7. data/Qwen2-72B-Instruct_metrics.csv +11 -11
  8. data/Qwen2-72B-Instruct_shots_metrics.csv +1 -1
  9. data/Qwen2-7B-Instruct_metrics.csv +11 -11
  10. data/Qwen2-7B-Instruct_shots_metrics.csv +2 -2
  11. data/Qwen2.5-0.5B-Instruct_metrics.csv +11 -11
  12. data/Qwen2.5-0.5B-Instruct_shots_metrics.csv +7 -7
  13. data/Qwen2.5-1.5B-Instruct_metrics.csv +11 -11
  14. data/Qwen2.5-1.5B-Instruct_shots_metrics.csv +7 -7
  15. data/Qwen2.5-72B-Instruct_metrics.csv +11 -4
  16. data/Qwen2.5-72B-Instruct_shots_metrics.csv +3 -1
  17. data/all_model_token_counts.csv +49 -28
  18. data/best_metrics.csv +15 -13
  19. data/best_results.csv +0 -0
  20. data/few-shots_metrics.csv +90 -39
  21. data/fine-tuning_metrics.csv +121 -77
  22. data/internlm2_5-20b-chat_metrics.csv +11 -11
  23. data/internlm2_5-20b-chat_shots_metrics.csv +1 -1
  24. data/internlm2_5-7b-chat-1m_metrics.csv +11 -11
  25. data/internlm2_5-7b-chat-1m_shots_metrics.csv +7 -6
  26. data/internlm2_5-7b-chat_metrics.csv +11 -11
  27. data/internlm2_5-7b-chat_shots_metrics.csv +7 -5
  28. data/openai_metrics.csv +28 -25
  29. datasets/mgtv/train.csv +2 -2
  30. datasets/mgtv/val.csv +0 -0
  31. llm_toolkit/logical_reasoning_utils.py +8 -3
  32. notebooks/00_Data Analysis.ipynb +0 -0
  33. notebooks/01a_internlm2_5-20b-chat_analysis.ipynb +0 -0
  34. notebooks/01a_internlm2_5-7b-chat-1m_analysis.ipynb +0 -0
  35. notebooks/01a_internlm2_5-7b-chat_analysis.ipynb +0 -0
  36. notebooks/01b_Mistral-7B-v0.3-Chinese-Chat_analysis.ipynb +0 -0
  37. notebooks/02a_Qwen2-7B-Instruct_analysis.ipynb +0 -0
  38. notebooks/02b_Qwen2-72B-Instruct_analysis.ipynb +0 -0
  39. notebooks/02e_Qwen2.5-1.5B-Instruct_analysis.ipynb +0 -0
  40. notebooks/02f_Qwen2.5-0.5B-Instruct_analysis.ipynb +0 -0
  41. notebooks/02g_Qwen2.5-72B-Instruct_analysis.ipynb +0 -0
  42. notebooks/03a_Llama3.1-8B-Chinese-Chat_analysis.ipynb +0 -0
  43. notebooks/03b_Llama3.1-70B-Chinese-Chat_analysis.ipynb +0 -0
  44. notebooks/04b_OpenAI-Models_analysis.ipynb +0 -0
  45. notebooks/06b_Open-Source-Models_analysis.ipynb +0 -0
  46. scripts/eval-mgtv-qwen2.5_4bit.sh +6 -5
  47. scripts/eval-mgtv.sh +1 -1
  48. scripts/eval-shots.sh +2 -7
data/Llama3.1-70B-Chinese-Chat_metrics.csv CHANGED
@@ -1,12 +1,12 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat_torch.bfloat16_4bit_lf,0.7636666666666667,0.7806653325131986,0.7636666666666667,0.7525813484548423,0.009666666666666667
3
- 0.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-35_torch.bfloat16_4bit_lf,0.778,0.8148707737020212,0.778,0.7910805488003003,0.9996666666666667
4
- 0.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-70_torch.bfloat16_4bit_lf,0.7306666666666667,0.8145782271710159,0.7306666666666667,0.7624724104697406,1.0
5
- 0.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-105_torch.bfloat16_4bit_lf,0.7193333333333334,0.8213567226911125,0.7193333333333334,0.7560702640626931,1.0
6
- 0.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-140_torch.bfloat16_4bit_lf,0.7563333333333333,0.826789897753756,0.7563333333333333,0.7815164366677209,1.0
7
- 1.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-175_torch.bfloat16_4bit_lf,0.7963333333333333,0.8248972880055918,0.7963333333333333,0.8076868978089201,1.0
8
- 1.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-210_torch.bfloat16_4bit_lf,0.7326666666666667,0.8265345821998035,0.7326666666666667,0.7644418492070342,1.0
9
- 1.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-245_torch.bfloat16_4bit_lf,0.7556666666666667,0.8258994609525315,0.7556666666666667,0.7820405339757727,1.0
10
- 1.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-280_torch.bfloat16_4bit_lf,0.757,0.8264461657684251,0.757,0.7834496144681513,1.0
11
- 1.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-315_torch.bfloat16_4bit_lf,0.7546666666666667,0.8277723752096544,0.7546666666666667,0.7823584779069335,1.0
12
- 2.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7496666666666667,0.8282310230333227,0.7496666666666667,0.7791947625361637,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat_torch.bfloat16_4bit_lf,0.7646666666666667,0.7804609488644828,0.7646666666666667,0.7497548621711109,0.009666666666666667
3
+ 0.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-35_torch.bfloat16_4bit_lf,0.784,0.8105343792887019,0.784,0.7931742141608462,0.9996666666666667
4
+ 0.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-70_torch.bfloat16_4bit_lf,0.7426666666666667,0.8117033235947096,0.7426666666666667,0.7673825750808414,1.0
5
+ 0.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-105_torch.bfloat16_4bit_lf,0.736,0.8227236574891071,0.736,0.7650739090144549,1.0
6
+ 0.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-140_torch.bfloat16_4bit_lf,0.7686666666666667,0.8259659464402258,0.7686666666666667,0.7880870865039342,1.0
7
+ 1.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-175_torch.bfloat16_4bit_lf,0.809,0.8282732906153989,0.809,0.8166997776775797,1.0
8
+ 1.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-210_torch.bfloat16_4bit_lf,0.75,0.8287348768409003,0.75,0.7741734526674708,1.0
9
+ 1.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-245_torch.bfloat16_4bit_lf,0.7703333333333333,0.8271894042316865,0.7703333333333333,0.7907617274354051,1.0
10
+ 1.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-280_torch.bfloat16_4bit_lf,0.776,0.8315436250878178,0.776,0.7959870550088912,1.0
11
+ 1.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-315_torch.bfloat16_4bit_lf,0.7733333333333333,0.8327336470976,0.7733333333333333,0.7947537193805649,1.0
12
+ 2.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7686666666666667,0.8329633784586954,0.7686666666666667,0.7914454794587963,1.0
data/Llama3.1-70B-Chinese-Chat_shots_metrics.csv CHANGED
@@ -1,6 +1,6 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-00,0.7636666666666667,0.7806653325131986,0.7636666666666667,0.7525813484548423,0.009666666666666667
3
- 5,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-05,0.7536666666666667,0.772126097633354,0.7536666666666667,0.7545029613768596,0.79
4
- 10,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-10,0.754,0.7729477984842943,0.754,0.756682017266956,0.8326666666666667
5
- 20,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-20,0.738,0.7566938786102072,0.738,0.7348961489952073,0.819
6
- 30,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-30,0.758,0.7731535340331644,0.758,0.7565012256889623,0.548
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-00,0.7646666666666667,0.7804609488644828,0.7646666666666667,0.7497548621711109,0.009666666666666667
3
+ 5,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-05,0.754,0.7675695134276339,0.754,0.7530665717237273,0.79
4
+ 10,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-10,0.756,0.7695738042762151,0.756,0.7563878737797524,0.8326666666666667
5
+ 20,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-20,0.7406666666666667,0.7560876641054418,0.7406666666666667,0.7360011002310723,0.819
6
+ 30,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-30,0.7603333333333333,0.7710641222872985,0.7603333333333333,0.7570501796584528,0.548
data/Llama3.1-8B-Chinese-Chat_metrics.csv CHANGED
@@ -1,12 +1,12 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat_torch.float16_lf,0.742,0.7477056799746837,0.742,0.7371050181385632,0.8033333333333333
3
- 0.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-35_torch.float16_lf,0.709,0.7987219597893886,0.709,0.7427961200958145,1.0
4
- 0.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-70_torch.float16_lf,0.7163333333333334,0.8058657875960304,0.7163333333333334,0.7487811196109319,0.9993333333333333
5
- 0.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-105_torch.float16_lf,0.6996666666666667,0.802722482275839,0.6996666666666667,0.7370938556711591,1.0
6
- 0.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7716666666666666,0.8092193821623755,0.7716666666666666,0.7864287269398251,1.0
7
- 1.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-175_torch.float16_lf,0.78,0.810582723471486,0.78,0.7924651054056209,1.0
8
- 1.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7313333333333333,0.8157783263996798,0.7313333333333333,0.7628807622782868,1.0
9
- 1.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-245_torch.float16_lf,0.751,0.8125856808988221,0.751,0.7745416635653988,1.0
10
- 1.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-280_torch.float16_lf,0.739,0.8097375095673094,0.739,0.7662329023371559,1.0
11
- 1.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-315_torch.float16_lf,0.7236666666666667,0.8145530585912838,0.7236666666666667,0.7580428816095297,1.0
12
- 2.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-350_torch.float16_lf,0.7293333333333333,0.8151184301713545,0.7293333333333333,0.7616699266814145,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat_torch.float16_lf,0.7343333333333333,0.7375752740091942,0.7343333333333333,0.7270283652909943,0.8033333333333333
3
+ 0.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-35_torch.float16_lf,0.717,0.7933072428707201,0.717,0.7447412977676989,1.0
4
+ 0.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-70_torch.float16_lf,0.7226666666666667,0.7983383063141186,0.7226666666666667,0.7489397350174751,0.9993333333333333
5
+ 0.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-105_torch.float16_lf,0.7083333333333334,0.7967030927405547,0.7083333333333334,0.738836849803633,1.0
6
+ 0.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7773333333333333,0.805139129977305,0.7773333333333333,0.7882159693114585,1.0
7
+ 1.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-175_torch.float16_lf,0.7853333333333333,0.8062405645226312,0.7853333333333333,0.7938991590982061,1.0
8
+ 1.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7436666666666667,0.8148316221752646,0.7436666666666667,0.7689773286065246,1.0
9
+ 1.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-245_torch.float16_lf,0.759,0.8080929326806991,0.759,0.7772842274293189,1.0
10
+ 1.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-280_torch.float16_lf,0.745,0.8027959680086005,0.745,0.7666181725503965,1.0
11
+ 1.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-315_torch.float16_lf,0.7303333333333333,0.806805925253305,0.7303333333333333,0.7580841794383364,1.0
12
+ 2.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-350_torch.float16_lf,0.737,0.808786608325944,0.737,0.7629963845364953,1.0
data/Llama3.1-8B-Chinese-Chat_shots_metrics.csv CHANGED
@@ -1,6 +1,8 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-00,0.742,0.7477056799746837,0.742,0.7371050181385632,0.8033333333333333
3
- 5,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-05,0.7056666666666667,0.7605745196939752,0.7056666666666667,0.7269189565098723,0.9886666666666667
4
- 10,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-10,0.6676666666666666,0.7834080522821993,0.6676666666666666,0.7082605860921491,0.9623333333333334
5
- 20,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-20,0.767,0.7690587905035869,0.767,0.7661695279121855,0.979
6
- 30,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-30,0.7693333333333333,0.7765844200886581,0.7693333333333333,0.7697325957683855,0.7326666666666667
 
 
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-00,0.7343333333333333,0.7375752740091942,0.7343333333333333,0.7270283652909943,0.8033333333333333
3
+ 5,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-05,0.7056666666666667,0.7508515184863084,0.7056666666666667,0.7230574380518462,0.9886666666666667
4
+ 10,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-10,0.6736666666666666,0.7776004745989736,0.6736666666666666,0.7094104807112239,0.9623333333333334
5
+ 20,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-20,0.767,0.764982587229615,0.767,0.7638473265780445,0.979
6
+ 30,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-30,0.7713333333333333,0.7725685630276532,0.7713333333333333,0.7692692690410152,0.7326666666666667
7
+ 40,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-40,0.6873333333333334,0.773294758147205,0.6873333333333334,0.7075877720686631,0.759
8
+ 50,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-50,0.7176666666666667,0.7599215931134234,0.7176666666666667,0.7203550920641806,0.6623333333333333
data/Mistral-7B-v0.3-Chinese-Chat_metrics.csv CHANGED
@@ -1,12 +1,12 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat_torch.float16_lf,0.6946666666666667,0.701136267898111,0.6946666666666667,0.6634078645357937,0.011666666666666667
3
- 0.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-35_torch.float16_lf,0.702,0.7932731014186957,0.702,0.7342714734731689,1.0
4
- 0.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-70_torch.float16_lf,0.742,0.78982949223512,0.742,0.7536681109811127,1.0
5
- 0.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-105_torch.float16_lf,0.6596666666666666,0.7923396753604393,0.6596666666666666,0.7067542301676931,1.0
6
- 0.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7146666666666667,0.7861341885687435,0.7146666666666667,0.7404677278137267,1.0
7
- 1.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-175_torch.float16_lf,0.7326666666666667,0.7876867721932461,0.7326666666666667,0.7471869515031995,1.0
8
- 1.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7016666666666667,0.7903119228393193,0.7016666666666667,0.7348708822385348,1.0
9
- 1.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-245_torch.float16_lf,0.75,0.7885868317699068,0.75,0.7648234347578796,1.0
10
- 1.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-280_torch.float16_lf,0.7156666666666667,0.7846106674095725,0.7156666666666667,0.7410042005708856,1.0
11
- 1.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-315_torch.float16_lf,0.6916666666666667,0.7864256994491394,0.6916666666666667,0.7257499426487266,1.0
12
- 2.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-350_torch.float16_lf,0.6976666666666667,0.7889443494370009,0.6976666666666667,0.7307996137659796,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat_torch.float16_lf,0.6923333333333334,0.7009179792741449,0.6923333333333334,0.6605899639694456,0.011666666666666667
3
+ 0.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-35_torch.float16_lf,0.706,0.7832545046834243,0.706,0.7323466131711432,1.0
4
+ 0.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-70_torch.float16_lf,0.7476666666666667,0.7836120158306894,0.7476666666666667,0.7557791381509955,1.0
5
+ 0.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-105_torch.float16_lf,0.6736666666666666,0.7908140272002406,0.6736666666666666,0.7129951145360993,1.0
6
+ 0.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7293333333333333,0.788387677637057,0.7293333333333333,0.7494137469900564,1.0
7
+ 1.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-175_torch.float16_lf,0.74,0.7833068129490098,0.74,0.7499935485741815,1.0
8
+ 1.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7146666666666667,0.7890760288118991,0.7146666666666667,0.7411240160229633,1.0
9
+ 1.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-245_torch.float16_lf,0.7616666666666667,0.789634957005121,0.7616666666666667,0.7721210086098353,1.0
10
+ 1.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-280_torch.float16_lf,0.7296666666666667,0.7854982015370922,0.7296666666666667,0.7491267995936699,1.0
11
+ 1.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-315_torch.float16_lf,0.7076666666666667,0.7877874532247918,0.7076666666666667,0.7346283562321456,1.0
12
+ 2.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-350_torch.float16_lf,0.713,0.7895690867103055,0.713,0.739013227401175,1.0
data/Mistral-7B-v0.3-Chinese-Chat_shots_metrics.csv CHANGED
@@ -1,6 +1,6 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-00,0.6946666666666667,0.701136267898111,0.6946666666666667,0.6634078645357937,0.011666666666666667
3
- 5,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-05,0.6446666666666667,0.7451807329096397,0.6446666666666667,0.681030628954011,0.142
4
- 10,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-10,0.6036666666666667,0.7334913867282189,0.6036666666666667,0.6493185547247415,0.10633333333333334
5
- 20,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-20,0.6276666666666667,0.7398894455389585,0.6276666666666667,0.6690543758928521,0.08266666666666667
6
- 30,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-30,0.661,0.7422079284443324,0.661,0.6862974695781847,0.07
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-00,0.6923333333333334,0.7009179792741449,0.6923333333333334,0.6605899639694456,0.011666666666666667
3
+ 5,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-05,0.6546666666666666,0.7415422757067709,0.6546666666666666,0.684189810233595,0.142
4
+ 10,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-10,0.612,0.7259976964524691,0.612,0.6501410678512595,0.10633333333333334
5
+ 20,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-20,0.6336666666666667,0.7315100617022602,0.6336666666666667,0.6683245802083553,0.08266666666666667
6
+ 30,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-30,0.665,0.7374233826761456,0.665,0.6872462947319797,0.07
data/Qwen2-72B-Instruct_metrics.csv CHANGED
@@ -1,12 +1,12 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch.bfloat16_4bit_lf,0.7516666666666667,0.7949378981748352,0.7516666666666667,0.7572499605227642,0.9773333333333334
3
- 0.2,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.7583333333333333,0.8199928526815756,0.7583333333333333,0.782751089787442,1.0
4
- 0.4,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.7366666666666667,0.8224865755517643,0.7366666666666667,0.7700627366337021,1.0
5
- 0.6,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-105_torch.bfloat16_4bit_lf,0.757,0.8253824826209251,0.757,0.784000409833628,1.0
6
- 0.8,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-140_torch.bfloat16_4bit_lf,0.7893333333333333,0.8229104753645825,0.7893333333333333,0.8033124955993173,1.0
7
- 1.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-175_torch.bfloat16_4bit_lf,0.7376666666666667,0.8243654864769323,0.7376666666666667,0.7699617360961548,1.0
8
- 1.2,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-210_torch.bfloat16_4bit_lf,0.763,0.8318882808702871,0.763,0.7901075708186186,1.0
9
- 1.4,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-245_torch.bfloat16_4bit_lf,0.7656666666666667,0.8288272203240518,0.7656666666666667,0.790627109330698,1.0
10
- 1.6,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-280_torch.bfloat16_4bit_lf,0.7693333333333333,0.8292798021666021,0.7693333333333333,0.7930169589012503,1.0
11
- 1.8,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-315_torch.bfloat16_4bit_lf,0.784,0.8354349234761956,0.784,0.804194683154365,1.0
12
- 2.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-350_torch.bfloat16_4bit_lf,0.7736666666666666,0.8330147983140184,0.7736666666666666,0.7973657072550873,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch.bfloat16_4bit_lf,0.757,0.7973819870472458,0.757,0.7602606947698078,0.9773333333333334
3
+ 0.2,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.772,0.8214192168152544,0.772,0.7910898276003457,1.0
4
+ 0.4,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.757,0.828747966447233,0.757,0.783516715780864,1.0
5
+ 0.6,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-105_torch.bfloat16_4bit_lf,0.772,0.8277697933855978,0.772,0.7932982172336923,1.0
6
+ 0.8,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-140_torch.bfloat16_4bit_lf,0.8036666666666666,0.8277228453985896,0.8036666666666666,0.8136774676398189,1.0
7
+ 1.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-175_torch.bfloat16_4bit_lf,0.753,0.8267761287574541,0.753,0.7793434248302783,1.0
8
+ 1.2,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-210_torch.bfloat16_4bit_lf,0.7793333333333333,0.8358618807490109,0.7793333333333333,0.800734522365308,1.0
9
+ 1.4,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-245_torch.bfloat16_4bit_lf,0.7883333333333333,0.8390667295473608,0.7883333333333333,0.8075446360016978,1.0
10
+ 1.6,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-280_torch.bfloat16_4bit_lf,0.7856666666666666,0.8333912862981965,0.7856666666666666,0.8038536915174684,1.0
11
+ 1.8,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-315_torch.bfloat16_4bit_lf,0.805,0.8442903406198344,0.805,0.8197956174225439,1.0
12
+ 2.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-350_torch.bfloat16_4bit_lf,0.7936666666666666,0.8399561173931658,0.7936666666666666,0.8112524138737499,1.0
data/Qwen2-72B-Instruct_shots_metrics.csv CHANGED
@@ -1,2 +1,2 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch/shots-00,0.7516666666666667,0.7949378981748352,0.7516666666666667,0.7572499605227642,0.9773333333333334
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch/shots-00,0.757,0.7973819870472458,0.757,0.7602606947698078,0.9773333333333334
data/Qwen2-7B-Instruct_metrics.csv CHANGED
@@ -1,12 +1,12 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct_torch.float16_lf,0.683,0.7493103872717293,0.683,0.710140098232232,0.9996666666666667
3
- 0.2,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-35_torch.float16_lf,0.725,0.7840171468707405,0.725,0.748994536667058,0.9996666666666667
4
- 0.4,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-70_torch.float16_lf,0.759,0.8005303465799652,0.759,0.7748745026535183,1.0
5
- 0.6,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-105_torch.float16_lf,0.6926666666666667,0.8039176975550218,0.6926666666666667,0.7332481528585848,1.0
6
- 0.8,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-140_torch.float16_lf,0.725,0.7952719247171957,0.725,0.7476238017654298,1.0
7
- 1.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-175_torch.float16_lf,0.6756666666666666,0.7810148934939715,0.6756666666666666,0.708653993277772,1.0
8
- 1.2,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-210_torch.float16_lf,0.7013333333333334,0.7969562600853992,0.7013333333333334,0.7362679665494508,1.0
9
- 1.4,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-245_torch.float16_lf,0.7326666666666667,0.7922538479314682,0.7326666666666667,0.755402136631717,0.9996666666666667
10
- 1.6,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-280_torch.float16_lf,0.6983333333333334,0.785127298428753,0.6983333333333334,0.7292251109166867,1.0
11
- 1.8,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-315_torch.float16_lf,0.6783333333333333,0.785390767631834,0.6783333333333333,0.7164131321837346,1.0
12
- 2.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-350_torch.float16_lf,0.689,0.7929715746898984,0.689,0.7259993126510194,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct_torch.float16_lf,0.6853333333333333,0.7434931541561965,0.6853333333333333,0.7090778261894969,0.9996666666666667
3
+ 0.2,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-35_torch.float16_lf,0.7313333333333333,0.7782207073448913,0.7313333333333333,0.7498580605712221,0.9996666666666667
4
+ 0.4,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-70_torch.float16_lf,0.767,0.7975691979811874,0.767,0.7784908005204111,1.0
5
+ 0.6,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-105_torch.float16_lf,0.706,0.8028770302127605,0.706,0.7396402026345186,1.0
6
+ 0.8,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-140_torch.float16_lf,0.7313333333333333,0.7899967378450532,0.7313333333333333,0.7491181057755286,1.0
7
+ 1.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-175_torch.float16_lf,0.6853333333333333,0.7776902509375624,0.6853333333333333,0.7122906026955259,1.0
8
+ 1.2,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-210_torch.float16_lf,0.7156666666666667,0.7981854285684257,0.7156666666666667,0.7440952985881264,1.0
9
+ 1.4,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-245_torch.float16_lf,0.743,0.7909260776868464,0.743,0.7603582063225583,0.9996666666666667
10
+ 1.6,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-280_torch.float16_lf,0.7106666666666667,0.7844615294470283,0.7106666666666667,0.7354379359862141,1.0
11
+ 1.8,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-315_torch.float16_lf,0.6926666666666667,0.7852752054045592,0.6926666666666667,0.7234458732476875,1.0
12
+ 2.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-350_torch.float16_lf,0.6996666666666667,0.7892137201429604,0.6996666666666667,0.7296312152658814,1.0
data/Qwen2-7B-Instruct_shots_metrics.csv CHANGED
@@ -1,3 +1,3 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-00,0.683,0.7493103872717293,0.683,0.710140098232232,0.9996666666666667
3
- 10,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-10,0.5646666666666667,0.7391197908117386,0.5646666666666667,0.6064049121095652,0.9896666666666667
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-00,0.6853333333333333,0.7434931541561965,0.6853333333333333,0.7090778261894969,0.9996666666666667
3
+ 10,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-10,0.5723333333333334,0.738817429885796,0.5723333333333334,0.6112549880619311,0.9896666666666667
data/Qwen2.5-0.5B-Instruct_metrics.csv CHANGED
@@ -1,12 +1,12 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct_torch.float16_lf,0.443,0.5490534863315207,0.443,0.43178235266224163,0.594
3
- 0.2,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-35_torch.float16_lf,0.525,0.5819221558338251,0.525,0.4586682135998428,1.0
4
- 0.4,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-70_torch.float16_lf,0.54,0.6445255881472232,0.54,0.5293020271128788,1.0
5
- 0.6,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-105_torch.float16_lf,0.43766666666666665,0.6565760150511494,0.43766666666666665,0.49167707971005714,1.0
6
- 0.8,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-140_torch.float16_lf,0.49933333333333335,0.6513093602943617,0.49933333333333335,0.49913143191054443,1.0
7
- 1.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-175_torch.float16_lf,0.5523333333333333,0.6622075519433389,0.5523333333333333,0.5627283867177305,1.0
8
- 1.2,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-210_torch.float16_lf,0.5403333333333333,0.64319564963495,0.5403333333333333,0.5598419070210608,1.0
9
- 1.4,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-245_torch.float16_lf,0.5843333333333334,0.6559808590166016,0.5843333333333334,0.6086767064128167,1.0
10
- 1.6,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-280_torch.float16_lf,0.5216666666666666,0.6604678981061621,0.5216666666666666,0.5615446578399996,1.0
11
- 1.8,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-315_torch.float16_lf,0.524,0.6673441240188523,0.524,0.5607458201939703,1.0
12
- 2.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-350_torch.float16_lf,0.507,0.6597337077954278,0.5070000000000001,0.5492280882625964,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct_torch.float16_lf,0.43833333333333335,0.5292917259914629,0.43833333333333335,0.42286875992486556,0.594
3
+ 0.2,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-35_torch.float16_lf,0.5223333333333333,0.5704911830866488,0.5223333333333333,0.454387436259078,1.0
4
+ 0.4,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-70_torch.float16_lf,0.542,0.6358012674347429,0.542,0.5272438410312219,1.0
5
+ 0.6,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-105_torch.float16_lf,0.44633333333333336,0.6477441598024034,0.44633333333333336,0.4917457459702999,1.0
6
+ 0.8,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-140_torch.float16_lf,0.5053333333333333,0.6438300456580985,0.5053333333333333,0.4995247505211914,1.0
7
+ 1.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-175_torch.float16_lf,0.558,0.6560369730369926,0.558,0.5632487818615118,1.0
8
+ 1.2,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-210_torch.float16_lf,0.5453333333333333,0.6357935773889876,0.5453333333333333,0.5594242895140294,1.0
9
+ 1.4,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-245_torch.float16_lf,0.5903333333333334,0.6503049529377274,0.5903333333333334,0.6094397514027766,1.0
10
+ 1.6,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-280_torch.float16_lf,0.5286666666666666,0.6532851084098983,0.5286666666666666,0.5617239467523474,1.0
11
+ 1.8,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-315_torch.float16_lf,0.5336666666666666,0.6607103736450911,0.5336666666666666,0.5622949959647037,1.0
12
+ 2.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-350_torch.float16_lf,0.5156666666666667,0.652809461208547,0.5156666666666667,0.549955024535151,1.0
data/Qwen2.5-0.5B-Instruct_shots_metrics.csv CHANGED
@@ -1,8 +1,8 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-00,0.443,0.5490534863315207,0.443,0.43178235266224163,0.594
3
- 5,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-05,0.177,0.49074939459487404,0.177,0.2155165894788838,0.004
4
- 10,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-10,0.35433333333333333,0.5213384036972462,0.35433333333333333,0.39783362635065245,0.068
5
- 20,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-20,0.43666666666666665,0.5234006681691764,0.43666666666666665,0.4691719255495575,0.37266666666666665
6
- 30,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-30,0.39066666666666666,0.5462493905687185,0.39066666666666666,0.4339604066000981,0.07566666666666666
7
- 40,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-40,0.4653333333333333,0.5468189581246721,0.4653333333333333,0.49752341605759137,0.324
8
- 50,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-50,0.5026666666666667,0.5610230233594029,0.5026666666666667,0.5163435163649445,0.24333333333333335
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-00,0.43833333333333335,0.5292917259914629,0.43833333333333335,0.42286875992486556,0.594
3
+ 5,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-05,0.17966666666666667,0.47516573853109806,0.17966666666666667,0.214144872117911,0.004
4
+ 10,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-10,0.351,0.5084853117995367,0.351,0.39097839594031075,0.068
5
+ 20,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-20,0.43366666666666664,0.513186330900278,0.43366666666666664,0.463747974034812,0.37266666666666665
6
+ 30,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-30,0.39,0.5367753683204347,0.39,0.4299603249123421,0.07566666666666666
7
+ 40,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-40,0.466,0.5400134144413437,0.466,0.49542975613961904,0.324
8
+ 50,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-50,0.496,0.5465409839032335,0.496,0.5069942984615308,0.24333333333333335
data/Qwen2.5-1.5B-Instruct_metrics.csv CHANGED
@@ -1,12 +1,12 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct_torch.float16_lf,0.19966666666666666,0.5279959815418013,0.19966666666666666,0.23918953371981191,0.9223333333333333
3
- 0.2,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-35_torch.float16_lf,0.481,0.6625717555914767,0.481,0.5396575906071639,0.9996666666666667
4
- 0.4,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-70_torch.float16_lf,0.5653333333333334,0.711044032475149,0.5653333333333334,0.6130876130683667,0.9996666666666667
5
- 0.6,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-105_torch.float16_lf,0.5303333333333333,0.7229828883930918,0.5303333333333333,0.5954306316407808,1.0
6
- 0.8,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-140_torch.float16_lf,0.6423333333333333,0.7326944345439858,0.6423333333333333,0.6760588124127741,1.0
7
- 1.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-175_torch.float16_lf,0.6266666666666667,0.716073329097764,0.6266666666666667,0.6524988509397216,1.0
8
- 1.2,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-210_torch.float16_lf,0.5773333333333334,0.7309423620832619,0.5773333333333334,0.6328015564736814,1.0
9
- 1.4,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-245_torch.float16_lf,0.6403333333333333,0.749792626106991,0.6403333333333333,0.679795778108406,1.0
10
- 1.6,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-280_torch.float16_lf,0.6233333333333333,0.7415417300032008,0.6233333333333333,0.6642642786690383,1.0
11
- 1.8,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-315_torch.float16_lf,0.5903333333333334,0.7358743162328453,0.5903333333333334,0.6381733773475835,1.0
12
- 2.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-350_torch.float16_lf,0.5966666666666667,0.7358100917578044,0.5966666666666667,0.6407733961630157,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct_torch.float16_lf,0.20166666666666666,0.5269756683734005,0.20166666666666666,0.24069835329504388,0.9223333333333333
3
+ 0.2,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-35_torch.float16_lf,0.48533333333333334,0.654166887199198,0.48533333333333334,0.5381849571995003,0.9996666666666667
4
+ 0.4,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-70_torch.float16_lf,0.573,0.7037737273232145,0.573,0.6131069400231612,0.9996666666666667
5
+ 0.6,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-105_torch.float16_lf,0.539,0.7162869126454278,0.539,0.5961610389687657,1.0
6
+ 0.8,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-140_torch.float16_lf,0.6443333333333333,0.7218750831357578,0.6443333333333333,0.6721473356905486,1.0
7
+ 1.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-175_torch.float16_lf,0.6296666666666667,0.7065049203038848,0.6296666666666667,0.6496809196018393,1.0
8
+ 1.2,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-210_torch.float16_lf,0.5836666666666667,0.7222805944180548,0.5836666666666667,0.6314346830311218,1.0
9
+ 1.4,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-245_torch.float16_lf,0.6493333333333333,0.7440287895607589,0.6493333333333333,0.6815314583590799,1.0
10
+ 1.6,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-280_torch.float16_lf,0.6293333333333333,0.7332138067544355,0.6293333333333333,0.6634330572585689,1.0
11
+ 1.8,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-315_torch.float16_lf,0.599,0.7297954686265763,0.599,0.6396292878324805,1.0
12
+ 2.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-350_torch.float16_lf,0.6056666666666667,0.7305580205770756,0.6056666666666667,0.6426785514786738,1.0
data/Qwen2.5-1.5B-Instruct_shots_metrics.csv CHANGED
@@ -1,8 +1,8 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-00,0.19966666666666666,0.5279959815418013,0.19966666666666666,0.23918953371981191,0.9223333333333333
3
- 5,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-05,0.3913333333333333,0.5906409192176565,0.3913333333333333,0.4387379376697362,0.8283333333333334
4
- 10,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-10,0.405,0.5886453916977137,0.405,0.46059038959324416,0.9156666666666666
5
- 20,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-20,0.228,0.5255112437643187,0.228,0.30386597855848074,0.676
6
- 30,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-30,0.23033333333333333,0.55368556787824,0.23033333333333333,0.3067125355762305,0.661
7
- 40,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-40,0.292,0.5667420801465655,0.292,0.375496356843247,0.5206666666666667
8
- 50,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-50,0.2876666666666667,0.5660207537890989,0.2876666666666667,0.36627420118815035,0.4603333333333333
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-00,0.20166666666666666,0.5269756683734005,0.20166666666666666,0.24069835329504388,0.9223333333333333
3
+ 5,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-05,0.3933333333333333,0.578886379886985,0.3933333333333333,0.43554636943558694,0.8283333333333334
4
+ 10,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-10,0.407,0.5820145311822223,0.407,0.459589777544246,0.9156666666666666
5
+ 20,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-20,0.232,0.5282610881631451,0.232,0.3093707499897376,0.676
6
+ 30,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-30,0.23,0.5479545947886839,0.23,0.3064381040560128,0.661
7
+ 40,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-40,0.29233333333333333,0.5608411738006117,0.29233333333333333,0.3751714671158081,0.5206666666666667
8
+ 50,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-50,0.29,0.5646814860840066,0.29,0.36883826526592467,0.4603333333333333
data/Qwen2.5-72B-Instruct_metrics.csv CHANGED
@@ -1,5 +1,12 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct_torch.bfloat16_4bit,0.755,0.7861877119461959,0.755,0.7540930716916622,0.5573333333333333
3
- 0.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-35_torch.bfloat16_4bit,0.7776666666666666,0.8064344404751805,0.7776666666666666,0.7902083134269027,0.5623333333333334
4
- 0.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-70_torch.bfloat16_4bit,0.7346666666666667,0.7919767732613179,0.7346666666666667,0.7595614261349122,0.5626666666666666
5
- 0.6,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-105_torch.bfloat16_4bit,0.739,0.8045199868378529,0.739,0.7672258374793208,0.5626666666666666
 
 
 
 
 
 
 
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct_torch.bfloat16_4bit_lf,0.7956666666666666,0.8098073411161181,0.7956666666666666,0.7771317592221199,0.994
3
+ 0.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.792,0.8180793658647517,0.792,0.80166512366027,1.0
4
+ 0.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.7716666666666666,0.8199569804721152,0.7716666666666666,0.7895879011938259,1.0
5
+ 0.6,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-105_torch.bfloat16_4bit_lf,0.798,0.8379062379534957,0.798,0.812148680520218,1.0
6
+ 0.8,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-140_torch.bfloat16_4bit_lf,0.8213333333333334,0.8447926258362122,0.8213333333333334,0.8299486611547571,1.0
7
+ 1.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-175_torch.bfloat16_4bit_lf,0.7643333333333333,0.8235366724638146,0.7643333333333333,0.7858148913986999,1.0
8
+ 1.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-210_torch.bfloat16_4bit_lf,0.7986666666666666,0.83233218480008,0.7986666666666666,0.8115886421806521,1.0
9
+ 1.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-245_torch.bfloat16_4bit_lf,0.7923333333333333,0.8231874218285514,0.7923333333333333,0.803363661387202,1.0
10
+ 1.6,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-280_torch.bfloat16_4bit_lf,0.7936666666666666,0.8268750473800219,0.7936666666666666,0.8057720333101867,1.0
11
+ 1.8,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-315_torch.bfloat16_4bit_lf,0.801,0.830389411421043,0.801,0.8117656427717702,1.0
12
+ 2.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-350_torch.bfloat16_4bit_lf,0.795,0.8280696193638868,0.795,0.8068114730639832,1.0
data/Qwen2.5-72B-Instruct_shots_metrics.csv CHANGED
@@ -1 +1,3 @@
1
- shots,model,run,accuracy,precision,recall,f1
 
 
 
1
+ shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-00,0.7956666666666666,0.8098073411161181,0.7956666666666666,0.7771317592221199,0.994
3
+ 5,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-05,0.819,0.8182324679666184,0.819,0.8095367865845521,0.9416666666666667
data/all_model_token_counts.csv CHANGED
@@ -1,60 +1,81 @@
1
  model_name,num_shots,max,min,mean,std
 
 
2
  Mistral-7B-v0.3-Chinese-Chat,0,928,694,799.354,15.567384660085061
3
  internlm2_5-7b-chat,0,511,426,461.91766666666666,7.767732430462529
4
  internlm2_5-7b-chat-1m,0,511,426,461.91766666666666,7.767732430462529
5
- Qwen2-7B-Instruct,0,517,426,465.33866666666665,8.617118029244592
6
- Llama3.1-8B-Chinese-Chat,0,652,512,571.091,9.115687078710652
7
  internlm2_5-20b-chat,0,511,426,461.91766666666666,7.767732430462529
8
- Llama3.1-70B-Chinese-Chat,0,652,512,571.091,9.115687078710652
9
- Qwen2-72B-Instruct,0,517,426,465.33866666666665,8.617118029244592
 
 
 
 
 
10
  Mistral-7B-v0.3-Chinese-Chat,5,2573,2339,2444.354,15.567384660085061
11
  internlm2_5-7b-chat,5,1351,1266,1301.9176666666667,7.767732430462529
12
  internlm2_5-7b-chat-1m,5,1351,1266,1301.9176666666667,7.767732430462529
13
- Qwen2-7B-Instruct,5,1381,1290,1329.3386666666668,8.617118029244592
14
- Llama3.1-8B-Chinese-Chat,5,1818,1678,1737.091,9.115687078710652
15
  internlm2_5-20b-chat,5,1351,1266,1301.9176666666667,7.767732430462529
16
- Llama3.1-70B-Chinese-Chat,5,1818,1678,1737.091,9.115687078710652
17
- Qwen2-72B-Instruct,5,1381,1290,1329.3386666666668,8.617118029244592
 
 
 
 
 
18
  Mistral-7B-v0.3-Chinese-Chat,10,4119,3885,3990.354,15.567384660085061
19
  internlm2_5-7b-chat,10,2245,2160,2195.9176666666667,7.767732430462529
20
  internlm2_5-7b-chat-1m,10,2245,2160,2195.9176666666667,7.767732430462529
21
- Qwen2-7B-Instruct,10,2289,2198,2237.3386666666665,8.617118029244592
22
- Llama3.1-8B-Chinese-Chat,10,2914,2774,2833.091,9.115687078710652
23
  internlm2_5-20b-chat,10,2245,2160,2195.9176666666667,7.767732430462529
24
- Llama3.1-70B-Chinese-Chat,10,2914,2774,2833.091,9.115687078710652
25
- Qwen2-72B-Instruct,10,2289,2198,2237.3386666666665,8.617118029244592
 
 
 
 
 
26
  Mistral-7B-v0.3-Chinese-Chat,20,7392,7158,7263.354,15.567384660085061
27
  internlm2_5-7b-chat,20,4065,3980,4015.9176666666667,7.767732430462529
28
  internlm2_5-7b-chat-1m,20,4065,3980,4015.9176666666667,7.767732430462529
29
- Qwen2-7B-Instruct,20,4176,4085,4124.3386666666665,8.617118029244592
30
- Llama3.1-8B-Chinese-Chat,20,5283,5143,5202.091,9.115687078710652
31
  internlm2_5-20b-chat,20,4065,3980,4015.9176666666667,7.767732430462529
32
- Llama3.1-70B-Chinese-Chat,20,5283,5143,5202.091,9.115687078710652
33
- Qwen2-72B-Instruct,20,4176,4085,4124.3386666666665,8.617118029244592
 
 
 
 
 
34
  Mistral-7B-v0.3-Chinese-Chat,30,10804,10570,10675.354,15.567384660085061
35
  internlm2_5-7b-chat,30,5903,5818,5853.917666666666,7.767732430462529
36
  internlm2_5-7b-chat-1m,30,5903,5818,5853.917666666666,7.767732430462529
37
- Qwen2-7B-Instruct,30,6107,6016,6055.3386666666665,8.617118029244592
38
- Llama3.1-8B-Chinese-Chat,30,7768,7628,7687.091,9.115687078710652
39
  internlm2_5-20b-chat,30,5903,5818,5853.917666666666,7.767732430462529
40
- Llama3.1-70B-Chinese-Chat,30,7768,7628,7687.091,9.115687078710652
41
- Qwen2-72B-Instruct,30,6107,6016,6055.3386666666665,8.617118029244592
 
 
 
 
 
42
  Mistral-7B-v0.3-Chinese-Chat,40,14152,13918,14023.354,15.567384660085061
43
  internlm2_5-7b-chat,40,7709,7624,7659.917666666666,7.767732430462529
44
  internlm2_5-7b-chat-1m,40,7709,7624,7659.917666666666,7.767732430462529
45
- Qwen2-7B-Instruct,40,8010,7919,7958.3386666666665,8.617118029244592
46
- Llama3.1-8B-Chinese-Chat,40,10217,10077,10136.091,9.115687078710652
47
  internlm2_5-20b-chat,40,7709,7624,7659.917666666666,7.767732430462529
48
- Llama3.1-70B-Chinese-Chat,40,10217,10077,10136.091,9.115687078710652
49
- Qwen2-72B-Instruct,40,8010,7919,7958.3386666666665,8.617118029244592
 
 
 
 
 
50
  Mistral-7B-v0.3-Chinese-Chat,50,17588,17354,17459.354,15.567384660085061
51
  internlm2_5-7b-chat,50,9561,9476,9511.917666666666,7.767732430462529
52
  internlm2_5-7b-chat-1m,50,9561,9476,9511.917666666666,7.767732430462529
53
- Qwen2-7B-Instruct,50,9961,9870,9909.338666666667,8.617118029244592
54
- Llama3.1-8B-Chinese-Chat,50,12719,12579,12638.091,9.115687078710652
55
  internlm2_5-20b-chat,50,9561,9476,9511.917666666666,7.767732430462529
56
- Llama3.1-70B-Chinese-Chat,50,12719,12579,12638.091,9.115687078710652
57
- Qwen2-72B-Instruct,50,9961,9870,9909.338666666667,8.617118029244592
 
 
 
58
  gpt-4o,0,606,464,524.8063333333333,10.057594723695004
59
  gpt-4o-mini,0,606,464,524.8063333333333,10.057594723695004
60
  o1-preview,0,925,682,797.5953333333333,16.41724967580933
 
1
  model_name,num_shots,max,min,mean,std
2
+ Llama3.1-8B-Chinese-Chat,0,652,512,571.091,9.115687078710652
3
+ Llama3.1-70B-Chinese-Chat,0,652,512,571.091,9.115687078710652
4
  Mistral-7B-v0.3-Chinese-Chat,0,928,694,799.354,15.567384660085061
5
  internlm2_5-7b-chat,0,511,426,461.91766666666666,7.767732430462529
6
  internlm2_5-7b-chat-1m,0,511,426,461.91766666666666,7.767732430462529
 
 
7
  internlm2_5-20b-chat,0,511,426,461.91766666666666,7.767732430462529
8
+ Qwen2.5-0.5B-Instruct,0,517,426,465.33866666666665,8.617118029244592
9
+ Qwen2.5-1.5B-Instruct,0,517,426,465.33866666666665,8.617118029244592
10
+ Qwen2.5-3B-Instruct,0,517,426,465.33866666666665,8.617118029244592
11
+ Qwen2.5-7B-Instruct,0,517,426,465.33866666666665,8.617118029244592
12
+ Qwen2.5-72B-Instruct,0,517,426,465.33866666666665,8.617118029244592
13
+ Llama3.1-8B-Chinese-Chat,5,1818,1678,1737.091,9.115687078710652
14
+ Llama3.1-70B-Chinese-Chat,5,1818,1678,1737.091,9.115687078710652
15
  Mistral-7B-v0.3-Chinese-Chat,5,2573,2339,2444.354,15.567384660085061
16
  internlm2_5-7b-chat,5,1351,1266,1301.9176666666667,7.767732430462529
17
  internlm2_5-7b-chat-1m,5,1351,1266,1301.9176666666667,7.767732430462529
 
 
18
  internlm2_5-20b-chat,5,1351,1266,1301.9176666666667,7.767732430462529
19
+ Qwen2.5-0.5B-Instruct,5,1381,1290,1329.3386666666668,8.617118029244592
20
+ Qwen2.5-1.5B-Instruct,5,1381,1290,1329.3386666666668,8.617118029244592
21
+ Qwen2.5-3B-Instruct,5,1381,1290,1329.3386666666668,8.617118029244592
22
+ Qwen2.5-7B-Instruct,5,1381,1290,1329.3386666666668,8.617118029244592
23
+ Qwen2.5-72B-Instruct,5,1381,1290,1329.3386666666668,8.617118029244592
24
+ Llama3.1-8B-Chinese-Chat,10,2914,2774,2833.091,9.115687078710652
25
+ Llama3.1-70B-Chinese-Chat,10,2914,2774,2833.091,9.115687078710652
26
  Mistral-7B-v0.3-Chinese-Chat,10,4119,3885,3990.354,15.567384660085061
27
  internlm2_5-7b-chat,10,2245,2160,2195.9176666666667,7.767732430462529
28
  internlm2_5-7b-chat-1m,10,2245,2160,2195.9176666666667,7.767732430462529
 
 
29
  internlm2_5-20b-chat,10,2245,2160,2195.9176666666667,7.767732430462529
30
+ Qwen2.5-0.5B-Instruct,10,2289,2198,2237.3386666666665,8.617118029244592
31
+ Qwen2.5-1.5B-Instruct,10,2289,2198,2237.3386666666665,8.617118029244592
32
+ Qwen2.5-3B-Instruct,10,2289,2198,2237.3386666666665,8.617118029244592
33
+ Qwen2.5-7B-Instruct,10,2289,2198,2237.3386666666665,8.617118029244592
34
+ Qwen2.5-72B-Instruct,10,2289,2198,2237.3386666666665,8.617118029244592
35
+ Llama3.1-8B-Chinese-Chat,20,5283,5143,5202.091,9.115687078710652
36
+ Llama3.1-70B-Chinese-Chat,20,5283,5143,5202.091,9.115687078710652
37
  Mistral-7B-v0.3-Chinese-Chat,20,7392,7158,7263.354,15.567384660085061
38
  internlm2_5-7b-chat,20,4065,3980,4015.9176666666667,7.767732430462529
39
  internlm2_5-7b-chat-1m,20,4065,3980,4015.9176666666667,7.767732430462529
 
 
40
  internlm2_5-20b-chat,20,4065,3980,4015.9176666666667,7.767732430462529
41
+ Qwen2.5-0.5B-Instruct,20,4176,4085,4124.3386666666665,8.617118029244592
42
+ Qwen2.5-1.5B-Instruct,20,4176,4085,4124.3386666666665,8.617118029244592
43
+ Qwen2.5-3B-Instruct,20,4176,4085,4124.3386666666665,8.617118029244592
44
+ Qwen2.5-7B-Instruct,20,4176,4085,4124.3386666666665,8.617118029244592
45
+ Qwen2.5-72B-Instruct,20,4176,4085,4124.3386666666665,8.617118029244592
46
+ Llama3.1-8B-Chinese-Chat,30,7768,7628,7687.091,9.115687078710652
47
+ Llama3.1-70B-Chinese-Chat,30,7768,7628,7687.091,9.115687078710652
48
  Mistral-7B-v0.3-Chinese-Chat,30,10804,10570,10675.354,15.567384660085061
49
  internlm2_5-7b-chat,30,5903,5818,5853.917666666666,7.767732430462529
50
  internlm2_5-7b-chat-1m,30,5903,5818,5853.917666666666,7.767732430462529
 
 
51
  internlm2_5-20b-chat,30,5903,5818,5853.917666666666,7.767732430462529
52
+ Qwen2.5-0.5B-Instruct,30,6107,6016,6055.3386666666665,8.617118029244592
53
+ Qwen2.5-1.5B-Instruct,30,6107,6016,6055.3386666666665,8.617118029244592
54
+ Qwen2.5-3B-Instruct,30,6107,6016,6055.3386666666665,8.617118029244592
55
+ Qwen2.5-7B-Instruct,30,6107,6016,6055.3386666666665,8.617118029244592
56
+ Qwen2.5-72B-Instruct,30,6107,6016,6055.3386666666665,8.617118029244592
57
+ Llama3.1-8B-Chinese-Chat,40,10217,10077,10136.091,9.115687078710652
58
+ Llama3.1-70B-Chinese-Chat,40,10217,10077,10136.091,9.115687078710652
59
  Mistral-7B-v0.3-Chinese-Chat,40,14152,13918,14023.354,15.567384660085061
60
  internlm2_5-7b-chat,40,7709,7624,7659.917666666666,7.767732430462529
61
  internlm2_5-7b-chat-1m,40,7709,7624,7659.917666666666,7.767732430462529
 
 
62
  internlm2_5-20b-chat,40,7709,7624,7659.917666666666,7.767732430462529
63
+ Qwen2.5-0.5B-Instruct,40,8010,7919,7958.3386666666665,8.617118029244592
64
+ Qwen2.5-1.5B-Instruct,40,8010,7919,7958.3386666666665,8.617118029244592
65
+ Qwen2.5-3B-Instruct,40,8010,7919,7958.3386666666665,8.617118029244592
66
+ Qwen2.5-7B-Instruct,40,8010,7919,7958.3386666666665,8.617118029244592
67
+ Qwen2.5-72B-Instruct,40,8010,7919,7958.3386666666665,8.617118029244592
68
+ Llama3.1-8B-Chinese-Chat,50,12719,12579,12638.091,9.115687078710652
69
+ Llama3.1-70B-Chinese-Chat,50,12719,12579,12638.091,9.115687078710652
70
  Mistral-7B-v0.3-Chinese-Chat,50,17588,17354,17459.354,15.567384660085061
71
  internlm2_5-7b-chat,50,9561,9476,9511.917666666666,7.767732430462529
72
  internlm2_5-7b-chat-1m,50,9561,9476,9511.917666666666,7.767732430462529
 
 
73
  internlm2_5-20b-chat,50,9561,9476,9511.917666666666,7.767732430462529
74
+ Qwen2.5-0.5B-Instruct,50,9961,9870,9909.338666666667,8.617118029244592
75
+ Qwen2.5-1.5B-Instruct,50,9961,9870,9909.338666666667,8.617118029244592
76
+ Qwen2.5-3B-Instruct,50,9961,9870,9909.338666666667,8.617118029244592
77
+ Qwen2.5-7B-Instruct,50,9961,9870,9909.338666666667,8.617118029244592
78
+ Qwen2.5-72B-Instruct,50,9961,9870,9909.338666666667,8.617118029244592
79
  gpt-4o,0,606,464,524.8063333333333,10.057594723695004
80
  gpt-4o-mini,0,606,464,524.8063333333333,10.057594723695004
81
  o1-preview,0,925,682,797.5953333333333,16.41724967580933
data/best_metrics.csv CHANGED
@@ -1,14 +1,16 @@
1
  index,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 1,internlm2_5-7b-chat (0.8-epoch),internlm2_5-7b-chat (0.8-epoch),0.7496666666666667,0.8041871978859686,0.7496666666666667,0.7660159670998776,1.0
3
- 2,internlm2_5-7b-chat-1m (0.8-epoch),internlm2_5-7b-chat-1m (0.8-epoch),0.803,0.8031411888150441,0.803,0.8028064320197301,1.0
4
- 3,internlm2_5-20b-chat (0.8-epoch),internlm2_5-20b-chat (0.8-epoch),0.795,0.817457691710893,0.795,0.8027552955647029,1.0
5
- 4,Qwen2-7B-Instruct (0.4-epoch),Qwen2-7B-Instruct (0.4-epoch),0.759,0.8005303465799652,0.759,0.7748745026535183,1.0
6
- 5,Qwen2-72B-Instruct (1.8-epoch),Qwen2-72B-Instruct (1.8-epoch),0.784,0.8354349234761956,0.784,0.804194683154365,1.0
7
- 6,Llama3.1-8B-Chinese-Chat (1.0-epoch),Llama3.1-8B-Chinese-Chat (1.0-epoch),0.78,0.810582723471486,0.78,0.7924651054056209,1.0
8
- 7,Llama3.1-70B-Chinese-Chat (1.0-epoch),Llama3.1-70B-Chinese-Chat (1.0-epoch),0.7963333333333333,0.8248972880055918,0.7963333333333333,0.8076868978089201,1.0
9
- 8,gpt-4o-mini (0-shot),gpt-4o-mini (0-shot),0.7176666666666667,0.785706730193659,0.7176666666666667,0.7296061848734905,1.0
10
- 9,o1-mini (20-shot),o1-mini (20-shot),0.7343333333333333,0.786101455887261,0.7343333333333333,0.7535300565051624,0.999
11
- 10,gpt-4o (10-shot),gpt-4o (10-shot),0.7916666666666666,0.8227707658360168,0.7916666666666666,0.803614688453356,0.9996666666666667
12
- 11,o1-preview (50-shot),o1-preview (50-shot),0.7546666666666667,0.7979981023789272,0.7546666666666667,0.7708181822112403,0.9996666666666667
13
- 12,Ensemble Model (Open Source),Ensemble Model (Open Source),0.8193333333333334,0.8407464756633664,0.8193333333333334,0.828054127213081,1.0
14
- 13,Ensemble Model (OpenAI),Ensemble Model (OpenAI),0.7986666666666666,0.8223071972084313,0.7986666666666666,0.8080230503376233,1.0
 
 
 
1
  index,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 1,Llama3.1-8B (1.0-epoch),Llama3.1-8B (1.0-epoch),0.7853333333333333,0.8062405645226312,0.7853333333333333,0.7938991590982061,1.0
3
+ 2,Llama3.1-70B (1.0-epoch),Llama3.1-70B (1.0-epoch),0.809,0.8282732906153989,0.809,0.8166997776775797,1.0
4
+ 3,Mistral-7B (1.4-epoch),Mistral-7B (1.4-epoch),0.7616666666666667,0.789634957005121,0.7616666666666667,0.7721210086098353,1.0
5
+ 4,InternLM2.5-7B (1.4-epoch),InternLM2.5-7B (1.4-epoch),0.762,0.8089123492151512,0.762,0.7753217972757948,1.0
6
+ 5,InternLM2.5-7B-1M (0.8-epoch),InternLM2.5-7B-1M (0.8-epoch),0.8076666666666666,0.8048844422436796,0.8076666666666666,0.8049749805997191,1.0
7
+ 6,InternLM2.5-20B (0.8-epoch),InternLM2.5-20B (0.8-epoch),0.8063333333333333,0.8207793607428686,0.8063333333333333,0.811239851005161,1.0
8
+ 7,Qwen2.5-0.5B (1.4-epoch),Qwen2.5-0.5B (1.4-epoch),0.5903333333333334,0.6503049529377274,0.5903333333333334,0.6094397514027766,1.0
9
+ 8,Qwen2.5-1.5B (1.4-epoch),Qwen2.5-1.5B (1.4-epoch),0.6493333333333333,0.7440287895607589,0.6493333333333333,0.6815314583590799,1.0
10
+ 9,Qwen2.5-3B (1.4-epoch),Qwen2.5-3B (1.4-epoch),0.7326666666666667,0.7716369414239331,0.7326666666666667,0.7468182490858526,1.0
11
+ 10,Qwen2.5-7B (1.0-epoch),Qwen2.5-7B (1.0-epoch),0.782,0.8023938029436536,0.782,0.7888740758699296,0.9993333333333333
12
+ 11,Qwen2.5-72B (0.8-epoch),Qwen2.5-72B (0.8-epoch),0.8213333333333334,0.8447926258362122,0.8213333333333334,0.8299486611547571,1.0
13
+ 12,gpt-4o-mini (0-shot),gpt-4o-mini (0-shot),0.7166666666666667,0.7800918028217227,0.7166666666666667,0.7260056154268697,1.0
14
+ 13,gpt-4o (10-shot),gpt-4o (10-shot),0.8013333333333333,0.8246834383036209,0.8013333333333333,0.8098901724387172,0.9996666666666667
15
+ 14,o1-mini (50-shot),o1-mini (50-shot),0.7536666666666667,0.7755130422727871,0.7536666666666667,0.7602241520634903,1.0
16
+ 15,o1-preview (50-shot),o1-preview (50-shot),0.7576666666666667,0.7986597718440941,0.7576666666666667,0.7718331604189232,0.9996666666666667
data/best_results.csv CHANGED
The diff for this file is too large to render. See raw diff
 
data/few-shots_metrics.csv CHANGED
@@ -1,40 +1,91 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-00,0.705,0.7398041613378253,0.705,0.6906357423169466,1.0
3
- 10,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-10,0.5533333333333333,0.7301739373336078,0.5533333333333333,0.625097481985829,0.9883333333333332
4
- 0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-00,0.4813333333333333,0.7605248207587668,0.4813333333333333,0.5244515621126862,0.9986666666666668
5
- 10,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-10,0.6473333333333333,0.7282065610714444,0.6473333333333333,0.665824871588245,0.8866666666666667
6
- 0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/shots-00,0.564,0.7745256693833624,0.564,0.6352190975436365,0.6726666666666666
7
- 0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-00,0.683,0.7493103872717293,0.683,0.710140098232232,0.9996666666666668
8
- 10,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-10,0.5646666666666667,0.7391197908117386,0.5646666666666667,0.6064049121095652,0.9896666666666668
9
- 0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch/shots-00,0.7516666666666667,0.7949378981748352,0.7516666666666667,0.7572499605227642,0.9773333333333334
10
- 0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-00,0.742,0.7477056799746837,0.742,0.7371050181385632,0.8033333333333333
11
- 10,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-10,0.6676666666666666,0.7834080522821993,0.6676666666666666,0.7082605860921491,0.9623333333333334
12
- 0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-00,0.7636666666666667,0.7806653325131986,0.7636666666666667,0.7525813484548423,0.0096666666666666
13
- 5,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-05,0.7536666666666667,0.772126097633354,0.7536666666666667,0.7545029613768596,0.79
14
- 10,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-10,0.754,0.7729477984842943,0.754,0.756682017266956,0.8326666666666667
15
- 20,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-20,0.738,0.7566938786102072,0.738,0.7348961489952073,0.819
16
- 0,gpt-4o-mini,gpt-4o-mini/shots-00,0.7176666666666667,0.785706730193659,0.7176666666666667,0.7296061848734905,0.9916666666666668
17
- 5,gpt-4o-mini,gpt-4o-mini/shots-05,0.7176666666666667,0.7767294185987051,0.7176666666666667,0.7181068311028772,0.9996666666666668
18
- 10,gpt-4o-mini,gpt-4o-mini/shots-10,0.6793333333333333,0.7728086050218999,0.6793333333333333,0.6916749681933937,0.9983333333333332
19
- 20,gpt-4o-mini,gpt-4o-mini/shots-20,0.6623333333333333,0.7686706009175459,0.6623333333333333,0.6798015109939115,0.998
20
- 30,gpt-4o-mini,gpt-4o-mini/shots-30,0.6873333333333334,0.7684209723431035,0.6873333333333334,0.6913018667081989,0.999
21
- 40,gpt-4o-mini,gpt-4o-mini/shots-40,0.6923333333333334,0.7639874967862498,0.6923333333333334,0.6924934068935911,0.9986666666666668
22
- 50,gpt-4o-mini,gpt-4o-mini/shots-50,0.717,0.7692638634416518,0.717,0.7105227254860433,0.9993333333333332
23
- 0,o1-mini,o1-mini/shots-00,0.7083333333333334,0.7848098266888749,0.7083333333333334,0.7377068425566796,0.999
24
- 5,o1-mini,o1-mini/shots-05,0.724,0.7905045610386181,0.724,0.7482963122126776,0.9966666666666668
25
- 10,o1-mini,o1-mini/shots-10,0.725,0.7892485648334764,0.725,0.7485623974683336,0.9943333333333332
26
- 20,o1-mini,o1-mini/shots-20,0.7343333333333333,0.786101455887261,0.7343333333333333,0.7535300565051624,0.9946666666666668
27
- 0,gpt-4o,gpt-4o/shots-00,0.782,0.8204048322982596,0.782,0.7953019682198627,0.066
28
- 5,gpt-4o,gpt-4o/shots-05,0.7873333333333333,0.8230974205170392,0.7873333333333333,0.8000290527498529,0.998
29
- 10,gpt-4o,gpt-4o/shots-10,0.7916666666666666,0.8227707658360168,0.7916666666666666,0.803614688453356,0.9996666666666668
30
- 20,gpt-4o,gpt-4o/shots-20,0.7816666666666666,0.8204541793856629,0.7816666666666666,0.7967017169880498,0.9993333333333332
31
- 30,gpt-4o,gpt-4o/shots-30,0.7886666666666666,0.8260847852316618,0.7886666666666666,0.8030949295928699,0.999
32
- 40,gpt-4o,gpt-4o/shots-40,0.784,0.8233509309291644,0.784,0.7993336791122846,0.9973333333333332
33
- 50,gpt-4o,gpt-4o/shots-50,0.787,0.8234800466218334,0.787,0.8013530974301947,0.9993333333333332
34
- 0,o1-preview,o1-preview/shots-00,0.721,0.7849371317342158,0.721,0.7451207069815194,0.998
35
- 5,o1-preview,o1-preview/shots-05,0.7313333333333333,0.7878283093765627,0.7313333333333333,0.7535489719321234,0.979
36
- 10,o1-preview,o1-preview/shots-10,0.749,0.7964482186234537,0.749,0.7677316493549238,0.9873333333333332
37
- 20,o1-preview,o1-preview/shots-20,0.7443333333333333,0.7911442834260676,0.7443333333333333,0.7625144090816939,0.9853333333333332
38
- 30,o1-preview,o1-preview/shots-30,0.7473333333333333,0.7920604378746952,0.7473333333333333,0.7643977099599287,0.984
39
- 40,o1-preview,o1-preview/shots-40,0.7506666666666667,0.7964679024468982,0.7506666666666667,0.7674109766459014,0.984
40
- 50,o1-preview,o1-preview/shots-50,0.7546666666666667,0.7979981023789272,0.7546666666666667,0.7708181822112403,0.9816666666666668
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,gpt-4o-mini,gpt-4o-mini/shots-00,0.7166666666666667,0.7800918028217227,0.7166666666666667,0.7260056154268697,0.9916666666666668
3
+ 5,gpt-4o-mini,gpt-4o-mini/shots-05,0.7203333333333334,0.7754800244789168,0.7203333333333334,0.718540502683781,0.9996666666666668
4
+ 10,gpt-4o-mini,gpt-4o-mini/shots-10,0.6836666666666666,0.7701177891593667,0.6836666666666666,0.6932016303210964,0.9983333333333332
5
+ 20,gpt-4o-mini,gpt-4o-mini/shots-20,0.6616666666666666,0.7627467933668375,0.6616666666666666,0.677372757519069,0.998
6
+ 30,gpt-4o-mini,gpt-4o-mini/shots-30,0.6876666666666666,0.7663381611066244,0.6876666666666666,0.6896169854446027,0.999
7
+ 40,gpt-4o-mini,gpt-4o-mini/shots-40,0.6903333333333334,0.7603850760051853,0.6903333333333334,0.688393665975117,0.9986666666666668
8
+ 50,gpt-4o-mini,gpt-4o-mini/shots-50,0.7143333333333334,0.7654214682013311,0.7143333333333334,0.7056961582308003,0.9993333333333332
9
+ 0,gpt-4o,gpt-4o/shots-00,0.792,0.8234582231232066,0.792,0.8022633746318892,0.066
10
+ 5,gpt-4o,gpt-4o/shots-05,0.7973333333333333,0.8251066339666824,0.7973333333333333,0.8066429877716694,0.998
11
+ 10,gpt-4o,gpt-4o/shots-10,0.8013333333333333,0.8246834383036209,0.8013333333333333,0.8098901724387172,0.9996666666666668
12
+ 20,gpt-4o,gpt-4o/shots-20,0.79,0.822098231279132,0.79,0.8020290214439503,0.9993333333333332
13
+ 30,gpt-4o,gpt-4o/shots-30,0.7946666666666666,0.8259436682564079,0.7946666666666666,0.8063113377291872,0.999
14
+ 40,gpt-4o,gpt-4o/shots-40,0.7906666666666666,0.8242154446428003,0.7906666666666666,0.803356987717753,0.9973333333333332
15
+ 50,gpt-4o,gpt-4o/shots-50,0.798,0.8274250231711487,0.798,0.8091066504350897,0.9993333333333332
16
+ 0,o1-mini,o1-mini/shots-00,0.7133333333333334,0.78301872209321,0.7133333333333334,0.7402734333211688,0.999
17
+ 5,o1-mini,o1-mini/shots-05,0.7313333333333333,0.7913577967036569,0.7313333333333333,0.7532525881890013,0.9966666666666668
18
+ 10,o1-mini,o1-mini/shots-10,0.7283333333333334,0.7851844846890333,0.7283333333333334,0.7490987096521479,0.9943333333333332
19
+ 20,o1-mini,o1-mini/shots-20,0.7373333333333333,0.7815727856803751,0.7373333333333333,0.7533353509620383,0.9946666666666668
20
+ 30,o1-mini,o1-mini/shots-30,0.748,0.779168441371953,0.748,0.7583397172973073,0.9976666666666668
21
+ 40,o1-mini,o1-mini/shots-40,0.7496666666666667,0.775765877349714,0.7496666666666667,0.757640226210139,0.9976666666666668
22
+ 50,o1-mini,o1-mini/shots-50,0.7536666666666667,0.7755130422727871,0.7536666666666667,0.7602241520634903,0.9976666666666668
23
+ 0,o1-preview,o1-preview/shots-00,0.725,0.7860443296236067,0.725,0.7471736898827371,0.998
24
+ 5,o1-preview,o1-preview/shots-05,0.736,0.789169445854742,0.736,0.7557068489703724,0.979
25
+ 10,o1-preview,o1-preview/shots-10,0.7513333333333333,0.7947574632958824,0.7513333333333333,0.7673707529850041,0.9873333333333332
26
+ 20,o1-preview,o1-preview/shots-20,0.7483333333333333,0.790639591375103,0.7483333333333333,0.763324860719675,0.9853333333333332
27
+ 30,o1-preview,o1-preview/shots-30,0.7513333333333333,0.792049804996314,0.7513333333333333,0.7654800949250774,0.984
28
+ 40,o1-preview,o1-preview/shots-40,0.7526666666666667,0.795308022968859,0.7526666666666667,0.7672762517397222,0.984
29
+ 50,o1-preview,o1-preview/shots-50,0.7576666666666667,0.7986597718440941,0.7576666666666667,0.7718331604189232,0.9816666666666668
30
+ 0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-00,0.7343333333333333,0.7375752740091942,0.7343333333333333,0.7270283652909943,0.8033333333333333
31
+ 5,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-05,0.7056666666666667,0.7508515184863084,0.7056666666666667,0.7230574380518462,0.9886666666666668
32
+ 10,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-10,0.6736666666666666,0.7776004745989736,0.6736666666666666,0.7094104807112239,0.9623333333333334
33
+ 20,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-20,0.767,0.764982587229615,0.767,0.7638473265780445,0.979
34
+ 30,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-30,0.7713333333333333,0.7725685630276532,0.7713333333333333,0.7692692690410152,0.7326666666666667
35
+ 40,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-40,0.6873333333333334,0.773294758147205,0.6873333333333334,0.7075877720686631,0.759
36
+ 50,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-50,0.7176666666666667,0.7599215931134234,0.7176666666666667,0.7203550920641806,0.6623333333333333
37
+ 0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-00,0.7646666666666667,0.7804609488644828,0.7646666666666667,0.7497548621711109,0.0096666666666666
38
+ 5,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-05,0.754,0.7675695134276339,0.754,0.7530665717237273,0.79
39
+ 10,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-10,0.756,0.7695738042762151,0.756,0.7563878737797524,0.8326666666666667
40
+ 20,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-20,0.7406666666666667,0.7560876641054418,0.7406666666666667,0.7360011002310723,0.819
41
+ 30,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-30,0.7603333333333333,0.7710641222872985,0.7603333333333333,0.7570501796584528,0.548
42
+ 0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-00,0.6923333333333334,0.7009179792741449,0.6923333333333334,0.6605899639694456,0.0116666666666666
43
+ 5,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-05,0.6546666666666666,0.7415422757067709,0.6546666666666666,0.684189810233595,0.142
44
+ 10,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-10,0.612,0.7259976964524691,0.612,0.6501410678512595,0.1063333333333333
45
+ 20,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-20,0.6336666666666667,0.7315100617022602,0.6336666666666667,0.6683245802083553,0.0826666666666666
46
+ 30,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-30,0.665,0.7374233826761456,0.665,0.6872462947319797,0.07
47
+ 0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-00,0.7063333333333334,0.7369785607161373,0.7063333333333334,0.6895815239121195,1.0
48
+ 5,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-05,0.747,0.7433195768374967,0.747,0.7232456014841266,0.999
49
+ 10,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-10,0.559,0.7306434812774306,0.559,0.6287391975839828,0.9883333333333332
50
+ 20,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-20,0.6466666666666666,0.7143354332969056,0.6466666666666666,0.6738164117926014,0.9473333333333334
51
+ 30,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-30,0.626,0.7223442225693745,0.626,0.6494216734706632,0.9403333333333334
52
+ 40,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-40,0.64,0.7020393671564193,0.64,0.611996460461355,0.9813333333333332
53
+ 50,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-50,0.6116666666666667,0.6808793455512054,0.6116666666666667,0.5502581431071487,0.9803333333333332
54
+ 0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-00,0.4923333333333333,0.7570993062022159,0.4923333333333333,0.5279738886353613,0.9986666666666668
55
+ 5,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-05,0.7753333333333333,0.7586378181445387,0.7753333333333333,0.7665405919258307,0.9453333333333334
56
+ 10,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-10,0.654,0.7251381758855274,0.654,0.6681655588675279,0.8866666666666667
57
+ 20,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-20,0.677,0.7296467412730754,0.677,0.6780570012166849,0.8213333333333334
58
+ 30,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-30,0.68,0.7425906069240685,0.68,0.6837924261094331,0.8236666666666667
59
+ 40,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-40,0.726,0.7533750344411337,0.726,0.7132456474026365,0.8336666666666667
60
+ 50,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-50,0.7173333333333334,0.7471186719787132,0.7173333333333334,0.6980283743779222,0.8846666666666667
61
+ 0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/shots-00,0.575,0.7745319004159336,0.575,0.6416875854199033,0.6726666666666666
62
+ 0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-00,0.4383333333333333,0.5292917259914629,0.4383333333333333,0.4228687599248655,0.594
63
+ 5,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-05,0.1796666666666666,0.475165738531098,0.1796666666666666,0.214144872117911,0.004
64
+ 10,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-10,0.351,0.5084853117995367,0.351,0.3909783959403107,0.068
65
+ 20,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-20,0.4336666666666666,0.513186330900278,0.4336666666666666,0.463747974034812,0.3726666666666666
66
+ 30,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-30,0.39,0.5367753683204347,0.39,0.4299603249123421,0.0756666666666666
67
+ 40,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-40,0.466,0.5400134144413437,0.466,0.495429756139619,0.324
68
+ 50,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-50,0.496,0.5465409839032335,0.496,0.5069942984615308,0.2433333333333333
69
+ 0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-00,0.2016666666666666,0.5269756683734005,0.2016666666666666,0.2406983532950438,0.9223333333333332
70
+ 5,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-05,0.3933333333333333,0.578886379886985,0.3933333333333333,0.4355463694355869,0.8283333333333334
71
+ 10,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-10,0.407,0.5820145311822223,0.407,0.459589777544246,0.9156666666666666
72
+ 20,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-20,0.232,0.5282610881631451,0.232,0.3093707499897376,0.676
73
+ 30,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-30,0.23,0.5479545947886839,0.23,0.3064381040560128,0.661
74
+ 40,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-40,0.2923333333333333,0.5608411738006117,0.2923333333333333,0.3751714671158081,0.5206666666666667
75
+ 50,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-50,0.29,0.5646814860840066,0.29,0.3688382652659246,0.4603333333333333
76
+ 0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-00,0.5796666666666667,0.6966500240864278,0.5796666666666667,0.5506370828782681,1.0
77
+ 5,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-05,0.639,0.7226431221398603,0.639,0.641568790114368,0.9973333333333332
78
+ 10,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-10,0.625,0.7164154004131771,0.625,0.6402584852791593,0.995
79
+ 20,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-20,0.4666666666666667,0.6987641430848737,0.4666666666666667,0.5265074036660548,0.9316666666666666
80
+ 30,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-30,0.475,0.6880994914236809,0.475,0.5310948082593374,0.904
81
+ 40,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-40,0.584,0.7065303262365236,0.584,0.6214992664375876,0.7173333333333334
82
+ 50,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-50,0.6093333333333333,0.7120506480394511,0.6093333333333333,0.6451959368825358,0.574
83
+ 0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-00,0.644,0.7200261355300325,0.644,0.6101052277961244,1.0
84
+ 5,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-05,0.6346666666666667,0.7653343185471776,0.6346666666666667,0.6219419633691871,0.998
85
+ 10,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-10,0.678,0.7675951017673515,0.678,0.6790860659550377,0.9796666666666668
86
+ 20,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-20,0.7353333333333333,0.7702034737275962,0.7353333333333333,0.7278047438569933,0.807
87
+ 30,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-30,0.7646666666666667,0.7787918401418651,0.7646666666666667,0.7527649874769439,0.805
88
+ 40,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-40,0.759,0.7736852689131295,0.759,0.7472252604775926,0.8546666666666667
89
+ 50,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-50,0.7586666666666667,0.7640431634617543,0.7586666666666667,0.7414332963557551,0.7563333333333333
90
+ 0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-00,0.7956666666666666,0.8098073411161181,0.7956666666666666,0.7771317592221199,0.994
91
+ 5,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-05,0.819,0.8182324679666184,0.819,0.8095367865845521,0.9416666666666668
data/fine-tuning_metrics.csv CHANGED
@@ -1,78 +1,122 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat_torch.bfloat16_lf,0.705,0.7398041613378253,0.705,0.6906357423169466,1.0
3
- 0.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-35_torch.bfloat16_lf,0.7193333333333334,0.7863486093365692,0.7193333333333334,0.7330498811142795,1.0
4
- 0.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-70_torch.bfloat16_lf,0.726,0.7900250828103491,0.726,0.7396583495246526,1.0
5
- 0.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-105_torch.bfloat16_lf,0.6736666666666666,0.8044565554629858,0.6736666666666666,0.7104123104529902,1.0
6
- 0.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-140_torch.bfloat16_lf,0.7496666666666667,0.8041871978859686,0.7496666666666667,0.7660159670998776,1.0
7
- 1.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-175_torch.bfloat16_lf,0.726,0.8094634420846424,0.726,0.751394838822856,1.0
8
- 1.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-210_torch.bfloat16_lf,0.7276666666666667,0.8039673699820601,0.7276666666666667,0.7488653386949028,1.0
9
- 1.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-245_torch.bfloat16_lf,0.747,0.8055537753403307,0.747,0.76527383722639,1.0
10
- 1.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-280_torch.bfloat16_lf,0.7166666666666667,0.8059535682746547,0.7166666666666667,0.7432427946178835,1.0
11
- 1.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-315_torch.bfloat16_lf,0.6983333333333334,0.8119110469658597,0.6983333333333334,0.7347246872892312,1.0
12
- 2.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-350_torch.bfloat16_lf,0.7076666666666667,0.8120132783051135,0.7076666666666667,0.7408145046817652,1.0
13
- 0.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m_torch.bfloat16_lf,0.4813333333333333,0.7605248207587668,0.4813333333333333,0.5244515621126862,0.9986666666666668
14
- 0.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-35_torch.bfloat16_lf,0.7843333333333333,0.7977648302848388,0.7843333333333333,0.7864944570659659,1.0
15
- 0.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-70_torch.bfloat16_lf,0.7836666666666666,0.7996977262947886,0.7836666666666666,0.7886881726841081,1.0
16
- 0.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-105_torch.bfloat16_lf,0.7243333333333334,0.8171172705912051,0.7243333333333334,0.7565804830382912,1.0
17
- 0.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-140_torch.bfloat16_lf,0.803,0.8031411888150441,0.803,0.8028064320197301,1.0
18
- 1.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-175_torch.bfloat16_lf,0.7676666666666667,0.8108441731715863,0.7676666666666667,0.7843187816704813,1.0
19
- 1.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-210_torch.bfloat16_lf,0.7736666666666666,0.8091671780923799,0.7736666666666666,0.7876874850235454,1.0
20
- 1.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-245_torch.bfloat16_lf,0.7623333333333333,0.8062291602218205,0.7623333333333333,0.777669094563925,1.0
21
- 1.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-280_torch.bfloat16_lf,0.7553333333333333,0.8086197936829652,0.7553333333333333,0.7755588811428297,1.0
22
- 1.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-315_torch.bfloat16_lf,0.748,0.8171996792797457,0.748,0.773990849396903,1.0
23
- 2.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-350_torch.bfloat16_lf,0.756,0.8126875394266148,0.756,0.7777812522863184,1.0
24
- 0.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat_torch.bfloat16_4bit_lf,0.564,0.7745256693833624,0.564,0.6352190975436365,0.6726666666666666
25
- 0.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-35_torch.bfloat16_4bit_lf,0.7576666666666667,0.7960640143421251,0.7576666666666667,0.769346697622254,1.0
26
- 0.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-70_torch.bfloat16_4bit_lf,0.7743333333333333,0.8042791719587958,0.7743333333333333,0.7849233169481004,1.0
27
- 0.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-105_torch.bfloat16_4bit_lf,0.7053333333333334,0.8070587351344375,0.7053333333333334,0.7421985241641746,1.0
28
- 0.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-140_torch.bfloat16_4bit_lf,0.795,0.817457691710893,0.795,0.8027552955647029,1.0
29
- 1.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-175_torch.bfloat16_4bit_lf,0.7786666666666666,0.8220512342362645,0.7786666666666666,0.7938353741035283,1.0
30
- 1.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-210_torch.bfloat16_4bit_lf,0.7516666666666667,0.8264680853251051,0.7516666666666667,0.7787088167337303,1.0
31
- 1.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-245_torch.bfloat16_4bit_lf,0.7876666666666666,0.8154190698395475,0.7876666666666666,0.7965399224841393,1.0
32
- 1.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-280_torch.bfloat16_4bit_lf,0.7753333333333333,0.8181125383376948,0.7753333333333333,0.7899794199099057,1.0
33
- 1.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-315_torch.bfloat16_4bit_lf,0.7583333333333333,0.8179523170315577,0.7583333333333333,0.7795358413482081,1.0
34
- 2.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7616666666666667,0.8208475549648238,0.7616666666666667,0.7826736174247095,1.0
35
- 0.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct_torch.float16_lf,0.683,0.7493103872717293,0.683,0.710140098232232,0.9996666666666668
36
- 0.2,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-35_torch.float16_lf,0.725,0.7840171468707405,0.725,0.748994536667058,0.9996666666666668
37
- 0.4,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-70_torch.float16_lf,0.759,0.8005303465799652,0.759,0.7748745026535183,1.0
38
- 0.6,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-105_torch.float16_lf,0.6926666666666667,0.8039176975550218,0.6926666666666667,0.7332481528585848,1.0
39
- 0.8,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-140_torch.float16_lf,0.725,0.7952719247171957,0.725,0.7476238017654298,1.0
40
- 1.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-175_torch.float16_lf,0.6756666666666666,0.7810148934939715,0.6756666666666666,0.708653993277772,1.0
41
- 1.2,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-210_torch.float16_lf,0.7013333333333334,0.7969562600853992,0.7013333333333334,0.7362679665494508,1.0
42
- 1.4,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-245_torch.float16_lf,0.7326666666666667,0.7922538479314682,0.7326666666666667,0.755402136631717,0.9996666666666668
43
- 1.6,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-280_torch.float16_lf,0.6983333333333334,0.785127298428753,0.6983333333333334,0.7292251109166867,1.0
44
- 1.8,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-315_torch.float16_lf,0.6783333333333333,0.785390767631834,0.6783333333333333,0.7164131321837346,1.0
45
- 2.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-350_torch.float16_lf,0.689,0.7929715746898984,0.689,0.7259993126510194,1.0
46
- 0.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch.bfloat16_4bit_lf,0.7516666666666667,0.7949378981748352,0.7516666666666667,0.7572499605227642,0.9773333333333334
47
- 0.2,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.7583333333333333,0.8199928526815756,0.7583333333333333,0.782751089787442,1.0
48
- 0.4,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.7366666666666667,0.8224865755517643,0.7366666666666667,0.7700627366337021,1.0
49
- 0.6,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-105_torch.bfloat16_4bit_lf,0.757,0.8253824826209251,0.757,0.784000409833628,1.0
50
- 0.8,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-140_torch.bfloat16_4bit_lf,0.7893333333333333,0.8229104753645825,0.7893333333333333,0.8033124955993173,1.0
51
- 1.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-175_torch.bfloat16_4bit_lf,0.7376666666666667,0.8243654864769323,0.7376666666666667,0.7699617360961548,1.0
52
- 1.2,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-210_torch.bfloat16_4bit_lf,0.763,0.8318882808702871,0.763,0.7901075708186186,1.0
53
- 1.4,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-245_torch.bfloat16_4bit_lf,0.7656666666666667,0.8288272203240518,0.7656666666666667,0.790627109330698,1.0
54
- 1.6,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-280_torch.bfloat16_4bit_lf,0.7693333333333333,0.8292798021666021,0.7693333333333333,0.7930169589012503,1.0
55
- 1.8,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-315_torch.bfloat16_4bit_lf,0.784,0.8354349234761956,0.784,0.804194683154365,1.0
56
- 2.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-350_torch.bfloat16_4bit_lf,0.7736666666666666,0.8330147983140184,0.7736666666666666,0.7973657072550873,1.0
57
- 0.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat_torch.float16_lf,0.742,0.7477056799746837,0.742,0.7371050181385632,0.8033333333333333
58
- 0.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-35_torch.float16_lf,0.709,0.7987219597893886,0.709,0.7427961200958145,1.0
59
- 0.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-70_torch.float16_lf,0.7163333333333334,0.8058657875960304,0.7163333333333334,0.7487811196109319,0.9993333333333332
60
- 0.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-105_torch.float16_lf,0.6996666666666667,0.802722482275839,0.6996666666666667,0.7370938556711591,1.0
61
- 0.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7716666666666666,0.8092193821623755,0.7716666666666666,0.7864287269398251,1.0
62
- 1.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-175_torch.float16_lf,0.78,0.810582723471486,0.78,0.7924651054056209,1.0
63
- 1.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7313333333333333,0.8157783263996798,0.7313333333333333,0.7628807622782868,1.0
64
- 1.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-245_torch.float16_lf,0.751,0.8125856808988221,0.751,0.7745416635653988,1.0
65
- 1.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-280_torch.float16_lf,0.739,0.8097375095673094,0.739,0.7662329023371559,1.0
66
- 1.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-315_torch.float16_lf,0.7236666666666667,0.8145530585912838,0.7236666666666667,0.7580428816095297,1.0
67
- 2.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-350_torch.float16_lf,0.7293333333333333,0.8151184301713545,0.7293333333333333,0.7616699266814145,1.0
68
- 0.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat_torch.bfloat16_4bit_lf,0.7636666666666667,0.7806653325131986,0.7636666666666667,0.7525813484548423,0.0096666666666666
69
- 0.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-35_torch.bfloat16_4bit_lf,0.778,0.8148707737020212,0.778,0.7910805488003003,0.9996666666666668
70
- 0.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-70_torch.bfloat16_4bit_lf,0.7306666666666667,0.8145782271710159,0.7306666666666667,0.7624724104697406,1.0
71
- 0.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-105_torch.bfloat16_4bit_lf,0.7193333333333334,0.8213567226911125,0.7193333333333334,0.7560702640626931,1.0
72
- 0.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-140_torch.bfloat16_4bit_lf,0.7563333333333333,0.826789897753756,0.7563333333333333,0.7815164366677209,1.0
73
- 1.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-175_torch.bfloat16_4bit_lf,0.7963333333333333,0.8248972880055918,0.7963333333333333,0.8076868978089201,1.0
74
- 1.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-210_torch.bfloat16_4bit_lf,0.7326666666666667,0.8265345821998035,0.7326666666666667,0.7644418492070342,1.0
75
- 1.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-245_torch.bfloat16_4bit_lf,0.7556666666666667,0.8258994609525315,0.7556666666666667,0.7820405339757727,1.0
76
- 1.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-280_torch.bfloat16_4bit_lf,0.757,0.8264461657684251,0.757,0.7834496144681513,1.0
77
- 1.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-315_torch.bfloat16_4bit_lf,0.7546666666666667,0.8277723752096544,0.7546666666666667,0.7823584779069335,1.0
78
- 2.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7496666666666667,0.8282310230333227,0.7496666666666667,0.7791947625361637,1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat_torch.float16_lf,0.7343333333333333,0.7375752740091942,0.7343333333333333,0.7270283652909943,0.8033333333333333
3
+ 0.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-35_torch.float16_lf,0.717,0.7933072428707201,0.717,0.7447412977676989,1.0
4
+ 0.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-70_torch.float16_lf,0.7226666666666667,0.7983383063141186,0.7226666666666667,0.7489397350174751,0.9993333333333332
5
+ 0.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-105_torch.float16_lf,0.7083333333333334,0.7967030927405547,0.7083333333333334,0.738836849803633,1.0
6
+ 0.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7773333333333333,0.805139129977305,0.7773333333333333,0.7882159693114585,1.0
7
+ 1.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-175_torch.float16_lf,0.7853333333333333,0.8062405645226312,0.7853333333333333,0.7938991590982061,1.0
8
+ 1.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7436666666666667,0.8148316221752646,0.7436666666666667,0.7689773286065246,1.0
9
+ 1.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-245_torch.float16_lf,0.759,0.8080929326806991,0.759,0.7772842274293189,1.0
10
+ 1.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-280_torch.float16_lf,0.745,0.8027959680086005,0.745,0.7666181725503965,1.0
11
+ 1.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-315_torch.float16_lf,0.7303333333333333,0.806805925253305,0.7303333333333333,0.7580841794383364,1.0
12
+ 2.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-350_torch.float16_lf,0.737,0.808786608325944,0.737,0.7629963845364953,1.0
13
+ 0.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat_torch.bfloat16_4bit_lf,0.7646666666666667,0.7804609488644828,0.7646666666666667,0.7497548621711109,0.0096666666666666
14
+ 0.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-35_torch.bfloat16_4bit_lf,0.784,0.8105343792887019,0.784,0.7931742141608462,0.9996666666666668
15
+ 0.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-70_torch.bfloat16_4bit_lf,0.7426666666666667,0.8117033235947096,0.7426666666666667,0.7673825750808414,1.0
16
+ 0.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-105_torch.bfloat16_4bit_lf,0.736,0.8227236574891071,0.736,0.7650739090144549,1.0
17
+ 0.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-140_torch.bfloat16_4bit_lf,0.7686666666666667,0.8259659464402258,0.7686666666666667,0.7880870865039342,1.0
18
+ 1.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-175_torch.bfloat16_4bit_lf,0.809,0.8282732906153989,0.809,0.8166997776775797,1.0
19
+ 1.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-210_torch.bfloat16_4bit_lf,0.75,0.8287348768409003,0.75,0.7741734526674708,1.0
20
+ 1.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-245_torch.bfloat16_4bit_lf,0.7703333333333333,0.8271894042316865,0.7703333333333333,0.7907617274354051,1.0
21
+ 1.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-280_torch.bfloat16_4bit_lf,0.776,0.8315436250878178,0.776,0.7959870550088912,1.0
22
+ 1.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-315_torch.bfloat16_4bit_lf,0.7733333333333333,0.8327336470976,0.7733333333333333,0.7947537193805649,1.0
23
+ 2.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7686666666666667,0.8329633784586954,0.7686666666666667,0.7914454794587963,1.0
24
+ 0.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat_torch.float16_lf,0.6923333333333334,0.7009179792741449,0.6923333333333334,0.6605899639694456,0.0116666666666666
25
+ 0.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-35_torch.float16_lf,0.706,0.7832545046834243,0.706,0.7323466131711432,1.0
26
+ 0.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-70_torch.float16_lf,0.7476666666666667,0.7836120158306894,0.7476666666666667,0.7557791381509955,1.0
27
+ 0.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-105_torch.float16_lf,0.6736666666666666,0.7908140272002406,0.6736666666666666,0.7129951145360993,1.0
28
+ 0.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7293333333333333,0.788387677637057,0.7293333333333333,0.7494137469900564,1.0
29
+ 1.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-175_torch.float16_lf,0.74,0.7833068129490098,0.74,0.7499935485741815,1.0
30
+ 1.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7146666666666667,0.7890760288118991,0.7146666666666667,0.7411240160229633,1.0
31
+ 1.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-245_torch.float16_lf,0.7616666666666667,0.789634957005121,0.7616666666666667,0.7721210086098353,1.0
32
+ 1.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-280_torch.float16_lf,0.7296666666666667,0.7854982015370922,0.7296666666666667,0.7491267995936699,1.0
33
+ 1.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-315_torch.float16_lf,0.7076666666666667,0.7877874532247918,0.7076666666666667,0.7346283562321456,1.0
34
+ 2.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-350_torch.float16_lf,0.713,0.7895690867103055,0.713,0.739013227401175,1.0
35
+ 0.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat_torch.bfloat16_lf,0.7063333333333334,0.7369785607161373,0.7063333333333334,0.6895815239121195,1.0
36
+ 0.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-35_torch.bfloat16_lf,0.729,0.7861122408311365,0.729,0.7385163226667387,1.0
37
+ 0.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-70_torch.bfloat16_lf,0.7336666666666667,0.7857703796539939,0.7336666666666667,0.7427841254119673,1.0
38
+ 0.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-105_torch.bfloat16_lf,0.6876666666666666,0.8030976203819039,0.6876666666666666,0.7170750416800897,1.0
39
+ 0.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-140_torch.bfloat16_lf,0.762,0.8063331692665241,0.762,0.7740172985498378,1.0
40
+ 1.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-175_torch.bfloat16_lf,0.7416666666666667,0.812190204769964,0.7416666666666667,0.761129466343473,1.0
41
+ 1.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-210_torch.bfloat16_lf,0.7443333333333333,0.8084922204218251,0.7443333333333333,0.7599422989743019,1.0
42
+ 1.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-245_torch.bfloat16_lf,0.762,0.8089123492151512,0.762,0.7753217972757948,1.0
43
+ 1.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-280_torch.bfloat16_lf,0.733,0.8092774765454144,0.733,0.7535080746086277,1.0
44
+ 1.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-315_torch.bfloat16_lf,0.7156666666666667,0.814456776214162,0.7156666666666667,0.744622807072089,1.0
45
+ 2.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-350_torch.bfloat16_lf,0.725,0.8148156790328904,0.725,0.7509650741005044,1.0
46
+ 0.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m_torch.bfloat16_lf,0.4923333333333333,0.7570993062022159,0.4923333333333333,0.5279738886353613,0.9986666666666668
47
+ 0.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-35_torch.bfloat16_lf,0.7843333333333333,0.7907732469871145,0.7843333333333333,0.7839137508042926,1.0
48
+ 0.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-70_torch.bfloat16_lf,0.7876666666666666,0.7961110449860888,0.7876666666666666,0.790011839264191,1.0
49
+ 0.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-105_torch.bfloat16_lf,0.74,0.818451985781803,0.74,0.7654385146358808,1.0
50
+ 0.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-140_torch.bfloat16_lf,0.8076666666666666,0.8048844422436796,0.8076666666666666,0.8049749805997191,1.0
51
+ 1.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-175_torch.bfloat16_lf,0.7796666666666666,0.8115925869684188,0.7796666666666666,0.7917308842405348,1.0
52
+ 1.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-210_torch.bfloat16_lf,0.7816666666666666,0.8082575556171326,0.7816666666666666,0.7920155623671598,1.0
53
+ 1.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-245_torch.bfloat16_lf,0.7736666666666666,0.8074649930391711,0.7736666666666666,0.7846002379939621,1.0
54
+ 1.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-280_torch.bfloat16_lf,0.771,0.8124579857634519,0.771,0.7859698091956198,1.0
55
+ 1.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-315_torch.bfloat16_lf,0.7646666666666667,0.8211516901334176,0.7646666666666667,0.7848541283802248,1.0
56
+ 2.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-350_torch.bfloat16_lf,0.77,0.8144910397034413,0.77,0.7862970454955438,1.0
57
+ 0.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat_torch.bfloat16_4bit_lf,0.575,0.7745319004159336,0.575,0.6416875854199033,0.6726666666666666
58
+ 0.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-35_torch.bfloat16_4bit_lf,0.7723333333333333,0.8004877872664371,0.7723333333333333,0.7800315047324102,1.0
59
+ 0.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-70_torch.bfloat16_4bit_lf,0.788,0.808878367860496,0.788,0.7952965901503556,1.0
60
+ 0.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-105_torch.bfloat16_4bit_lf,0.7223333333333334,0.8101427633407874,0.7223333333333334,0.7527524454293278,1.0
61
+ 0.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-140_torch.bfloat16_4bit_lf,0.8063333333333333,0.8207793607428686,0.8063333333333333,0.811239851005161,1.0
62
+ 1.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-175_torch.bfloat16_4bit_lf,0.792,0.8244746715585061,0.792,0.8028680300441688,1.0
63
+ 1.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-210_torch.bfloat16_4bit_lf,0.77,0.8305821984199763,0.77,0.7905012003721434,1.0
64
+ 1.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-245_torch.bfloat16_4bit_lf,0.8033333333333333,0.8215999742478901,0.8033333333333333,0.8087445768968825,1.0
65
+ 1.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-280_torch.bfloat16_4bit_lf,0.795,0.8261993807231882,0.795,0.805022820640186,1.0
66
+ 1.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-315_torch.bfloat16_4bit_lf,0.779,0.8256828719565774,0.779,0.7946766547953676,1.0
67
+ 2.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7826666666666666,0.8284951420712369,0.7826666666666666,0.7978785507522372,1.0
68
+ 0.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct_torch.float16_lf,0.4383333333333333,0.5292917259914629,0.4383333333333333,0.4228687599248655,0.594
69
+ 0.2,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-35_torch.float16_lf,0.5223333333333333,0.5704911830866488,0.5223333333333333,0.454387436259078,1.0
70
+ 0.4,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-70_torch.float16_lf,0.542,0.6358012674347429,0.542,0.5272438410312219,1.0
71
+ 0.6,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-105_torch.float16_lf,0.4463333333333333,0.6477441598024034,0.4463333333333333,0.4917457459702999,1.0
72
+ 0.8,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-140_torch.float16_lf,0.5053333333333333,0.6438300456580985,0.5053333333333333,0.4995247505211914,1.0
73
+ 1.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-175_torch.float16_lf,0.558,0.6560369730369926,0.558,0.5632487818615118,1.0
74
+ 1.2,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-210_torch.float16_lf,0.5453333333333333,0.6357935773889876,0.5453333333333333,0.5594242895140294,1.0
75
+ 1.4,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-245_torch.float16_lf,0.5903333333333334,0.6503049529377274,0.5903333333333334,0.6094397514027766,1.0
76
+ 1.6,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-280_torch.float16_lf,0.5286666666666666,0.6532851084098983,0.5286666666666666,0.5617239467523474,1.0
77
+ 1.8,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-315_torch.float16_lf,0.5336666666666666,0.6607103736450911,0.5336666666666666,0.5622949959647037,1.0
78
+ 2.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-350_torch.float16_lf,0.5156666666666667,0.652809461208547,0.5156666666666667,0.549955024535151,1.0
79
+ 0.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct_torch.float16_lf,0.2016666666666666,0.5269756683734005,0.2016666666666666,0.2406983532950438,0.9223333333333332
80
+ 0.2,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-35_torch.float16_lf,0.4853333333333333,0.654166887199198,0.4853333333333333,0.5381849571995003,0.9996666666666668
81
+ 0.4,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-70_torch.float16_lf,0.573,0.7037737273232145,0.573,0.6131069400231612,0.9996666666666668
82
+ 0.6,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-105_torch.float16_lf,0.539,0.7162869126454278,0.539,0.5961610389687657,1.0
83
+ 0.8,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-140_torch.float16_lf,0.6443333333333333,0.7218750831357578,0.6443333333333333,0.6721473356905486,1.0
84
+ 1.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-175_torch.float16_lf,0.6296666666666667,0.7065049203038848,0.6296666666666667,0.6496809196018393,1.0
85
+ 1.2,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-210_torch.float16_lf,0.5836666666666667,0.7222805944180548,0.5836666666666667,0.6314346830311218,1.0
86
+ 1.4,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-245_torch.float16_lf,0.6493333333333333,0.7440287895607589,0.6493333333333333,0.6815314583590799,1.0
87
+ 1.6,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-280_torch.float16_lf,0.6293333333333333,0.7332138067544355,0.6293333333333333,0.6634330572585689,1.0
88
+ 1.8,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-315_torch.float16_lf,0.599,0.7297954686265763,0.599,0.6396292878324805,1.0
89
+ 2.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-350_torch.float16_lf,0.6056666666666667,0.7305580205770756,0.6056666666666667,0.6426785514786738,1.0
90
+ 0.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct_torch.float16_lf,0.5796666666666667,0.6966500240864278,0.5796666666666667,0.5506370828782681,1.0
91
+ 0.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-35_torch.float16_lf,0.689,0.7450174119748659,0.689,0.709114466474576,0.9986666666666668
92
+ 0.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-70_torch.float16_lf,0.6556666666666666,0.7590430811422313,0.6556666666666666,0.6934194398116857,1.0
93
+ 0.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-105_torch.float16_lf,0.6963333333333334,0.7550938479315918,0.6963333333333334,0.71844324172961,1.0
94
+ 0.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-140_torch.float16_lf,0.6853333333333333,0.7542524799326954,0.6853333333333333,0.7128732915785243,1.0
95
+ 1.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-175_torch.float16_lf,0.6846666666666666,0.7564071354272528,0.6846666666666666,0.7125676758538035,1.0
96
+ 1.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-210_torch.float16_lf,0.6896666666666667,0.7690917466956201,0.6896666666666667,0.720231747443145,1.0
97
+ 1.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-245_torch.float16_lf,0.7256666666666667,0.7753705482689578,0.7256666666666667,0.7440390153124937,1.0
98
+ 1.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-280_torch.float16_lf,0.708,0.7659638403826392,0.708,0.7293997518219294,1.0
99
+ 1.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-315_torch.float16_lf,0.7056666666666667,0.7717562122699148,0.7056666666666667,0.729817759784445,1.0
100
+ 2.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-350_torch.float16_lf,0.7003333333333334,0.7698824212888824,0.7003333333333334,0.726563613830647,1.0
101
+ 0.0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct_torch.bfloat16_lf,0.644,0.7200261355300325,0.644,0.6101052277961244,1.0
102
+ 0.2,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-35_torch.bfloat16_lf,0.745,0.7643041174791825,0.745,0.7482828029872421,0.998
103
+ 0.4,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-70_torch.bfloat16_lf,0.7446666666666667,0.7800215227839997,0.7446666666666667,0.7576550061479678,0.9996666666666668
104
+ 0.6,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-105_torch.bfloat16_lf,0.7513333333333333,0.7996792149630704,0.7513333333333333,0.7693730206330721,0.9996666666666668
105
+ 0.8,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-140_torch.bfloat16_lf,0.75,0.7923028105975739,0.75,0.7665531868559959,1.0
106
+ 1.0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-175_torch.bfloat16_lf,0.771,0.8005814962709542,0.771,0.7814602739241332,0.9993333333333332
107
+ 1.2,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-210_torch.bfloat16_lf,0.7443333333333333,0.79978900243777,0.7443333333333333,0.7660506505481828,1.0
108
+ 1.4,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-245_torch.bfloat16_lf,0.7486666666666667,0.7974562319123832,0.7486666666666667,0.7655275916268014,0.9993333333333332
109
+ 1.6,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-280_torch.bfloat16_lf,0.7566666666666667,0.7939852407869384,0.7566666666666667,0.7689495073735431,0.9996666666666668
110
+ 1.8,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-315_torch.bfloat16_lf,0.755,0.7940575522966016,0.755,0.7681326415137147,0.9993333333333332
111
+ 2.0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-350_torch.bfloat16_lf,0.756,0.7982464722401461,0.756,0.7704035278260453,0.9996666666666668
112
+ 0.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct_torch.bfloat16_4bit_lf,0.7956666666666666,0.8098073411161181,0.7956666666666666,0.7771317592221199,0.994
113
+ 0.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.792,0.8180793658647517,0.792,0.80166512366027,1.0
114
+ 0.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.7716666666666666,0.8199569804721152,0.7716666666666666,0.7895879011938259,1.0
115
+ 0.6,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-105_torch.bfloat16_4bit_lf,0.798,0.8379062379534957,0.798,0.812148680520218,1.0
116
+ 0.8,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-140_torch.bfloat16_4bit_lf,0.8213333333333334,0.8447926258362122,0.8213333333333334,0.8299486611547571,1.0
117
+ 1.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-175_torch.bfloat16_4bit_lf,0.7643333333333333,0.8235366724638146,0.7643333333333333,0.7858148913986999,1.0
118
+ 1.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-210_torch.bfloat16_4bit_lf,0.7986666666666666,0.83233218480008,0.7986666666666666,0.8115886421806521,1.0
119
+ 1.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-245_torch.bfloat16_4bit_lf,0.7923333333333333,0.8231874218285514,0.7923333333333333,0.803363661387202,1.0
120
+ 1.6,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-280_torch.bfloat16_4bit_lf,0.7936666666666666,0.8268750473800219,0.7936666666666666,0.8057720333101867,1.0
121
+ 1.8,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-315_torch.bfloat16_4bit_lf,0.801,0.830389411421043,0.801,0.8117656427717702,1.0
122
+ 2.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-350_torch.bfloat16_4bit_lf,0.795,0.8280696193638868,0.795,0.8068114730639832,1.0
data/internlm2_5-20b-chat_metrics.csv CHANGED
@@ -1,12 +1,12 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat_torch.bfloat16_4bit_lf,0.564,0.7745256693833624,0.564,0.6352190975436365,0.6726666666666666
3
- 0.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-35_torch.bfloat16_4bit_lf,0.7576666666666667,0.7960640143421251,0.7576666666666667,0.769346697622254,1.0
4
- 0.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-70_torch.bfloat16_4bit_lf,0.7743333333333333,0.8042791719587958,0.7743333333333333,0.7849233169481004,1.0
5
- 0.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-105_torch.bfloat16_4bit_lf,0.7053333333333334,0.8070587351344375,0.7053333333333334,0.7421985241641746,1.0
6
- 0.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-140_torch.bfloat16_4bit_lf,0.795,0.817457691710893,0.795,0.8027552955647029,1.0
7
- 1.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-175_torch.bfloat16_4bit_lf,0.7786666666666666,0.8220512342362645,0.7786666666666666,0.7938353741035283,1.0
8
- 1.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-210_torch.bfloat16_4bit_lf,0.7516666666666667,0.8264680853251051,0.7516666666666667,0.7787088167337303,1.0
9
- 1.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-245_torch.bfloat16_4bit_lf,0.7876666666666666,0.8154190698395475,0.7876666666666666,0.7965399224841393,1.0
10
- 1.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-280_torch.bfloat16_4bit_lf,0.7753333333333333,0.8181125383376948,0.7753333333333333,0.7899794199099057,1.0
11
- 1.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-315_torch.bfloat16_4bit_lf,0.7583333333333333,0.8179523170315577,0.7583333333333333,0.7795358413482081,1.0
12
- 2.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7616666666666667,0.8208475549648238,0.7616666666666667,0.7826736174247095,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat_torch.bfloat16_4bit_lf,0.575,0.7745319004159336,0.575,0.6416875854199033,0.6726666666666666
3
+ 0.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-35_torch.bfloat16_4bit_lf,0.7723333333333333,0.8004877872664371,0.7723333333333333,0.7800315047324102,1.0
4
+ 0.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-70_torch.bfloat16_4bit_lf,0.788,0.808878367860496,0.788,0.7952965901503556,1.0
5
+ 0.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-105_torch.bfloat16_4bit_lf,0.7223333333333334,0.8101427633407874,0.7223333333333334,0.7527524454293278,1.0
6
+ 0.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-140_torch.bfloat16_4bit_lf,0.8063333333333333,0.8207793607428686,0.8063333333333333,0.811239851005161,1.0
7
+ 1.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-175_torch.bfloat16_4bit_lf,0.792,0.8244746715585061,0.792,0.8028680300441688,1.0
8
+ 1.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-210_torch.bfloat16_4bit_lf,0.77,0.8305821984199763,0.77,0.7905012003721434,1.0
9
+ 1.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-245_torch.bfloat16_4bit_lf,0.8033333333333333,0.8215999742478901,0.8033333333333333,0.8087445768968825,1.0
10
+ 1.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-280_torch.bfloat16_4bit_lf,0.795,0.8261993807231882,0.795,0.805022820640186,1.0
11
+ 1.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-315_torch.bfloat16_4bit_lf,0.779,0.8256828719565774,0.779,0.7946766547953676,1.0
12
+ 2.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7826666666666666,0.8284951420712369,0.7826666666666666,0.7978785507522372,1.0
data/internlm2_5-20b-chat_shots_metrics.csv CHANGED
@@ -1,2 +1,2 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/shots-00,0.564,0.7745256693833624,0.564,0.6352190975436365,0.6726666666666666
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/shots-00,0.575,0.7745319004159336,0.575,0.6416875854199033,0.6726666666666666
data/internlm2_5-7b-chat-1m_metrics.csv CHANGED
@@ -1,12 +1,12 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m_torch.bfloat16_lf,0.48133333333333334,0.7605248207587668,0.48133333333333334,0.5244515621126862,0.9986666666666667
3
- 0.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-35_torch.bfloat16_lf,0.7843333333333333,0.7977648302848388,0.7843333333333333,0.7864944570659659,1.0
4
- 0.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-70_torch.bfloat16_lf,0.7836666666666666,0.7996977262947886,0.7836666666666666,0.7886881726841081,1.0
5
- 0.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-105_torch.bfloat16_lf,0.7243333333333334,0.8171172705912051,0.7243333333333334,0.7565804830382912,1.0
6
- 0.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-140_torch.bfloat16_lf,0.803,0.8031411888150441,0.803,0.8028064320197301,1.0
7
- 1.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-175_torch.bfloat16_lf,0.7676666666666667,0.8108441731715863,0.7676666666666667,0.7843187816704813,1.0
8
- 1.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-210_torch.bfloat16_lf,0.7736666666666666,0.8091671780923799,0.7736666666666666,0.7876874850235454,1.0
9
- 1.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-245_torch.bfloat16_lf,0.7623333333333333,0.8062291602218205,0.7623333333333333,0.777669094563925,1.0
10
- 1.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-280_torch.bfloat16_lf,0.7553333333333333,0.8086197936829652,0.7553333333333333,0.7755588811428297,1.0
11
- 1.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-315_torch.bfloat16_lf,0.748,0.8171996792797457,0.748,0.773990849396903,1.0
12
- 2.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-350_torch.bfloat16_lf,0.756,0.8126875394266148,0.756,0.7777812522863184,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m_torch.bfloat16_lf,0.49233333333333335,0.7570993062022159,0.49233333333333335,0.5279738886353613,0.9986666666666667
3
+ 0.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-35_torch.bfloat16_lf,0.7843333333333333,0.7907732469871145,0.7843333333333333,0.7839137508042926,1.0
4
+ 0.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-70_torch.bfloat16_lf,0.7876666666666666,0.7961110449860888,0.7876666666666666,0.790011839264191,1.0
5
+ 0.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-105_torch.bfloat16_lf,0.74,0.818451985781803,0.74,0.7654385146358808,1.0
6
+ 0.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-140_torch.bfloat16_lf,0.8076666666666666,0.8048844422436796,0.8076666666666666,0.8049749805997191,1.0
7
+ 1.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-175_torch.bfloat16_lf,0.7796666666666666,0.8115925869684188,0.7796666666666666,0.7917308842405348,1.0
8
+ 1.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-210_torch.bfloat16_lf,0.7816666666666666,0.8082575556171326,0.7816666666666666,0.7920155623671598,1.0
9
+ 1.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-245_torch.bfloat16_lf,0.7736666666666666,0.8074649930391711,0.7736666666666666,0.7846002379939621,1.0
10
+ 1.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-280_torch.bfloat16_lf,0.771,0.8124579857634519,0.771,0.7859698091956198,1.0
11
+ 1.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-315_torch.bfloat16_lf,0.7646666666666667,0.8211516901334176,0.7646666666666667,0.7848541283802248,1.0
12
+ 2.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-350_torch.bfloat16_lf,0.77,0.8144910397034413,0.77,0.7862970454955438,1.0
data/internlm2_5-7b-chat-1m_shots_metrics.csv CHANGED
@@ -1,7 +1,8 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-00,0.48133333333333334,0.7605248207587668,0.48133333333333334,0.5244515621126862,0.9986666666666667
3
- 5,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-05,0.7763333333333333,0.7640598325070357,0.7763333333333333,0.7700878172419743,0.9453333333333334
4
- 10,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-10,0.6473333333333333,0.7282065610714444,0.6473333333333333,0.665824871588245,0.8866666666666667
5
- 20,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-20,0.6733333333333333,0.7314610506764355,0.6733333333333333,0.6764198712634657,0.8213333333333334
6
- 30,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-30,0.6736666666666666,0.7482542000402412,0.6736666666666666,0.6810446770610585,0.8236666666666667
7
- 40,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-40,0.724,0.7567654663125225,0.724,0.712500180941536,0.8336666666666667
 
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-00,0.49233333333333335,0.7570993062022159,0.49233333333333335,0.5279738886353613,0.9986666666666667
3
+ 5,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-05,0.7753333333333333,0.7586378181445387,0.7753333333333333,0.7665405919258307,0.9453333333333334
4
+ 10,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-10,0.654,0.7251381758855274,0.654,0.6681655588675279,0.8866666666666667
5
+ 20,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-20,0.677,0.7296467412730754,0.677,0.6780570012166849,0.8213333333333334
6
+ 30,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-30,0.68,0.7425906069240685,0.68,0.6837924261094331,0.8236666666666667
7
+ 40,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-40,0.726,0.7533750344411337,0.726,0.7132456474026365,0.8336666666666667
8
+ 50,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-50,0.7173333333333334,0.7471186719787132,0.7173333333333334,0.6980283743779222,0.8846666666666667
data/internlm2_5-7b-chat_metrics.csv CHANGED
@@ -1,12 +1,12 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat_torch.bfloat16_lf,0.705,0.7398041613378253,0.705,0.6906357423169466,1.0
3
- 0.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-35_torch.bfloat16_lf,0.7193333333333334,0.7863486093365692,0.7193333333333334,0.7330498811142795,1.0
4
- 0.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-70_torch.bfloat16_lf,0.726,0.7900250828103491,0.726,0.7396583495246526,1.0
5
- 0.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-105_torch.bfloat16_lf,0.6736666666666666,0.8044565554629858,0.6736666666666666,0.7104123104529902,1.0
6
- 0.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-140_torch.bfloat16_lf,0.7496666666666667,0.8041871978859686,0.7496666666666667,0.7660159670998776,1.0
7
- 1.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-175_torch.bfloat16_lf,0.726,0.8094634420846424,0.726,0.751394838822856,1.0
8
- 1.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-210_torch.bfloat16_lf,0.7276666666666667,0.8039673699820601,0.7276666666666667,0.7488653386949028,1.0
9
- 1.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-245_torch.bfloat16_lf,0.747,0.8055537753403307,0.747,0.76527383722639,1.0
10
- 1.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-280_torch.bfloat16_lf,0.7166666666666667,0.8059535682746547,0.7166666666666667,0.7432427946178835,1.0
11
- 1.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-315_torch.bfloat16_lf,0.6983333333333334,0.8119110469658597,0.6983333333333334,0.7347246872892312,1.0
12
- 2.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-350_torch.bfloat16_lf,0.7076666666666667,0.8120132783051135,0.7076666666666667,0.7408145046817652,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat_torch.bfloat16_lf,0.7063333333333334,0.7369785607161373,0.7063333333333334,0.6895815239121195,1.0
3
+ 0.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-35_torch.bfloat16_lf,0.729,0.7861122408311365,0.729,0.7385163226667387,1.0
4
+ 0.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-70_torch.bfloat16_lf,0.7336666666666667,0.7857703796539939,0.7336666666666667,0.7427841254119673,1.0
5
+ 0.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-105_torch.bfloat16_lf,0.6876666666666666,0.8030976203819039,0.6876666666666666,0.7170750416800897,1.0
6
+ 0.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-140_torch.bfloat16_lf,0.762,0.8063331692665241,0.762,0.7740172985498378,1.0
7
+ 1.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-175_torch.bfloat16_lf,0.7416666666666667,0.812190204769964,0.7416666666666667,0.761129466343473,1.0
8
+ 1.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-210_torch.bfloat16_lf,0.7443333333333333,0.8084922204218251,0.7443333333333333,0.7599422989743019,1.0
9
+ 1.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-245_torch.bfloat16_lf,0.762,0.8089123492151512,0.762,0.7753217972757948,1.0
10
+ 1.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-280_torch.bfloat16_lf,0.733,0.8092774765454144,0.733,0.7535080746086277,1.0
11
+ 1.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-315_torch.bfloat16_lf,0.7156666666666667,0.814456776214162,0.7156666666666667,0.744622807072089,1.0
12
+ 2.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-350_torch.bfloat16_lf,0.725,0.8148156790328904,0.725,0.7509650741005044,1.0
data/internlm2_5-7b-chat_shots_metrics.csv CHANGED
@@ -1,6 +1,8 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-00,0.705,0.7398041613378253,0.705,0.6906357423169466,1.0
3
- 5,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-05,0.7476666666666667,0.746806876028684,0.7476666666666667,0.7270588443494302,0.999
4
- 10,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-10,0.5533333333333333,0.7301739373336078,0.5533333333333333,0.625097481985829,0.9883333333333333
5
- 20,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-20,0.647,0.721136036365055,0.647,0.6769738108371004,0.9473333333333334
6
- 30,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-30,0.6263333333333333,0.7256804685839701,0.6263333333333333,0.6534519727626863,0.9403333333333334
 
 
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-00,0.7063333333333334,0.7369785607161373,0.7063333333333334,0.6895815239121195,1.0
3
+ 5,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-05,0.747,0.7433195768374967,0.747,0.7232456014841266,0.999
4
+ 10,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-10,0.559,0.7306434812774306,0.559,0.6287391975839828,0.9883333333333333
5
+ 20,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-20,0.6466666666666666,0.7143354332969056,0.6466666666666666,0.6738164117926014,0.9473333333333334
6
+ 30,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-30,0.626,0.7223442225693745,0.626,0.6494216734706632,0.9403333333333334
7
+ 40,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-40,0.64,0.7020393671564193,0.64,0.611996460461355,0.9813333333333333
8
+ 50,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-50,0.6116666666666667,0.6808793455512054,0.6116666666666667,0.5502581431071487,0.9803333333333333
data/openai_metrics.csv CHANGED
@@ -1,26 +1,29 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0,gpt-4o-mini,gpt-4o-mini/shots-00,0.7176666666666667,0.785706730193659,0.7176666666666667,0.7296061848734905,0.9916666666666667
3
- 0,gpt-4o,gpt-4o/shots-00,0.782,0.8204048322982596,0.782,0.7953019682198627,0.066
4
- 0,o1-mini,o1-mini/shots-00,0.7083333333333334,0.7848098266888749,0.7083333333333334,0.7377068425566796,0.999
5
- 0,o1-preview,o1-preview/shots-00,0.721,0.7849371317342158,0.721,0.7451207069815194,0.998
6
- 5,gpt-4o-mini,gpt-4o-mini/shots-05,0.7176666666666667,0.7767294185987051,0.7176666666666667,0.7181068311028772,0.9996666666666667
7
- 5,gpt-4o,gpt-4o/shots-05,0.7873333333333333,0.8230974205170392,0.7873333333333333,0.8000290527498529,0.998
8
- 5,o1-mini,o1-mini/shots-05,0.724,0.7905045610386181,0.724,0.7482963122126776,0.9966666666666667
9
- 5,o1-preview,o1-preview/shots-05,0.7313333333333333,0.7878283093765627,0.7313333333333333,0.7535489719321234,0.979
10
- 10,gpt-4o-mini,gpt-4o-mini/shots-10,0.6793333333333333,0.7728086050218999,0.6793333333333333,0.6916749681933937,0.9983333333333333
11
- 10,gpt-4o,gpt-4o/shots-10,0.7916666666666666,0.8227707658360168,0.7916666666666666,0.803614688453356,0.9996666666666667
12
- 10,o1-mini,o1-mini/shots-10,0.725,0.7892485648334764,0.725,0.7485623974683336,0.9943333333333333
13
- 10,o1-preview,o1-preview/shots-10,0.749,0.7964482186234537,0.749,0.7677316493549238,0.9873333333333333
14
- 20,gpt-4o-mini,gpt-4o-mini/shots-20,0.6623333333333333,0.7686706009175459,0.6623333333333333,0.6798015109939115,0.998
15
- 20,gpt-4o,gpt-4o/shots-20,0.7816666666666666,0.8204541793856629,0.7816666666666666,0.7967017169880498,0.9993333333333333
16
- 20,o1-mini,o1-mini/shots-20,0.7343333333333333,0.786101455887261,0.7343333333333333,0.7535300565051624,0.9946666666666667
17
- 20,o1-preview,o1-preview/shots-20,0.7443333333333333,0.7911442834260676,0.7443333333333333,0.7625144090816939,0.9853333333333333
18
- 30,gpt-4o-mini,gpt-4o-mini/shots-30,0.6873333333333334,0.7684209723431035,0.6873333333333334,0.6913018667081989,0.999
19
- 30,gpt-4o,gpt-4o/shots-30,0.7886666666666666,0.8260847852316618,0.7886666666666666,0.8030949295928699,0.999
20
- 30,o1-preview,o1-preview/shots-30,0.7473333333333333,0.7920604378746952,0.7473333333333333,0.7643977099599287,0.984
21
- 40,gpt-4o-mini,gpt-4o-mini/shots-40,0.6923333333333334,0.7639874967862498,0.6923333333333334,0.6924934068935911,0.9986666666666667
22
- 40,gpt-4o,gpt-4o/shots-40,0.784,0.8233509309291644,0.784,0.7993336791122846,0.9973333333333333
23
- 40,o1-preview,o1-preview/shots-40,0.7506666666666667,0.7964679024468982,0.7506666666666667,0.7674109766459014,0.984
24
- 50,gpt-4o-mini,gpt-4o-mini/shots-50,0.717,0.7692638634416518,0.717,0.7105227254860433,0.9993333333333333
25
- 50,gpt-4o,gpt-4o/shots-50,0.787,0.8234800466218334,0.787,0.8013530974301947,0.9993333333333333
26
- 50,o1-preview,o1-preview/shots-50,0.7546666666666667,0.7979981023789272,0.7546666666666667,0.7708181822112403,0.9816666666666667
 
 
 
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,gpt-4o-mini,gpt-4o-mini/shots-00,0.7166666666666667,0.7800918028217227,0.7166666666666667,0.7260056154268697,0.9916666666666667
3
+ 0,gpt-4o,gpt-4o/shots-00,0.792,0.8234582231232066,0.792,0.8022633746318892,0.066
4
+ 0,o1-mini,o1-mini/shots-00,0.7133333333333334,0.78301872209321,0.7133333333333334,0.7402734333211688,0.999
5
+ 0,o1-preview,o1-preview/shots-00,0.725,0.7860443296236067,0.725,0.7471736898827371,0.998
6
+ 5,gpt-4o-mini,gpt-4o-mini/shots-05,0.7203333333333334,0.7754800244789168,0.7203333333333334,0.718540502683781,0.9996666666666667
7
+ 5,gpt-4o,gpt-4o/shots-05,0.7973333333333333,0.8251066339666824,0.7973333333333333,0.8066429877716694,0.998
8
+ 5,o1-mini,o1-mini/shots-05,0.7313333333333333,0.7913577967036569,0.7313333333333333,0.7532525881890013,0.9966666666666667
9
+ 5,o1-preview,o1-preview/shots-05,0.736,0.789169445854742,0.736,0.7557068489703724,0.979
10
+ 10,gpt-4o-mini,gpt-4o-mini/shots-10,0.6836666666666666,0.7701177891593667,0.6836666666666666,0.6932016303210964,0.9983333333333333
11
+ 10,gpt-4o,gpt-4o/shots-10,0.8013333333333333,0.8246834383036209,0.8013333333333333,0.8098901724387172,0.9996666666666667
12
+ 10,o1-mini,o1-mini/shots-10,0.7283333333333334,0.7851844846890333,0.7283333333333334,0.7490987096521479,0.9943333333333333
13
+ 10,o1-preview,o1-preview/shots-10,0.7513333333333333,0.7947574632958824,0.7513333333333333,0.7673707529850041,0.9873333333333333
14
+ 20,gpt-4o-mini,gpt-4o-mini/shots-20,0.6616666666666666,0.7627467933668375,0.6616666666666666,0.677372757519069,0.998
15
+ 20,gpt-4o,gpt-4o/shots-20,0.79,0.822098231279132,0.79,0.8020290214439503,0.9993333333333333
16
+ 20,o1-mini,o1-mini/shots-20,0.7373333333333333,0.7815727856803751,0.7373333333333333,0.7533353509620383,0.9946666666666667
17
+ 20,o1-preview,o1-preview/shots-20,0.7483333333333333,0.790639591375103,0.7483333333333333,0.763324860719675,0.9853333333333333
18
+ 30,gpt-4o-mini,gpt-4o-mini/shots-30,0.6876666666666666,0.7663381611066244,0.6876666666666666,0.6896169854446027,0.999
19
+ 30,gpt-4o,gpt-4o/shots-30,0.7946666666666666,0.8259436682564079,0.7946666666666666,0.8063113377291872,0.999
20
+ 30,o1-mini,o1-mini/shots-30,0.748,0.779168441371953,0.748,0.7583397172973073,0.9976666666666667
21
+ 30,o1-preview,o1-preview/shots-30,0.7513333333333333,0.792049804996314,0.7513333333333333,0.7654800949250774,0.984
22
+ 40,gpt-4o-mini,gpt-4o-mini/shots-40,0.6903333333333334,0.7603850760051853,0.6903333333333334,0.688393665975117,0.9986666666666667
23
+ 40,gpt-4o,gpt-4o/shots-40,0.7906666666666666,0.8242154446428003,0.7906666666666666,0.803356987717753,0.9973333333333333
24
+ 40,o1-mini,o1-mini/shots-40,0.7496666666666667,0.775765877349714,0.7496666666666667,0.757640226210139,0.9976666666666667
25
+ 40,o1-preview,o1-preview/shots-40,0.7526666666666667,0.795308022968859,0.7526666666666667,0.7672762517397222,0.984
26
+ 50,gpt-4o-mini,gpt-4o-mini/shots-50,0.7143333333333334,0.7654214682013311,0.7143333333333334,0.7056961582308003,0.9993333333333333
27
+ 50,gpt-4o,gpt-4o/shots-50,0.798,0.8274250231711487,0.798,0.8091066504350897,0.9993333333333333
28
+ 50,o1-mini,o1-mini/shots-50,0.7536666666666667,0.7755130422727871,0.7536666666666667,0.7602241520634903,0.9976666666666667
29
+ 50,o1-preview,o1-preview/shots-50,0.7576666666666667,0.7986597718440941,0.7576666666666667,0.7718331604189232,0.9816666666666667
datasets/mgtv/train.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:06570ba22afc612ea7033d2fda6acf67774f662e5c60f57e4ce8e28ca2dd9b22
3
- size 20747995
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a7703f495679958e64d334d8c16ae22e30de50a7b3b6dfd98c9966dae731688
3
+ size 20697987
datasets/mgtv/val.csv ADDED
The diff for this file is too large to render. See raw diff
 
llm_toolkit/logical_reasoning_utils.py CHANGED
@@ -208,6 +208,9 @@ def extract_answer(text, debug=False):
208
  return ""
209
 
210
  def extract_answer_from_text(text, question):
 
 
 
211
  labels = ['不是', '是', '不重要', '回答正确', '问法错误']
212
  original_text = text
213
  text = text.split("回答:")[-1]
@@ -302,7 +305,7 @@ def load_logical_reasoning_dataset(
302
  ):
303
  postfix = "" if chinese_prompt else "_en"
304
  train_data_file = data_path + f"/train{postfix}.csv"
305
- test_data_file = data_path + f"/{test_data if test_data else 'dev'}{postfix}.csv"
306
 
307
  print("loading train/test data files")
308
  datasets = load_dataset(
@@ -424,7 +427,7 @@ def load_alpaca_data(data_path, using_p1=True, use_english_datasets=False):
424
  return df_alpaca
425
 
426
 
427
- def plot_value_counts(df, column_name, offset=0.1, title=None, preprocess_func=None):
428
  # font_family = rcParams["font.family"]
429
  # # Set the font to SimHei to support Chinese characters
430
  # rcParams["font.family"] = "SimHei"
@@ -440,6 +443,8 @@ def plot_value_counts(df, column_name, offset=0.1, title=None, preprocess_func=N
440
  plt.figure(figsize=(8, 4))
441
 
442
  value_counts = df[column_name].value_counts()
 
 
443
  value_counts = value_counts.rename(index=translation_dict)
444
  value_counts.plot(kind="bar")
445
 
@@ -558,7 +563,7 @@ def plot_metrics(perf_df, model_name, variant="epoch", offset=0.01):
558
  ax.set_ylabel("Accuracy and F1 Score")
559
 
560
  ax.xaxis.set_major_locator(MultipleLocator(0.2 if variant == "epoch" else 5))
561
- ax.set_title(f"Performance Analysis Across Checkpoints for the {model_name} Model")
562
 
563
  # Rotate x labels
564
  plt.xticks(rotation=0)
 
208
  return ""
209
 
210
  def extract_answer_from_text(text, question):
211
+ if True:
212
+ return extract_answer(text)
213
+
214
  labels = ['不是', '是', '不重要', '回答正确', '问法错误']
215
  original_text = text
216
  text = text.split("回答:")[-1]
 
305
  ):
306
  postfix = "" if chinese_prompt else "_en"
307
  train_data_file = data_path + f"/train{postfix}.csv"
308
+ test_data_file = data_path + f"/{test_data if test_data else 'val'}{postfix}.csv"
309
 
310
  print("loading train/test data files")
311
  datasets = load_dataset(
 
427
  return df_alpaca
428
 
429
 
430
+ def plot_value_counts(df, column_name, offset=0.1, title=None, preprocess_func=None, debug=False):
431
  # font_family = rcParams["font.family"]
432
  # # Set the font to SimHei to support Chinese characters
433
  # rcParams["font.family"] = "SimHei"
 
443
  plt.figure(figsize=(8, 4))
444
 
445
  value_counts = df[column_name].value_counts()
446
+ if debug:
447
+ print(value_counts)
448
  value_counts = value_counts.rename(index=translation_dict)
449
  value_counts.plot(kind="bar")
450
 
 
563
  ax.set_ylabel("Accuracy and F1 Score")
564
 
565
  ax.xaxis.set_major_locator(MultipleLocator(0.2 if variant == "epoch" else 5))
566
+ ax.set_title(f"Performance Analysis Across {'Checkpoints' if variant == 'epoch' else 'Shots'} for the {model_name} Model")
567
 
568
  # Rotate x labels
569
  plt.xticks(rotation=0)
notebooks/00_Data Analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/01a_internlm2_5-20b-chat_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/01a_internlm2_5-7b-chat-1m_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/01a_internlm2_5-7b-chat_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/01b_Mistral-7B-v0.3-Chinese-Chat_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/02a_Qwen2-7B-Instruct_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/02b_Qwen2-72B-Instruct_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/02e_Qwen2.5-1.5B-Instruct_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/02f_Qwen2.5-0.5B-Instruct_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/02g_Qwen2.5-72B-Instruct_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/03a_Llama3.1-8B-Chinese-Chat_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/03b_Llama3.1-70B-Chinese-Chat_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/04b_OpenAI-Models_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/06b_Open-Source-Models_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
scripts/eval-mgtv-qwen2.5_4bit.sh CHANGED
@@ -14,11 +14,12 @@ lscpu
14
  grep MemTotal /proc/meminfo
15
 
16
  export USING_LLAMA_FACTORY=true
17
- export LOAD_IN_4BIT=true
18
- $BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-72B-Instruct
19
 
20
- export START_NUM_SHOTS=5
 
21
  $BASEDIR/scripts/eval-shots_4bit.sh Qwen Qwen2.5-72B-Instruct
22
 
23
- export START_NUM_SHOTS=40
24
- $BASEDIR/scripts/eval-shots_4bit.sh shenzhi-wang Llama3.1-70B-Chinese-Chat
 
14
  grep MemTotal /proc/meminfo
15
 
16
  export USING_LLAMA_FACTORY=true
17
+ # export LOAD_IN_4BIT=true
18
+ # $BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-72B-Instruct
19
 
20
+ export START_NUM_SHOTS=10
21
+ export END_NUM_SHOTS=20
22
  $BASEDIR/scripts/eval-shots_4bit.sh Qwen Qwen2.5-72B-Instruct
23
 
24
+ export START_NUM_SHOTS=5
25
+ $BASEDIR/scripts/eval-shots_4bit.sh internlm internlm2_5-20b-chat
scripts/eval-mgtv.sh CHANGED
@@ -1 +1 @@
1
- eval-mgtv-qwen2.5.sh
 
1
+ eval-mgtv-qwen2.5_4bit.sh
scripts/eval-shots.sh CHANGED
@@ -5,6 +5,8 @@ cd $BASEDIR/..
5
  echo Current Directory:
6
  pwd
7
 
 
 
8
  export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
9
  export RESIZE_TOKEN_EMBEDDINGS=true
10
  # export USING_LLAMA_FACTORY=true
@@ -25,10 +27,3 @@ fi
25
 
26
  echo Evaluating $MODEL_NAME with few-shot learning
27
  python llm_toolkit/eval_shots.py
28
-
29
- if [ "$MODEL" == "internlm2_5-7b-chat-1m" ];
30
- then
31
- export LOAD_IN_4BIT=true
32
- export START_NUM_SHOTS=5
33
- $BASEDIR/scripts/eval-shots.sh internlm internlm2_5-20b-chat
34
- fi
 
5
  echo Current Directory:
6
  pwd
7
 
8
+ BASEDIR=`pwd`
9
+
10
  export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
11
  export RESIZE_TOKEN_EMBEDDINGS=true
12
  # export USING_LLAMA_FACTORY=true
 
27
 
28
  echo Evaluating $MODEL_NAME with few-shot learning
29
  python llm_toolkit/eval_shots.py