inflaton committed
Commit 77dd763 · 1 Parent(s): 44cfb92

finetune results

.gitattributes CHANGED
@@ -63,3 +63,7 @@ logs/l40-1gpu.txt filter=lfs diff=lfs merge=lfs -text
 logs/l40-4gpu.txt filter=lfs diff=lfs merge=lfs -text
 logs/openai-gpt-4o-mini.txt filter=lfs diff=lfs merge=lfs -text
 logs/openai-gpt-4o.txt filter=lfs diff=lfs merge=lfs -text
+results/mac-results_few_shots_openai.csv filter=lfs diff=lfs merge=lfs -text
+results/mac-results_fine_tuned.csv filter=lfs diff=lfs merge=lfs -text
+results/mac-results_greedy_decoding_metrics.csv filter=lfs diff=lfs merge=lfs -text
+results/mac-results_few_shots_metrics.csv filter=lfs diff=lfs merge=lfs -text
llm_toolkit/eval_epochs.py CHANGED
@@ -58,9 +58,10 @@ def evaluate_model_all_epochs(
     for i in range(start_epoch, end_epoch + 1):
         print(f"Epoch {i}")
         if i > 0:
-            adapter_path = adapter_path_base + "/" + subdirs[i - 1]
+            adapter_name = subdirs[i - 1]
+            adapter_path = adapter_path_base + "/" + adapter_name
             print(f"loading adapter: {adapter_path}")
-            adapter_name = model.load_adapter(adapter_path)
+            model.load_adapter(adapter_path, adapter_name=adapter_name)
             model.active_adapters = adapter_name

         predictions = eval_model(
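For context, a minimal, self-contained sketch of the per-epoch evaluation loop after this fix. The base model name, the checkpoint directory layout (one `checkpoint-*` subdirectory per epoch), and the elided `eval_model` call are assumptions for illustration only; the sketch also uses the standard PEFT/transformers `set_adapter(...)` call in place of the script's `active_adapters` assignment.

```python
import os

from transformers import AutoModelForCausalLM

# Hypothetical base model and adapter checkpoint directory; the real script
# derives these from its own configuration.
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2-7B-Instruct", device_map="auto"
)
adapter_path_base = "saves/Qwen2-7B-Instruct"

# One LoRA checkpoint subdirectory per epoch, sorted by training step.
subdirs = sorted(
    (d for d in os.listdir(adapter_path_base) if d.startswith("checkpoint-")),
    key=lambda d: int(d.rsplit("-", 1)[-1]),
)

for i in range(0, len(subdirs) + 1):
    print(f"Epoch {i}")
    if i > 0:
        # Register each epoch's adapter under its own name (the checkpoint
        # directory name) so adapters from different epochs do not collide,
        # then make it the active adapter before evaluating.
        adapter_name = subdirs[i - 1]
        adapter_path = f"{adapter_path_base}/{adapter_name}"
        print(f"loading adapter: {adapter_path}")
        model.load_adapter(adapter_path, adapter_name=adapter_name)
        model.set_adapter(adapter_name)
    # predictions = eval_model(model, ...)  # evaluation call elided here
```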
results/mac-results_few_shots_metrics.csv CHANGED
@@ -1,47 +1,3 @@
-model,shots,meteor,bleu_1,rouge_l,ews_score,repetition_score,total_repetitions,rap,num_max_output_tokens,eval_time
-Qwen/Qwen2-72B-Instruct,0,0.4003638205699929,0.12223832517678616,0.38486660208459095,0.0,0.19593998234774934,0.19593998234774934,0.3970180421898014,1,8.894969108561341
-Qwen/Qwen2-72B-Instruct,1,0.4068727655718769,0.13151008586303575,0.3946283255752747,0.0,0.15798764342453664,0.15798764342453664,0.4041216347207881,1,8.983230361871138
-Qwen/Qwen2-72B-Instruct,3,0.4086244766794449,0.13771788946915253,0.3979712282250465,0.0,0.12709620476610767,0.12709620476610767,0.4063954239173824,0,11.657546337157987
-Qwen/Qwen2-72B-Instruct,5,0.4132330811975005,0.1439773872150899,0.40387035186513487,0.0,0.11915269196822595,0.11915269196822595,0.41111822769434864,0,17.167696381288614
-Qwen/Qwen2-72B-Instruct,10,0.41598174489789025,0.14493475334416772,0.4065933507975943,0.0,0.09620476610767872,0.09620476610767872,0.4142591929807702,0,29.728155339805824
-Qwen/Qwen2-72B-Instruct,50,0.4401536409204816,0.1538634893900684,0.41722880607716234,0.0,0.10150044130626655,0.10150044130626655,0.43823160654983345,0,112.50397175639894
-Qwen/Qwen2-7B-Instruct,0,0.377477070949433,0.11783492823424507,0.3678523300904837,0.0,0.07149161518093557,0.07149161518093557,0.3763128359886437,0,0.9805825242718447
-Qwen/Qwen2-7B-Instruct,1,0.38000752971097884,0.11731917392837622,0.371517786678723,0.0,0.07413945278022947,0.07413945278022947,0.37879237953430883,0,1.0529567519858782
-Qwen/Qwen2-7B-Instruct,3,0.38678180999660744,0.12368875746156333,0.3780278278830778,0.0,0.1412180052956752,0.1412180052956752,0.38444052153933106,0,1.6010591350397176
-Qwen/Qwen2-7B-Instruct,5,0.38784856371389564,0.1227725469820483,0.38246119910508375,0.0,0.09179170344218888,0.09179170344218888,0.38631555618548774,0,2.2894969108561343
-Qwen/Qwen2-7B-Instruct,10,0.38526484346757095,0.12535252418966952,0.38202725422893463,0.0,0.10326566637246248,0.10326566637246248,0.3835535147682633,0,4.006178287731686
-Qwen/Qwen2-7B-Instruct,50,0.3953455943001352,0.12949951844499932,0.3899754114871057,0.0,0.10061782877316858,0.10061782877316858,0.39363409715118836,0,17.46425419240953
-internlm/internlm2_5-7b-chat,0,0.36816799960793073,0.11360521358693174,0.3600058558701442,0.0,0.2144748455428067,0.2144748455428067,0.3648059323539847,0,1.2241835834068844
-internlm/internlm2_5-7b-chat,1,0.3719587471180722,0.1157707566176535,0.36379026028083117,0.0,0.14033539276257723,0.14033539276257723,0.36972107700643503,0,1.3124448367166814
-internlm/internlm2_5-7b-chat,3,0.3747105229822289,0.1154826016668525,0.36859373323449984,0.0,0.17740511915269197,0.17740511915269197,0.37187052462735126,0,1.8578993821712269
-internlm/internlm2_5-7b-chat,5,0.37285562384505977,0.11541534709366409,0.36845885184482197,0.0,0.14827890556045895,0.14827890556045895,0.37048732274065205,0,2.860547219770521
-internlm/internlm2_5-7b-chat,10,0.3750895095392996,0.11696492920010637,0.36774089220788087,0.0,0.13062665489849956,0.13062665489849956,0.37298723763770353,0,5.722859664607237
-internlm/internlm2_5-7b-chat,50,0.37213069871716603,0.11404688073207249,0.3627041392544321,0.0,0.16857899382171226,0.16857899382171226,0.3694484047441432,8,42.29214474845543
-shenzhi-wang/Llama3.1-70B-Chinese-Chat,0,0.3638704024273502,0.10874677881601094,0.35336472352140924,0.0,0.15445719329214475,0.15445719329214475,0.3614642386796342,0,7.8331862312444835
-shenzhi-wang/Llama3.1-70B-Chinese-Chat,1,0.37956764543783084,0.11805442002282653,0.36984338962652286,0.0,0.12533097969991175,0.12533097969991175,0.3775255236309064,0,8.307149161518094
-shenzhi-wang/Llama3.1-70B-Chinese-Chat,3,0.38622483411876246,0.12306660851355093,0.37461197525974343,0.0,0.14386584289496912,0.14386584289496912,0.38384366117983154,0,11.681376875551633
-shenzhi-wang/Llama3.1-70B-Chinese-Chat,5,0.3895488616778815,0.12582029733797498,0.37850976779334966,0.0,0.14386584289496912,0.14386584289496912,0.38714719527562863,0,17.23389232127096
-shenzhi-wang/Llama3.1-70B-Chinese-Chat,10,0.3897515010230098,0.12957008401715697,0.3804272354384455,0.0,0.1262135922330097,0.1262135922330097,0.3876399935245347,0,
-shenzhi-wang/Llama3.1-8B-Chinese-Chat,0,0.3476217085789916,0.09799438963103267,0.33493508618013,0.0,2.2162400706090026,2.2162400706090026,0.319817551404022,2,1.0750220653133276
-shenzhi-wang/Llama3.1-8B-Chinese-Chat,1,0.35430794534292803,0.10438367949419078,0.34360907692906495,0.0,0.19505736981465135,0.19505736981465135,0.3513601482457364,0,1.1571050308914386
-shenzhi-wang/Llama3.1-8B-Chinese-Chat,3,0.3600739839089376,0.10709900175348612,0.35151668174502293,0.0,0.14827890556045895,0.14827890556045895,0.3577868691137631,0,1.9814651368049427
-shenzhi-wang/Llama3.1-8B-Chinese-Chat,5,0.36231904915539526,0.11204735364530892,0.3555966636828387,0.0,0.15798764342453664,0.15798764342453664,0.3598691748988386,0,2.8146513680494265
-shenzhi-wang/Llama3.1-8B-Chinese-Chat,10,0.3615257973929306,0.10125226501021815,0.35395169595888565,0.0,0.8314210061782877,0.8314210061782877,0.34940648099916116,22,
-shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,0,0.3284997502705771,0.08313795089297474,0.31837381406868526,0.0,0.12797881729920565,0.12797881729920565,0.3266954815790356,0,1.204766107678729
-shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1,0.3342115436248988,0.08857909016110346,0.32376944492764814,0.0,0.6690203000882613,0.6690203000882613,0.3250691235789747,1,1.4819064430714917
-shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,3,0.3435621946945506,0.09605927100886698,0.33411105509944494,0.0,0.1262135922330097,0.1262135922330097,0.3417009211692762,0,2.262135922330097
-shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,5,0.34429078135481284,0.09638489591361771,0.33818535378281456,0.0,0.07590467784642542,0.07590467784642542,0.34316381414750663,2,3.3883495145631066
-shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,10,0.3408431061510028,0.09735670499814125,0.3331892932821734,0.0,0.10414827890556046,0.10414827890556046,0.339316280986861,11,6.558693733451015
-shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,50,0.36184201368489227,0.104864029030581,0.3457669052738412,0.0,0.5136804942630185,0.5136804942630185,0.35413782502473057,2,29.659311562224183
-gpt-4o-mini,0,0.3797696357415517,0.1208238389018596,0.37055778050320864,0.0,0.09532215357458076,0.09532215357458076,0.37821133607113916,0,1.5939982347749337
-gpt-4o-mini,1,0.37721414424357197,0.12013402254992751,0.36752595751803463,0.0,0.09179170344218888,0.09179170344218888,0.37572317024740703,0,1.5666372462488967
-gpt-4o-mini,3,0.3772985230936086,0.12400311006855895,0.3682965259271725,0.0,0.09179170344218888,0.09179170344218888,0.3758072155821894,0,1.2868490732568403
-gpt-4o-mini,5,0.35541821046691263,0.1202464326274801,0.34743907979125577,0.0,0.05030891438658429,0.05030891438658429,0.3546452926906339,0,1.203883495145631
-gpt-4o-mini,10,0.37335968903521094,0.1257600824824953,0.3655455159774728,0.0,0.0706090026478376,0.0706090026478376,0.37222227656264567,0,1.1879964695498677
-gpt-4o-mini,50,0.4044690970661121,0.13972883920222515,0.3915950550621088,0.0,0.08473080317740513,0.08473080317740513,0.4029924080114739,0,1.289496910856134
-gpt-4o,0,0.3797419877414444,0.12054600115274576,0.37050277223396,0.0,0.09532215357458076,0.09532215357458076,0.37818380151840997,0,1.528684907325684
-gpt-4o,1,0.37588586538591867,0.12049862468096047,0.36605424160788713,0.0,0.09179170344218888,0.09179170344218888,0.3744001415355042,0,1.203883495145631
-gpt-4o,3,0.3768512103553621,0.12408746322526747,0.3667929041403734,0.0,0.09355692850838482,0.09355692850838482,0.3753332737090981,0,2.05207413945278
-gpt-4o,5,0.35772544915145654,0.12169683347842021,0.3484913675543446,0.0,0.0353045013239188,0.0353045013239188,0.3571787674657609,0,1.6840247131509267
-gpt-4o,10,0.3746444651189953,0.12498238983123719,0.3667923043349673,0.0,0.0706090026478376,0.0706090026478376,0.37350313867182305,0,1.7899382171226832
-gpt-4o,50,0.40413933252744955,0.13782450337569063,0.39078212423794856,0.0,0.07590467784642542,0.07590467784642542,0.402816463024093,0,2.025595763459841
+version https://git-lfs.github.com/spec/v1
+oid sha256:da524c8a2bcc6e996545af1c48cf428675203bbf299fea395f8679418b599013
+size 7931
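Once the LFS-tracked file has been materialized locally (e.g. after `git lfs pull`), it checks out as a plain CSV. A minimal sketch of reading it, assuming it keeps the header of the pre-LFS version shown in the removed lines above:

```python
import pandas as pd

# Assumes the LFS object has been fetched and the file still has the columns
# model, shots, meteor, bleu_1, rouge_l, ... as in the removed lines above.
df = pd.read_csv("results/mac-results_few_shots_metrics.csv")

# Best METEOR score per model across the few-shot settings.
best_rows = df.loc[df.groupby("model")["meteor"].idxmax()]
print(
    best_rows[["model", "shots", "meteor", "bleu_1", "rouge_l"]]
    .sort_values("meteor", ascending=False)
    .to_string(index=False)
)
```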
results/mac-results_few_shots_openai.csv CHANGED
The diff for this file is too large to render. See raw diff
 
results/mac-results_fine_tuned.csv ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a82ea7c6d799182bf52b2041afae75eba95ed195079af1b887d26e9f6f39ffe
+size 764081
results/mac-results_greedy_decoding_metrics.csv CHANGED
@@ -1,24 +1,3 @@
-model,rpp,meteor,bleu_1,rouge_l,ews_score,repetition_score,total_repetitions,rap,num_max_output_tokens
-Qwen/Qwen2-72B-Instruct,1.00,0.39496912014495184,0.12294894050451377,0.38356294219564346,0.0,0.17122683142100617,0.17122683142100617,0.39207819441096226,0
-Qwen/Qwen2-7B-Instruct,1.00,0.3757937058055942,0.11257687997946404,0.36434769925835264,0.0,0.09267431597528684,0.09267431597528684,0.3742941863232811,0
-Qwen/Qwen2-7B-Instruct,1.02,0.3768162203335968,0.11553860771639841,0.36918004433175405,0.0,0.08649602824360106,0.08649602824360106,0.37541205840978376,0
-Qwen/Qwen2-7B-Instruct,1.04,0.3715147429622351,0.11311605625702598,0.36366814755671606,0.0,0.09267431597528684,0.09267431597528684,0.3700322977098747,0
-Qwen/Qwen2-7B-Instruct,1.06,0.3721614566005243,0.10986034422062402,0.36079258503565237,0.0,0.10150044130626655,0.10150044130626655,0.370536326090367,0
-Qwen/Qwen2-7B-Instruct,1.08,0.3712966405354824,0.10809530671609749,0.35931016773383284,0.0,0.04766107678729038,0.04766107678729038,0.3705315014283968,0
-Qwen/Qwen2-7B-Instruct,1.10,0.3713527017404089,0.10809698094017595,0.3597422068131812,0.0,0.0820829655781112,0.0820829655781112,0.3700389646409943,0
-Qwen/Qwen2-7B-Instruct,1.12,0.36820419885143935,0.10505573355971856,0.3585817960835123,0.0,0.22065313327449249,0.22065313327449249,0.3647468909061054,0
-Qwen/Qwen2-7B-Instruct,1.14,0.36307746488229864,0.10051614663163566,0.34954090679105765,0.0,0.10944395410414828,0.10944395410414828,0.36136917561198173,0
-Qwen/Qwen2-7B-Instruct,1.16,0.36060381551154586,0.09572351387840275,0.3502273949453767,0.0,0.10150044130626655,0.10150044130626655,0.35902915415886316,1
-Qwen/Qwen2-7B-Instruct,1.18,0.36078545841521914,0.09571300097111912,0.34428422281617715,0.0,0.0794351279788173,0.0794351279788173,0.35954997493196317,0
-Qwen/Qwen2-7B-Instruct,1.20,0.3567548354175595,0.0912485469982839,0.3444198668825326,0.0,0.11032656663724624,0.11032656663724624,0.3550628955088913,1
-Qwen/Qwen2-7B-Instruct,1.22,0.3510044718361491,0.08350689777294566,0.3368270419392757,0.0,0.08914386584289496,0.08914386584289496,0.349656784486179,0
-Qwen/Qwen2-7B-Instruct,1.24,0.3465600044661264,0.07954262823239741,0.3330694392293648,0.0,0.07413945278022947,0.07413945278022947,0.345451809449638,0
-Qwen/Qwen2-7B-Instruct,1.26,0.3435165661403993,0.07858780987337025,0.3300088897852168,0.0,0.11297440423654016,0.11297440423654016,0.34184871767976216,1
-Qwen/Qwen2-7B-Instruct,1.28,0.34053363547339577,0.07203840378380885,0.32582095330226524,0.0,0.11120917917034422,0.11120917917034422,0.3389058385010004,2
-Qwen/Qwen2-7B-Instruct,1.30,0.33446931317267503,0.062148408497464926,0.32202345280559475,0.00529567519858782,0.14210061782877317,0.147396293027361,0.33235730827665977,6
-internlm/internlm2_5-7b-chat-1m,1.00,0.3715346402699926,0.1059772684959813,0.36295516834486563,0.0,5.531332744924978,5.531332744924978,0.31189717866457706,1
-internlm/internlm2_5-7b-chat-1m,1.02,0.352901317633597,0.08697903417673139,0.34000866115889206,0.0,0.12444836716681378,0.12444836716681378,0.3510158814935464,0
-shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.00,0.38168584246814397,0.11518296996672078,0.3701295888657657,0.0,0.19593998234774934,0.19593998234774934,0.37849615305530687,0
-shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.02,0.381084663579427,0.11434064727385712,0.3694187231105839,0.0,0.21094439541041482,0.21094439541041482,0.37766082408076884,0
-shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.04,0.38019108433175514,0.11353152954579881,0.3690593230960736,0.0,0.20123565754633715,0.20123565754633715,0.37692959536692827,0
-shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.06,0.37862157681270814,0.11220469680226439,0.36854877610676506,0.0,0.20123565754633715,0.20123565754633715,0.37537355194965677,0
+version https://git-lfs.github.com/spec/v1
+oid sha256:aff0fa4634065ff26f4c401048e7f041828ae97de7d88ef58e922ae99625854a
+size 3694
scripts/eval-mac.sh CHANGED
@@ -24,8 +24,13 @@ grep MemTotal /proc/meminfo
 
 # ./scripts/eval-model.sh shenzhi-wang/Llama3.1-8B-Chinese-Chat
 
+
+export START_EPOCH=2
+
 ./scripts/eval-epochs.sh internlm internlm2_5-7b-chat
 
+export START_EPOCH=1
+
 ./scripts/eval-epochs.sh Qwen Qwen2-7B-Instruct
 
 ./scripts/eval-epochs.sh shenzhi-wang Mistral-7B-v0.3-Chinese-Chat
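The `START_EPOCH` exports suggest the Python entry point reads its epoch range from the environment before calling `evaluate_model_all_epochs`. A hypothetical sketch of that handling (the actual variable parsing is not shown in this diff, and the `END_EPOCH` default is an assumption):

```python
import os

# Hypothetical environment handling: default to epoch 0 (the base model
# without any adapter) when START_EPOCH is unset.
start_epoch = int(os.getenv("START_EPOCH", "0"))
end_epoch = int(os.getenv("END_EPOCH", "6"))

# evaluate_model_all_epochs(..., start_epoch=start_epoch, end_epoch=end_epoch)
```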