Spaces:
Build error
Build error
finetune results
Browse files
.gitattributes
CHANGED
@@ -63,3 +63,7 @@ logs/l40-1gpu.txt filter=lfs diff=lfs merge=lfs -text
|
|
63 |
logs/l40-4gpu.txt filter=lfs diff=lfs merge=lfs -text
|
64 |
logs/openai-gpt-4o-mini.txt filter=lfs diff=lfs merge=lfs -text
|
65 |
logs/openai-gpt-4o.txt filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
63 |
logs/l40-4gpu.txt filter=lfs diff=lfs merge=lfs -text
|
64 |
logs/openai-gpt-4o-mini.txt filter=lfs diff=lfs merge=lfs -text
|
65 |
logs/openai-gpt-4o.txt filter=lfs diff=lfs merge=lfs -text
|
66 |
+
results/mac-results_few_shots_openai.csv filter=lfs diff=lfs merge=lfs -text
|
67 |
+
results/mac-results_fine_tuned.csv filter=lfs diff=lfs merge=lfs -text
|
68 |
+
results/mac-results_greedy_decoding_metrics.csv filter=lfs diff=lfs merge=lfs -text
|
69 |
+
results/mac-results_few_shots_metrics.csv filter=lfs diff=lfs merge=lfs -text
|
llm_toolkit/eval_epochs.py
CHANGED
@@ -58,9 +58,10 @@ def evaluate_model_all_epochs(
|
|
58 |
for i in range(start_epoch, end_epoch + 1):
|
59 |
print(f"Epoch {i}")
|
60 |
if i > 0:
|
61 |
-
|
|
|
62 |
print(f"loading adapter: {adapter_path}")
|
63 |
-
|
64 |
model.active_adapters = adapter_name
|
65 |
|
66 |
predictions = eval_model(
|
|
|
58 |
for i in range(start_epoch, end_epoch + 1):
|
59 |
print(f"Epoch {i}")
|
60 |
if i > 0:
|
61 |
+
adapter_name = subdirs[i - 1]
|
62 |
+
adapter_path = adapter_path_base + "/" + adapter_name
|
63 |
print(f"loading adapter: {adapter_path}")
|
64 |
+
model.load_adapter(adapter_path, adapter_name=adapter_name)
|
65 |
model.active_adapters = adapter_name
|
66 |
|
67 |
predictions = eval_model(
|
results/mac-results_few_shots_metrics.csv
CHANGED
@@ -1,47 +1,3 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
Qwen/Qwen2-72B-Instruct,3,0.4086244766794449,0.13771788946915253,0.3979712282250465,0.0,0.12709620476610767,0.12709620476610767,0.4063954239173824,0,11.657546337157987
|
5 |
-
Qwen/Qwen2-72B-Instruct,5,0.4132330811975005,0.1439773872150899,0.40387035186513487,0.0,0.11915269196822595,0.11915269196822595,0.41111822769434864,0,17.167696381288614
|
6 |
-
Qwen/Qwen2-72B-Instruct,10,0.41598174489789025,0.14493475334416772,0.4065933507975943,0.0,0.09620476610767872,0.09620476610767872,0.4142591929807702,0,29.728155339805824
|
7 |
-
Qwen/Qwen2-72B-Instruct,50,0.4401536409204816,0.1538634893900684,0.41722880607716234,0.0,0.10150044130626655,0.10150044130626655,0.43823160654983345,0,112.50397175639894
|
8 |
-
Qwen/Qwen2-7B-Instruct,0,0.377477070949433,0.11783492823424507,0.3678523300904837,0.0,0.07149161518093557,0.07149161518093557,0.3763128359886437,0,0.9805825242718447
|
9 |
-
Qwen/Qwen2-7B-Instruct,1,0.38000752971097884,0.11731917392837622,0.371517786678723,0.0,0.07413945278022947,0.07413945278022947,0.37879237953430883,0,1.0529567519858782
|
10 |
-
Qwen/Qwen2-7B-Instruct,3,0.38678180999660744,0.12368875746156333,0.3780278278830778,0.0,0.1412180052956752,0.1412180052956752,0.38444052153933106,0,1.6010591350397176
|
11 |
-
Qwen/Qwen2-7B-Instruct,5,0.38784856371389564,0.1227725469820483,0.38246119910508375,0.0,0.09179170344218888,0.09179170344218888,0.38631555618548774,0,2.2894969108561343
|
12 |
-
Qwen/Qwen2-7B-Instruct,10,0.38526484346757095,0.12535252418966952,0.38202725422893463,0.0,0.10326566637246248,0.10326566637246248,0.3835535147682633,0,4.006178287731686
|
13 |
-
Qwen/Qwen2-7B-Instruct,50,0.3953455943001352,0.12949951844499932,0.3899754114871057,0.0,0.10061782877316858,0.10061782877316858,0.39363409715118836,0,17.46425419240953
|
14 |
-
internlm/internlm2_5-7b-chat,0,0.36816799960793073,0.11360521358693174,0.3600058558701442,0.0,0.2144748455428067,0.2144748455428067,0.3648059323539847,0,1.2241835834068844
|
15 |
-
internlm/internlm2_5-7b-chat,1,0.3719587471180722,0.1157707566176535,0.36379026028083117,0.0,0.14033539276257723,0.14033539276257723,0.36972107700643503,0,1.3124448367166814
|
16 |
-
internlm/internlm2_5-7b-chat,3,0.3747105229822289,0.1154826016668525,0.36859373323449984,0.0,0.17740511915269197,0.17740511915269197,0.37187052462735126,0,1.8578993821712269
|
17 |
-
internlm/internlm2_5-7b-chat,5,0.37285562384505977,0.11541534709366409,0.36845885184482197,0.0,0.14827890556045895,0.14827890556045895,0.37048732274065205,0,2.860547219770521
|
18 |
-
internlm/internlm2_5-7b-chat,10,0.3750895095392996,0.11696492920010637,0.36774089220788087,0.0,0.13062665489849956,0.13062665489849956,0.37298723763770353,0,5.722859664607237
|
19 |
-
internlm/internlm2_5-7b-chat,50,0.37213069871716603,0.11404688073207249,0.3627041392544321,0.0,0.16857899382171226,0.16857899382171226,0.3694484047441432,8,42.29214474845543
|
20 |
-
shenzhi-wang/Llama3.1-70B-Chinese-Chat,0,0.3638704024273502,0.10874677881601094,0.35336472352140924,0.0,0.15445719329214475,0.15445719329214475,0.3614642386796342,0,7.8331862312444835
|
21 |
-
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1,0.37956764543783084,0.11805442002282653,0.36984338962652286,0.0,0.12533097969991175,0.12533097969991175,0.3775255236309064,0,8.307149161518094
|
22 |
-
shenzhi-wang/Llama3.1-70B-Chinese-Chat,3,0.38622483411876246,0.12306660851355093,0.37461197525974343,0.0,0.14386584289496912,0.14386584289496912,0.38384366117983154,0,11.681376875551633
|
23 |
-
shenzhi-wang/Llama3.1-70B-Chinese-Chat,5,0.3895488616778815,0.12582029733797498,0.37850976779334966,0.0,0.14386584289496912,0.14386584289496912,0.38714719527562863,0,17.23389232127096
|
24 |
-
shenzhi-wang/Llama3.1-70B-Chinese-Chat,10,0.3897515010230098,0.12957008401715697,0.3804272354384455,0.0,0.1262135922330097,0.1262135922330097,0.3876399935245347,0,
|
25 |
-
shenzhi-wang/Llama3.1-8B-Chinese-Chat,0,0.3476217085789916,0.09799438963103267,0.33493508618013,0.0,2.2162400706090026,2.2162400706090026,0.319817551404022,2,1.0750220653133276
|
26 |
-
shenzhi-wang/Llama3.1-8B-Chinese-Chat,1,0.35430794534292803,0.10438367949419078,0.34360907692906495,0.0,0.19505736981465135,0.19505736981465135,0.3513601482457364,0,1.1571050308914386
|
27 |
-
shenzhi-wang/Llama3.1-8B-Chinese-Chat,3,0.3600739839089376,0.10709900175348612,0.35151668174502293,0.0,0.14827890556045895,0.14827890556045895,0.3577868691137631,0,1.9814651368049427
|
28 |
-
shenzhi-wang/Llama3.1-8B-Chinese-Chat,5,0.36231904915539526,0.11204735364530892,0.3555966636828387,0.0,0.15798764342453664,0.15798764342453664,0.3598691748988386,0,2.8146513680494265
|
29 |
-
shenzhi-wang/Llama3.1-8B-Chinese-Chat,10,0.3615257973929306,0.10125226501021815,0.35395169595888565,0.0,0.8314210061782877,0.8314210061782877,0.34940648099916116,22,
|
30 |
-
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,0,0.3284997502705771,0.08313795089297474,0.31837381406868526,0.0,0.12797881729920565,0.12797881729920565,0.3266954815790356,0,1.204766107678729
|
31 |
-
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1,0.3342115436248988,0.08857909016110346,0.32376944492764814,0.0,0.6690203000882613,0.6690203000882613,0.3250691235789747,1,1.4819064430714917
|
32 |
-
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,3,0.3435621946945506,0.09605927100886698,0.33411105509944494,0.0,0.1262135922330097,0.1262135922330097,0.3417009211692762,0,2.262135922330097
|
33 |
-
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,5,0.34429078135481284,0.09638489591361771,0.33818535378281456,0.0,0.07590467784642542,0.07590467784642542,0.34316381414750663,2,3.3883495145631066
|
34 |
-
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,10,0.3408431061510028,0.09735670499814125,0.3331892932821734,0.0,0.10414827890556046,0.10414827890556046,0.339316280986861,11,6.558693733451015
|
35 |
-
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,50,0.36184201368489227,0.104864029030581,0.3457669052738412,0.0,0.5136804942630185,0.5136804942630185,0.35413782502473057,2,29.659311562224183
|
36 |
-
gpt-4o-mini,0,0.3797696357415517,0.1208238389018596,0.37055778050320864,0.0,0.09532215357458076,0.09532215357458076,0.37821133607113916,0,1.5939982347749337
|
37 |
-
gpt-4o-mini,1,0.37721414424357197,0.12013402254992751,0.36752595751803463,0.0,0.09179170344218888,0.09179170344218888,0.37572317024740703,0,1.5666372462488967
|
38 |
-
gpt-4o-mini,3,0.3772985230936086,0.12400311006855895,0.3682965259271725,0.0,0.09179170344218888,0.09179170344218888,0.3758072155821894,0,1.2868490732568403
|
39 |
-
gpt-4o-mini,5,0.35541821046691263,0.1202464326274801,0.34743907979125577,0.0,0.05030891438658429,0.05030891438658429,0.3546452926906339,0,1.203883495145631
|
40 |
-
gpt-4o-mini,10,0.37335968903521094,0.1257600824824953,0.3655455159774728,0.0,0.0706090026478376,0.0706090026478376,0.37222227656264567,0,1.1879964695498677
|
41 |
-
gpt-4o-mini,50,0.4044690970661121,0.13972883920222515,0.3915950550621088,0.0,0.08473080317740513,0.08473080317740513,0.4029924080114739,0,1.289496910856134
|
42 |
-
gpt-4o,0,0.3797419877414444,0.12054600115274576,0.37050277223396,0.0,0.09532215357458076,0.09532215357458076,0.37818380151840997,0,1.528684907325684
|
43 |
-
gpt-4o,1,0.37588586538591867,0.12049862468096047,0.36605424160788713,0.0,0.09179170344218888,0.09179170344218888,0.3744001415355042,0,1.203883495145631
|
44 |
-
gpt-4o,3,0.3768512103553621,0.12408746322526747,0.3667929041403734,0.0,0.09355692850838482,0.09355692850838482,0.3753332737090981,0,2.05207413945278
|
45 |
-
gpt-4o,5,0.35772544915145654,0.12169683347842021,0.3484913675543446,0.0,0.0353045013239188,0.0353045013239188,0.3571787674657609,0,1.6840247131509267
|
46 |
-
gpt-4o,10,0.3746444651189953,0.12498238983123719,0.3667923043349673,0.0,0.0706090026478376,0.0706090026478376,0.37350313867182305,0,1.7899382171226832
|
47 |
-
gpt-4o,50,0.40413933252744955,0.13782450337569063,0.39078212423794856,0.0,0.07590467784642542,0.07590467784642542,0.402816463024093,0,2.025595763459841
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da524c8a2bcc6e996545af1c48cf428675203bbf299fea395f8679418b599013
|
3 |
+
size 7931
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
results/mac-results_few_shots_openai.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
results/mac-results_fine_tuned.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a82ea7c6d799182bf52b2041afae75eba95ed195079af1b887d26e9f6f39ffe
|
3 |
+
size 764081
|
results/mac-results_greedy_decoding_metrics.csv
CHANGED
@@ -1,24 +1,3 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
Qwen/Qwen2-7B-Instruct,1.02,0.3768162203335968,0.11553860771639841,0.36918004433175405,0.0,0.08649602824360106,0.08649602824360106,0.37541205840978376,0
|
5 |
-
Qwen/Qwen2-7B-Instruct,1.04,0.3715147429622351,0.11311605625702598,0.36366814755671606,0.0,0.09267431597528684,0.09267431597528684,0.3700322977098747,0
|
6 |
-
Qwen/Qwen2-7B-Instruct,1.06,0.3721614566005243,0.10986034422062402,0.36079258503565237,0.0,0.10150044130626655,0.10150044130626655,0.370536326090367,0
|
7 |
-
Qwen/Qwen2-7B-Instruct,1.08,0.3712966405354824,0.10809530671609749,0.35931016773383284,0.0,0.04766107678729038,0.04766107678729038,0.3705315014283968,0
|
8 |
-
Qwen/Qwen2-7B-Instruct,1.10,0.3713527017404089,0.10809698094017595,0.3597422068131812,0.0,0.0820829655781112,0.0820829655781112,0.3700389646409943,0
|
9 |
-
Qwen/Qwen2-7B-Instruct,1.12,0.36820419885143935,0.10505573355971856,0.3585817960835123,0.0,0.22065313327449249,0.22065313327449249,0.3647468909061054,0
|
10 |
-
Qwen/Qwen2-7B-Instruct,1.14,0.36307746488229864,0.10051614663163566,0.34954090679105765,0.0,0.10944395410414828,0.10944395410414828,0.36136917561198173,0
|
11 |
-
Qwen/Qwen2-7B-Instruct,1.16,0.36060381551154586,0.09572351387840275,0.3502273949453767,0.0,0.10150044130626655,0.10150044130626655,0.35902915415886316,1
|
12 |
-
Qwen/Qwen2-7B-Instruct,1.18,0.36078545841521914,0.09571300097111912,0.34428422281617715,0.0,0.0794351279788173,0.0794351279788173,0.35954997493196317,0
|
13 |
-
Qwen/Qwen2-7B-Instruct,1.20,0.3567548354175595,0.0912485469982839,0.3444198668825326,0.0,0.11032656663724624,0.11032656663724624,0.3550628955088913,1
|
14 |
-
Qwen/Qwen2-7B-Instruct,1.22,0.3510044718361491,0.08350689777294566,0.3368270419392757,0.0,0.08914386584289496,0.08914386584289496,0.349656784486179,0
|
15 |
-
Qwen/Qwen2-7B-Instruct,1.24,0.3465600044661264,0.07954262823239741,0.3330694392293648,0.0,0.07413945278022947,0.07413945278022947,0.345451809449638,0
|
16 |
-
Qwen/Qwen2-7B-Instruct,1.26,0.3435165661403993,0.07858780987337025,0.3300088897852168,0.0,0.11297440423654016,0.11297440423654016,0.34184871767976216,1
|
17 |
-
Qwen/Qwen2-7B-Instruct,1.28,0.34053363547339577,0.07203840378380885,0.32582095330226524,0.0,0.11120917917034422,0.11120917917034422,0.3389058385010004,2
|
18 |
-
Qwen/Qwen2-7B-Instruct,1.30,0.33446931317267503,0.062148408497464926,0.32202345280559475,0.00529567519858782,0.14210061782877317,0.147396293027361,0.33235730827665977,6
|
19 |
-
internlm/internlm2_5-7b-chat-1m,1.00,0.3715346402699926,0.1059772684959813,0.36295516834486563,0.0,5.531332744924978,5.531332744924978,0.31189717866457706,1
|
20 |
-
internlm/internlm2_5-7b-chat-1m,1.02,0.352901317633597,0.08697903417673139,0.34000866115889206,0.0,0.12444836716681378,0.12444836716681378,0.3510158814935464,0
|
21 |
-
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.00,0.38168584246814397,0.11518296996672078,0.3701295888657657,0.0,0.19593998234774934,0.19593998234774934,0.37849615305530687,0
|
22 |
-
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.02,0.381084663579427,0.11434064727385712,0.3694187231105839,0.0,0.21094439541041482,0.21094439541041482,0.37766082408076884,0
|
23 |
-
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.04,0.38019108433175514,0.11353152954579881,0.3690593230960736,0.0,0.20123565754633715,0.20123565754633715,0.37692959536692827,0
|
24 |
-
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.06,0.37862157681270814,0.11220469680226439,0.36854877610676506,0.0,0.20123565754633715,0.20123565754633715,0.37537355194965677,0
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aff0fa4634065ff26f4c401048e7f041828ae97de7d88ef58e922ae99625854a
|
3 |
+
size 3694
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
scripts/eval-mac.sh
CHANGED
@@ -24,8 +24,13 @@ grep MemTotal /proc/meminfo
|
|
24 |
|
25 |
# ./scripts/eval-model.sh shenzhi-wang/Llama3.1-8B-Chinese-Chat
|
26 |
|
|
|
|
|
|
|
27 |
./scripts/eval-epochs.sh internlm internlm2_5-7b-chat
|
28 |
|
|
|
|
|
29 |
./scripts/eval-epochs.sh Qwen Qwen2-7B-Instruct
|
30 |
|
31 |
./scripts/eval-epochs.sh shenzhi-wang Mistral-7B-v0.3-Chinese-Chat
|
|
|
24 |
|
25 |
# ./scripts/eval-model.sh shenzhi-wang/Llama3.1-8B-Chinese-Chat
|
26 |
|
27 |
+
|
28 |
+
export START_EPOCH=2
|
29 |
+
|
30 |
./scripts/eval-epochs.sh internlm internlm2_5-7b-chat
|
31 |
|
32 |
+
export START_EPOCH=1
|
33 |
+
|
34 |
./scripts/eval-epochs.sh Qwen Qwen2-7B-Instruct
|
35 |
|
36 |
./scripts/eval-epochs.sh shenzhi-wang Mistral-7B-v0.3-Chinese-Chat
|