pszemraj's picture
Upload 81m_tied.md
096e543
|
raw
history blame
12.3 kB

hf-causal-experimental (pretrained=BEE-spoke-data/smol_llama-81M-tied,trust_remote_code=True,dtype=float), limit: None, provide_description: False, num_fewshot: 0, batch_size: 64

|Task|Version|Metric|Value| |Stderr|
|---|---:|---|---:|---|---:|
|arc_easy|0|acc|0.4162|±|0.0101|
| | |acc_norm|0.3885|±|0.0100|
|boolq|1|acc|0.5832|±|0.0086|
|lambada_openai|0|ppl|79.4522|±|3.1355|
| | |acc|0.2523|±|0.0061|
|openbookqa|0|acc|0.1540|±|0.0162|
| | |acc_norm|0.2780|±|0.0201|
|piqa|0|acc|0.6050|±|0.0114|
| | |acc_norm|0.5898|±|0.0115|
|winogrande|0|acc|0.5272|±|0.0140|

hf-causal-experimental (pretrained=BEE-spoke-data/smol_llama-81M-tied,trust_remote_code=True,dtype=float), limit: None, provide_description: False, num_fewshot: 25, batch_size: 64

|Task|Version|Metric|Value| |Stderr|
|---|---:|---|---:|---|---:|
|arc_challenge|0|acc|0.1672|±|0.0109|
| | |acc_norm|0.2218|±|0.0121|

hf-causal-experimental (pretrained=BEE-spoke-data/smol_llama-81M-tied,trust_remote_code=True,dtype=float), limit: None, provide_description: False, num_fewshot: 10, batch_size: 64

|Task|Version|Metric|Value| |Stderr|
|---|---:|---|---:|---|---:|
|hellaswag|0|acc|0.2769|±|0.0045|
| | |acc_norm|0.2923|±|0.0045|

hf-causal-experimental (pretrained=BEE-spoke-data/smol_llama-81M-tied,trust_remote_code=True,dtype=float), limit: None, provide_description: False, num_fewshot: 0, batch_size: 64

|Task|Version|Metric|Value| |Stderr|
|---|---:|---|---:|---|---:|
|truthfulqa_mc|1|mc1|0.2424|±|0.0150|
| | |mc2|0.4353|±|0.0152|

hf-causal-experimental (pretrained=BEE-spoke-data/smol_llama-81M-tied,trust_remote_code=True,dtype=float), limit: None, provide_description: False, num_fewshot: 5, batch_size: 64

|Task|Version|Metric|Value| |Stderr|
|---|---:|---|---:|---|---:|
|hendrycksTest-abstract_algebra|1|acc|0.2200|±|0.0416|
| | |acc_norm|0.2200|±|0.0416|
|hendrycksTest-anatomy|1|acc|0.2741|±|0.0385|
| | |acc_norm|0.2741|±|0.0385|
|hendrycksTest-astronomy|1|acc|0.1776|±|0.0311|
| | |acc_norm|0.1776|±|0.0311|
|hendrycksTest-business_ethics|1|acc|0.2100|±|0.0409|
| | |acc_norm|0.2100|±|0.0409|
|hendrycksTest-clinical_knowledge|1|acc|0.2264|±|0.0258|
| | |acc_norm|0.2264|±|0.0258|
|hendrycksTest-college_biology|1|acc|0.2361|±|0.0355|
| | |acc_norm|0.2361|±|0.0355|
|hendrycksTest-college_chemistry|1|acc|0.1900|±|0.0394|
| | |acc_norm|0.1900|±|0.0394|
|hendrycksTest-college_computer_science|1|acc|0.2100|±|0.0409|
| | |acc_norm|0.2100|±|0.0409|
|hendrycksTest-college_mathematics|1|acc|0.1800|±|0.0386|
| | |acc_norm|0.1800|±|0.0386|
|hendrycksTest-college_medicine|1|acc|0.2023|±|0.0306|
| | |acc_norm|0.2023|±|0.0306|
|hendrycksTest-college_physics|1|acc|0.2157|±|0.0409|
| | |acc_norm|0.2157|±|0.0409|
|hendrycksTest-computer_security|1|acc|0.2400|±|0.0429|
| | |acc_norm|0.2400|±|0.0429|
|hendrycksTest-conceptual_physics|1|acc|0.2596|±|0.0287|
| | |acc_norm|0.2596|±|0.0287|
|hendrycksTest-econometrics|1|acc|0.2544|±|0.0410|
| | |acc_norm|0.2544|±|0.0410|
|hendrycksTest-electrical_engineering|1|acc|0.2207|±|0.0346|
| | |acc_norm|0.2207|±|0.0346|
|hendrycksTest-elementary_mathematics|1|acc|0.2169|±|0.0212|
| | |acc_norm|0.2169|±|0.0212|
|hendrycksTest-formal_logic|1|acc|0.1587|±|0.0327|
| | |acc_norm|0.1587|±|0.0327|
|hendrycksTest-global_facts|1|acc|0.1900|±|0.0394|
| | |acc_norm|0.1900|±|0.0394|
|hendrycksTest-high_school_biology|1|acc|0.3000|±|0.0261|
| | |acc_norm|0.3000|±|0.0261|
|hendrycksTest-high_school_chemistry|1|acc|0.2808|±|0.0316|
| | |acc_norm|0.2808|±|0.0316|
|hendrycksTest-high_school_computer_science|1|acc|0.2800|±|0.0451|
| | |acc_norm|0.2800|±|0.0451|
|hendrycksTest-high_school_european_history|1|acc|0.2424|±|0.0335|
| | |acc_norm|0.2424|±|0.0335|
|hendrycksTest-high_school_geography|1|acc|0.2576|±|0.0312|
| | |acc_norm|0.2576|±|0.0312|
|hendrycksTest-high_school_government_and_politics|1|acc|0.2228|±|0.0300|
| | |acc_norm|0.2228|±|0.0300|
|hendrycksTest-high_school_macroeconomics|1|acc|0.2231|±|0.0211|
| | |acc_norm|0.2231|±|0.0211|
|hendrycksTest-high_school_mathematics|1|acc|0.2370|±|0.0259|
| | |acc_norm|0.2370|±|0.0259|
|hendrycksTest-high_school_microeconomics|1|acc|0.2227|±|0.0270|
| | |acc_norm|0.2227|±|0.0270|
|hendrycksTest-high_school_physics|1|acc|0.2053|±|0.0330|
| | |acc_norm|0.2053|±|0.0330|
|hendrycksTest-high_school_psychology|1|acc|0.2110|±|0.0175|
| | |acc_norm|0.2110|±|0.0175|
|hendrycksTest-high_school_statistics|1|acc|0.4120|±|0.0336|
| | |acc_norm|0.4120|±|0.0336|
|hendrycksTest-high_school_us_history|1|acc|0.2990|±|0.0321|
| | |acc_norm|0.2990|±|0.0321|
|hendrycksTest-high_school_world_history|1|acc|0.2658|±|0.0288|
| | |acc_norm|0.2658|±|0.0288|
|hendrycksTest-human_aging|1|acc|0.2287|±|0.0282|
| | |acc_norm|0.2287|±|0.0282|
|hendrycksTest-human_sexuality|1|acc|0.2595|±|0.0384|
| | |acc_norm|0.2595|±|0.0384|
|hendrycksTest-international_law|1|acc|0.2975|±|0.0417|
| | |acc_norm|0.2975|±|0.0417|
|hendrycksTest-jurisprudence|1|acc|0.2315|±|0.0408|
| | |acc_norm|0.2315|±|0.0408|
|hendrycksTest-logical_fallacies|1|acc|0.2822|±|0.0354|
| | |acc_norm|0.2822|±|0.0354|
|hendrycksTest-machine_learning|1|acc|0.2321|±|0.0401|
| | |acc_norm|0.2321|±|0.0401|
|hendrycksTest-management|1|acc|0.1748|±|0.0376|
| | |acc_norm|0.1748|±|0.0376|
|hendrycksTest-marketing|1|acc|0.2308|±|0.0276|
| | |acc_norm|0.2308|±|0.0276|
|hendrycksTest-medical_genetics|1|acc|0.3000|±|0.0461|
| | |acc_norm|0.3000|±|0.0461|
|hendrycksTest-miscellaneous|1|acc|0.2375|±|0.0152|
| | |acc_norm|0.2375|±|0.0152|
|hendrycksTest-moral_disputes|1|acc|0.2486|±|0.0233|
| | |acc_norm|0.2486|±|0.0233|
|hendrycksTest-moral_scenarios|1|acc|0.2425|±|0.0143|
| | |acc_norm|0.2425|±|0.0143|
|hendrycksTest-nutrition|1|acc|0.2288|±|0.0241|
| | |acc_norm|0.2288|±|0.0241|
|hendrycksTest-philosophy|1|acc|0.2090|±|0.0231|
| | |acc_norm|0.2090|±|0.0231|
|hendrycksTest-prehistory|1|acc|0.2377|±|0.0237|
| | |acc_norm|0.2377|±|0.0237|
|hendrycksTest-professional_accounting|1|acc|0.2234|±|0.0248|
| | |acc_norm|0.2234|±|0.0248|
|hendrycksTest-professional_law|1|acc|0.2471|±|0.0110|
| | |acc_norm|0.2471|±|0.0110|
|hendrycksTest-professional_medicine|1|acc|0.4081|±|0.0299|
| | |acc_norm|0.4081|±|0.0299|
|hendrycksTest-professional_psychology|1|acc|0.2565|±|0.0177|
| | |acc_norm|0.2565|±|0.0177|
|hendrycksTest-public_relations|1|acc|0.2182|±|0.0396|
| | |acc_norm|0.2182|±|0.0396|
|hendrycksTest-security_studies|1|acc|0.2408|±|0.0274|
| | |acc_norm|0.2408|±|0.0274|
|hendrycksTest-sociology|1|acc|0.2338|±|0.0299|
| | |acc_norm|0.2338|±|0.0299|
|hendrycksTest-us_foreign_policy|1|acc|0.2500|±|0.0435|
| | |acc_norm|0.2500|±|0.0435|
|hendrycksTest-virology|1|acc|0.2892|±|0.0353|
| | |acc_norm|0.2892|±|0.0353|
|hendrycksTest-world_religions|1|acc|0.2105|±|0.0313|
| | |acc_norm|0.2105|±|0.0313|