diff --git a/evals/arc-challenge/arc_ar_challenge_bloom-1b7.json b/evals/arc-challenge/arc_ar_challenge_bloom-1b7.json deleted file mode 100644 index f11ea3c48ac461ea8df812ba639e5871955a3481..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ar_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ar_challenge": { - "acc": 0.22818791946308725, - "acc_stderr": 0.02435139725761051, - "acc_norm": 0.2516778523489933, - "acc_norm_stderr": 0.025181904610615872 - } - }, - "versions": { - "arc_ar_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ar_challenge_bloom-560.json b/evals/arc-challenge/arc_ar_challenge_bloom-560.json deleted file mode 100644 index 49fe745a2caa93a57a99f2a5d13b829f8544cd13..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ar_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ar_challenge": { - "acc": 0.2550335570469799, - "acc_stderr": 0.025292327380712708, - "acc_norm": 0.2550335570469799, - "acc_norm_stderr": 0.025292327380712708 - } - }, - "versions": { - "arc_ar_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ar_challenge_bloom-7b1.json b/evals/arc-challenge/arc_ar_challenge_bloom-7b1.json deleted file mode 100644 index b79172a73e91dbbf21909686c17e2c23c1f18bef..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ar_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ar_challenge": { - "acc": 0.28187919463087246, - "acc_stderr": 0.026106703750007426, - "acc_norm": 0.3087248322147651, - "acc_norm_stderr": 0.026806063072940547 - } - }, - "versions": { - "arc_ar_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ar_challenge_gpt2-large.json b/evals/arc-challenge/arc_ar_challenge_gpt2-large.json deleted file mode 100644 index f1aadc6691007c31ca76e985257d9ebfbffa04c5..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ar_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ar_challenge": { - "acc": 0.20134228187919462, - "acc_stderr": 0.023268565767685306, - "acc_norm": 0.21476510067114093, - "acc_norm_stderr": 0.023828868848284352 - } - }, - "versions": { - "arc_ar_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ar_challenge_gpt2-medium.json b/evals/arc-challenge/arc_ar_challenge_gpt2-medium.json deleted file mode 100644 index db628063ccf012f4301410acf74c6449499d4a18..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ar_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ar_challenge": { - "acc": 0.19463087248322147, - "acc_stderr": 0.022973392306598162, - "acc_norm": 0.21140939597315436, - "acc_norm_stderr": 0.02369243605357901 - } - }, - "versions": { - "arc_ar_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ar_challenge_gpt2.json b/evals/arc-challenge/arc_ar_challenge_gpt2.json deleted file mode 100644 index 5deb8a5f49f36a08688564ca109ad5160192b56e..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ar_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ar_challenge": { - "acc": 0.20134228187919462, - "acc_stderr": 0.023268565767685313, - "acc_norm": 0.22483221476510068, - "acc_norm_stderr": 0.024224169829650755 - } - }, - "versions": { - "arc_ar_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ar_challenge_llama-7B.json b/evals/arc-challenge/arc_ar_challenge_llama-7B.json deleted file mode 100644 index e1b5a76fae32ffadeb87c9a634cef2c6de55e923..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ar_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ar_challenge": { - "acc": 0.22483221476510068, - "acc_stderr": 0.02422416982965075, - "acc_norm": 0.24161073825503357, - "acc_norm_stderr": 0.024838535108028477 - } - }, - "versions": { - "arc_ar_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_bn_challenge_bloom-1b7.json b/evals/arc-challenge/arc_bn_challenge_bloom-1b7.json deleted file mode 100644 index fa55573d46ebd614a4feb5a1aac46df0effefe2f..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_bn_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_bn_challenge": { - "acc": 0.20945945945945946, - "acc_stderr": 0.023691963473475724, - "acc_norm": 0.2533783783783784, - "acc_norm_stderr": 0.025323518629100008 - } - }, - "versions": { - "arc_bn_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_bn_challenge_bloom-560.json b/evals/arc-challenge/arc_bn_challenge_bloom-560.json deleted file mode 100644 index 389eeb09c0a92f6b7861501b6a3e0b9caff08e3e..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_bn_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_bn_challenge": { - "acc": 0.22972972972972974, - "acc_stderr": 0.024491712953916975, - "acc_norm": 0.24662162162162163, - "acc_norm_stderr": 0.025096383517594287 - } - }, - "versions": { - "arc_bn_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_bn_challenge_bloom-7b1.json b/evals/arc-challenge/arc_bn_challenge_bloom-7b1.json deleted file mode 100644 index 7cf6ca71cd6f8268d0ed709fbff3ff9aa1aa20f9..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_bn_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_bn_challenge": { - "acc": 0.23986486486486486, - "acc_stderr": 0.02486094967084638, - "acc_norm": 0.28040540540540543, - "acc_norm_stderr": 0.026153277917823237 - } - }, - "versions": { - "arc_bn_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_bn_challenge_gpt2-medium.json b/evals/arc-challenge/arc_bn_challenge_gpt2-medium.json deleted file mode 100644 index 69dd44fcae67f0511715af28d9a6762dc0732634..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_bn_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_bn_challenge": { - "acc": 0.20608108108108109, - "acc_stderr": 0.02355028295929425, - "acc_norm": 0.24662162162162163, - "acc_norm_stderr": 0.02509638351759427 - } - }, - "versions": { - "arc_bn_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_bn_challenge_gpt2.json b/evals/arc-challenge/arc_bn_challenge_gpt2.json deleted file mode 100644 index 2de0f9a7b900ac9accabd3ade0c8a4d14d7fda03..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_bn_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_bn_challenge": { - "acc": 0.22635135135135134, - "acc_stderr": 0.024364215012920555, - "acc_norm": 0.2668918918918919, - "acc_norm_stderr": 0.025753762926257917 - } - }, - "versions": { - "arc_bn_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_bn_challenge_llama-7B.json b/evals/arc-challenge/arc_bn_challenge_llama-7B.json deleted file mode 100644 index a3dbec93edb13b0fdf7c70d9a22d0f709e0a25b2..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_bn_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_bn_challenge": { - "acc": 0.22635135135135134, - "acc_stderr": 0.024364215012920565, - "acc_norm": 0.26013513513513514, - "acc_norm_stderr": 0.02554257639364025 - } - }, - "versions": { - "arc_bn_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ca_challenge_bloom-1b7.json b/evals/arc-challenge/arc_ca_challenge_bloom-1b7.json deleted file mode 100644 index 80c6381676cf5f4508fe26a2e71b75de9f5857f5..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ca_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ca_challenge": { - "acc": 0.2356902356902357, - "acc_stderr": 0.02466946003490763, - "acc_norm": 0.27946127946127947, - "acc_norm_stderr": 0.026082164400369843 - } - }, - "versions": { - "arc_ca_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ca_challenge_bloom-560.json b/evals/arc-challenge/arc_ca_challenge_bloom-560.json deleted file mode 100644 index 74ea721d64eabef94a72533148cf4d15946ea667..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ca_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ca_challenge": { - "acc": 0.2053872053872054, - "acc_stderr": 0.02348110951859932, - "acc_norm": 0.23232323232323232, - "acc_norm_stderr": 0.02454650495612789 - } - }, - "versions": { - "arc_ca_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ca_challenge_bloom-7b1.json b/evals/arc-challenge/arc_ca_challenge_bloom-7b1.json deleted file mode 100644 index 828e5442ee5f197e68f640cec0d3f5a4d2190a86..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ca_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ca_challenge": { - "acc": 0.3164983164983165, - "acc_stderr": 0.02703395838420779, - "acc_norm": 0.3434343434343434, - "acc_norm_stderr": 0.0276003816062635 - } - }, - "versions": { - "arc_ca_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ca_challenge_gpt2-large.json b/evals/arc-challenge/arc_ca_challenge_gpt2-large.json deleted file mode 100644 index 1d1333c44929e8c397db2c9c89aa32f6c849e02f..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ca_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ca_challenge": { - "acc": 0.20875420875420875, - "acc_stderr": 0.02362258775627148, - "acc_norm": 0.22895622895622897, - "acc_norm_stderr": 0.02442136264227106 - } - }, - "versions": { - "arc_ca_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ca_challenge_gpt2-medium.json b/evals/arc-challenge/arc_ca_challenge_gpt2-medium.json deleted file mode 100644 index b9427197beac9ba8529aa3e8014b5dee0307e089..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ca_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ca_challenge": { - "acc": 0.20875420875420875, - "acc_stderr": 0.023622587756271473, - "acc_norm": 0.21212121212121213, - "acc_norm_stderr": 0.023761611918761673 - } - }, - "versions": { - "arc_ca_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ca_challenge_gpt2.json b/evals/arc-challenge/arc_ca_challenge_gpt2.json deleted file mode 100644 index a9ebfd334ce3c7fa9305ddb2650d0c9ed8d727ac..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ca_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ca_challenge": { - "acc": 0.21885521885521886, - "acc_stderr": 0.024032467624412215, - "acc_norm": 0.21885521885521886, - "acc_norm_stderr": 0.02403246762441221 - } - }, - "versions": { - "arc_ca_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ca_challenge_llama-7B.json b/evals/arc-challenge/arc_ca_challenge_llama-7B.json deleted file mode 100644 index 5b79736bea0e6806983af2b1d26982bb71d2169c..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ca_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ca_challenge": { - "acc": 0.29292929292929293, - "acc_stderr": 0.026452514969665927, - "acc_norm": 0.29292929292929293, - "acc_norm_stderr": 0.02645251496966592 - } - }, - "versions": { - "arc_ca_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_da_challenge_bloom-1b7.json b/evals/arc-challenge/arc_da_challenge_bloom-1b7.json deleted file mode 100644 index ad507f37ee73db4c175fcd2ff76b2949c5186f12..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_da_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_da_challenge": { - "acc": 0.2255892255892256, - "acc_stderr": 0.02429399929295737, - "acc_norm": 0.26262626262626265, - "acc_norm_stderr": 0.02557802773320011 - } - }, - "versions": { - "arc_da_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_da_challenge_bloom-560.json b/evals/arc-challenge/arc_da_challenge_bloom-560.json deleted file mode 100644 index 76c97cf086a3d4eb479d7ea19745c4f301127a2e..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_da_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_da_challenge": { - "acc": 0.25925925925925924, - "acc_stderr": 0.025471492792791667, - "acc_norm": 0.24579124579124578, - "acc_norm_stderr": 0.025025521384235284 - } - }, - "versions": { - "arc_da_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_da_challenge_bloom-7b1.json b/evals/arc-challenge/arc_da_challenge_bloom-7b1.json deleted file mode 100644 index 38cbbb63a1aa857301e47a632ca28cb48df2b26a..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_da_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_da_challenge": { - "acc": 0.24242424242424243, - "acc_stderr": 0.02490893747050877, - "acc_norm": 0.24915824915824916, - "acc_norm_stderr": 0.025140041284626418 - } - }, - "versions": { - "arc_da_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_da_challenge_gpt2-large.json b/evals/arc-challenge/arc_da_challenge_gpt2-large.json deleted file mode 100644 index c8ee21dc7b9e87604443ebe5bc43e5cd6006ac8a..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_da_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_da_challenge": { - "acc": 0.23232323232323232, - "acc_stderr": 0.02454650495612789, - "acc_norm": 0.24242424242424243, - "acc_norm_stderr": 0.024908937470508753 - } - }, - "versions": { - "arc_da_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_da_challenge_gpt2-medium.json b/evals/arc-challenge/arc_da_challenge_gpt2-medium.json deleted file mode 100644 index df7aa6d8d8bffd69ae15219bdb1f31971d2146b7..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_da_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_da_challenge": { - "acc": 0.24579124579124578, - "acc_stderr": 0.0250255213842353, - "acc_norm": 0.2727272727272727, - "acc_norm_stderr": 0.025886127156886297 - } - }, - "versions": { - "arc_da_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_da_challenge_gpt2.json b/evals/arc-challenge/arc_da_challenge_gpt2.json deleted file mode 100644 index e06d761ac718567edd82446e7cab3db268352caf..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_da_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_da_challenge": { - "acc": 0.2222222222222222, - "acc_stderr": 0.02416437978893547, - "acc_norm": 0.23905723905723905, - "acc_norm_stderr": 0.024790260423468984 - } - }, - "versions": { - "arc_da_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_da_challenge_llama-7B.json b/evals/arc-challenge/arc_da_challenge_llama-7B.json deleted file mode 100644 index 0669687f3d0755614d71660a1b71b9c1d16c99af..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_da_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_da_challenge": { - "acc": 0.3063973063973064, - "acc_stderr": 0.026794891419479452, - "acc_norm": 0.3367003367003367, - "acc_norm_stderr": 0.02746823841289221 - } - }, - "versions": { - "arc_da_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_de_challenge_bloom-1b7.json b/evals/arc-challenge/arc_de_challenge_bloom-1b7.json deleted file mode 100644 index 2c10bc700c0ecb2dfc8bde73b2f3f18879be1571..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_de_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_de_challenge": { - "acc": 0.24496644295302014, - "acc_stderr": 0.024955035980898946, - "acc_norm": 0.2953020134228188, - "acc_norm_stderr": 0.026470155629081085 - } - }, - "versions": { - "arc_de_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_de_challenge_bloom-560.json b/evals/arc-challenge/arc_de_challenge_bloom-560.json deleted file mode 100644 index 0c23e9b1eaef780d6a824e7c0f623556d950ca89..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_de_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_de_challenge": { - "acc": 0.2348993288590604, - "acc_stderr": 0.024599255015999244, - "acc_norm": 0.28187919463087246, - "acc_norm_stderr": 0.026106703750007426 - } - }, - "versions": { - "arc_de_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_de_challenge_bloom-7b1.json b/evals/arc-challenge/arc_de_challenge_bloom-7b1.json deleted file mode 100644 index 477d702b1bc9eee6d2f6b2ada459a35f84ed90e2..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_de_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_de_challenge": { - "acc": 0.2684563758389262, - "acc_stderr": 0.0257145395148175, - "acc_norm": 0.2684563758389262, - "acc_norm_stderr": 0.0257145395148175 - } - }, - "versions": { - "arc_de_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_de_challenge_gpt2-large.json b/evals/arc-challenge/arc_de_challenge_gpt2-large.json deleted file mode 100644 index 2bc523b2a951a72b3cd9a3ca1f364c1880010ab0..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_de_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_de_challenge": { - "acc": 0.23825503355704697, - "acc_stderr": 0.024719951493159625, - "acc_norm": 0.27181208053691275, - "acc_norm_stderr": 0.025815342279487567 - } - }, - "versions": { - "arc_de_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_de_challenge_gpt2-medium.json b/evals/arc-challenge/arc_de_challenge_gpt2-medium.json deleted file mode 100644 index 45b24780309957f9064133758d7f8cccdb182f96..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_de_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_de_challenge": { - "acc": 0.23825503355704697, - "acc_stderr": 0.024719951493159625, - "acc_norm": 0.28859060402684567, - "acc_norm_stderr": 0.026291942108676806 - } - }, - "versions": { - "arc_de_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_de_challenge_gpt2.json b/evals/arc-challenge/arc_de_challenge_gpt2.json deleted file mode 100644 index dcac4b017ab401c82005ea115725c223d14f4bbb..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_de_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_de_challenge": { - "acc": 0.22483221476510068, - "acc_stderr": 0.02422416982965075, - "acc_norm": 0.21140939597315436, - "acc_norm_stderr": 0.02369243605357901 - } - }, - "versions": { - "arc_de_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_de_challenge_llama-7B.json b/evals/arc-challenge/arc_de_challenge_llama-7B.json deleted file mode 100644 index 8cb6300f14d8c556143f550509be7862841dc7c6..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_de_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_de_challenge": { - "acc": 0.2785234899328859, - "acc_stderr": 0.0260114035784859, - "acc_norm": 0.348993288590604, - "acc_norm_stderr": 0.027658144793750224 - } - }, - "versions": { - "arc_de_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_es_challenge_bloom-1b7.json b/evals/arc-challenge/arc_es_challenge_bloom-1b7.json deleted file mode 100644 index 74eba78a722fcedb488ec904b2f0d58171c8a749..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_es_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_es_challenge": { - "acc": 0.2356902356902357, - "acc_stderr": 0.02466946003490763, - "acc_norm": 0.2895622895622896, - "acc_norm_stderr": 0.026362594432681956 - } - }, - "versions": { - "arc_es_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_es_challenge_bloom-560.json b/evals/arc-challenge/arc_es_challenge_bloom-560.json deleted file mode 100644 index f03023ac512f6466bc05adcbbd4b74fafdb0701e..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_es_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_es_challenge": { - "acc": 0.2255892255892256, - "acc_stderr": 0.024293999292957367, - "acc_norm": 0.2356902356902357, - "acc_norm_stderr": 0.02466946003490764 - } - }, - "versions": { - "arc_es_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_es_challenge_bloom-7b1.json b/evals/arc-challenge/arc_es_challenge_bloom-7b1.json deleted file mode 100644 index 42cce52cd279c31092e728aadcc63cb1e0a04b59..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_es_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_es_challenge": { - "acc": 0.3265993265993266, - "acc_stderr": 0.027258287015652305, - "acc_norm": 0.3602693602693603, - "acc_norm_stderr": 0.02790399493827167 - } - }, - "versions": { - "arc_es_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_es_challenge_gpt2-large.json b/evals/arc-challenge/arc_es_challenge_gpt2-large.json deleted file mode 100644 index 8889a96dc89f373c32d03d03beba715496d3c5cf..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_es_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_es_challenge": { - "acc": 0.2222222222222222, - "acc_stderr": 0.024164379788935483, - "acc_norm": 0.26262626262626265, - "acc_norm_stderr": 0.02557802773320012 - } - }, - "versions": { - "arc_es_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_es_challenge_gpt2-medium.json b/evals/arc-challenge/arc_es_challenge_gpt2-medium.json deleted file mode 100644 index 292e3ed1cc0e8b1b1063554055397c13de7ff5f7..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_es_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_es_challenge": { - "acc": 0.1919191919191919, - "acc_stderr": 0.022889733897083934, - "acc_norm": 0.25252525252525254, - "acc_norm_stderr": 0.02525252525252536 - } - }, - "versions": { - "arc_es_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_es_challenge_gpt2.json b/evals/arc-challenge/arc_es_challenge_gpt2.json deleted file mode 100644 index e71f05e3b44a477a0c85e997c61776163460f160..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_es_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_es_challenge": { - "acc": 0.19865319865319866, - "acc_stderr": 0.023190610381322127, - "acc_norm": 0.24579124579124578, - "acc_norm_stderr": 0.0250255213842353 - } - }, - "versions": { - "arc_es_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_es_challenge_llama-7B.json b/evals/arc-challenge/arc_es_challenge_llama-7B.json deleted file mode 100644 index 0fab72d1a1f2e4fd24095bb5ec61c4a1d8f08aee..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_es_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_es_challenge": { - "acc": 0.3501683501683502, - "acc_stderr": 0.027726370308831506, - "acc_norm": 0.3602693602693603, - "acc_norm_stderr": 0.02790399493827167 - } - }, - "versions": { - "arc_es_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_eu_challenge_bloom-1b7.json b/evals/arc-challenge/arc_eu_challenge_bloom-1b7.json deleted file mode 100644 index ec1113a347e63807533e24faa9f8f1133a725ba3..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_eu_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_eu_challenge": { - "acc": 0.22377622377622378, - "acc_stderr": 0.02468755105337312, - "acc_norm": 0.2517482517482518, - "acc_norm_stderr": 0.02570896966075011 - } - }, - "versions": { - "arc_eu_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_eu_challenge_bloom-560.json b/evals/arc-challenge/arc_eu_challenge_bloom-560.json deleted file mode 100644 index d21d146ef31af9e17f56082cab45ffcd1938858f..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_eu_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_eu_challenge": { - "acc": 0.24475524475524477, - "acc_stderr": 0.02546756553847068, - "acc_norm": 0.19230769230769232, - "acc_norm_stderr": 0.023345268410264786 - } - }, - "versions": { - "arc_eu_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_eu_challenge_bloom-7b1.json b/evals/arc-challenge/arc_eu_challenge_bloom-7b1.json deleted file mode 100644 index a5c3fd12b9223764b5f572dbfa37a6903f058c5e..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_eu_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_eu_challenge": { - "acc": 0.23076923076923078, - "acc_stderr": 0.024957141712425013, - "acc_norm": 0.24125874125874125, - "acc_norm_stderr": 0.025343462496583764 - } - }, - "versions": { - "arc_eu_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_eu_challenge_gpt2-large.json b/evals/arc-challenge/arc_eu_challenge_gpt2-large.json deleted file mode 100644 index 1ca1581ef49b197cacfd25186739d7697494240c..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_eu_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_eu_challenge": { - "acc": 0.25874125874125875, - "acc_stderr": 0.02594151450124707, - "acc_norm": 0.24125874125874125, - "acc_norm_stderr": 0.025343462496583737 - } - }, - "versions": { - "arc_eu_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_eu_challenge_gpt2-medium.json b/evals/arc-challenge/arc_eu_challenge_gpt2-medium.json deleted file mode 100644 index 9fcb0f103e4f8b17826dc742c5e2fd7760677501..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_eu_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_eu_challenge": { - "acc": 0.2762237762237762, - "acc_stderr": 0.026485626798716442, - "acc_norm": 0.25874125874125875, - "acc_norm_stderr": 0.025941514501247064 - } - }, - "versions": { - "arc_eu_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_eu_challenge_gpt2.json b/evals/arc-challenge/arc_eu_challenge_gpt2.json deleted file mode 100644 index 7a6f7747e337535ab8fba538b1b3e6292e596be8..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_eu_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_eu_challenge": { - "acc": 0.2762237762237762, - "acc_stderr": 0.026485626798716456, - "acc_norm": 0.24825174825174826, - "acc_norm_stderr": 0.025589390464738234 - } - }, - "versions": { - "arc_eu_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_eu_challenge_llama-7B.json b/evals/arc-challenge/arc_eu_challenge_llama-7B.json deleted file mode 100644 index 748beb769c74d6f45c8e93c5a0151df8949243d5..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_eu_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_eu_challenge": { - "acc": 0.26223776223776224, - "acc_stderr": 0.026054539173797044, - "acc_norm": 0.23426573426573427, - "acc_norm_stderr": 0.02508828621716978 - } - }, - "versions": { - "arc_eu_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_fr_challenge_bloom-1b7.json b/evals/arc-challenge/arc_fr_challenge_bloom-1b7.json deleted file mode 100644 index e45f16627cad6e7f9c00c5e957f834e5d38c0364..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_fr_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_fr_challenge": { - "acc": 0.2550335570469799, - "acc_stderr": 0.025292327380712687, - "acc_norm": 0.2953020134228188, - "acc_norm_stderr": 0.026470155629081078 - } - }, - "versions": { - "arc_fr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_fr_challenge_bloom-560.json b/evals/arc-challenge/arc_fr_challenge_bloom-560.json deleted file mode 100644 index c6a22e37448b26cc7b45d56b9eb1cb9358ea8a34..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_fr_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_fr_challenge": { - "acc": 0.2348993288590604, - "acc_stderr": 0.024599255015999244, - "acc_norm": 0.25838926174496646, - "acc_norm_stderr": 0.025400777524610105 - } - }, - "versions": { - "arc_fr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_fr_challenge_bloom-7b1.json b/evals/arc-challenge/arc_fr_challenge_bloom-7b1.json deleted file mode 100644 index e7fc02c83acce1c27f68cacb276ebf9d1038459b..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_fr_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_fr_challenge": { - "acc": 0.36577181208053694, - "acc_stderr": 0.027947930997299652, - "acc_norm": 0.3825503355704698, - "acc_norm_stderr": 0.02820115194087938 - } - }, - "versions": { - "arc_fr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_fr_challenge_gpt2-large.json b/evals/arc-challenge/arc_fr_challenge_gpt2-large.json deleted file mode 100644 index 9aae5d2ce6adfb2eb44ca3f0cdc1108895cd0a83..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_fr_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_fr_challenge": { - "acc": 0.1912751677852349, - "acc_stderr": 0.02282188225534101, - "acc_norm": 0.2684563758389262, - "acc_norm_stderr": 0.025714539514817496 - } - }, - "versions": { - "arc_fr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_fr_challenge_gpt2-medium.json b/evals/arc-challenge/arc_fr_challenge_gpt2-medium.json deleted file mode 100644 index 465234e97d674cd00fa45996ea2f08a2d3e81dff..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_fr_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_fr_challenge": { - "acc": 0.2181208053691275, - "acc_stderr": 0.023962942745646792, - "acc_norm": 0.2785234899328859, - "acc_norm_stderr": 0.026011403578485918 - } - }, - "versions": { - "arc_fr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_fr_challenge_gpt2.json b/evals/arc-challenge/arc_fr_challenge_gpt2.json deleted file mode 100644 index 4e91d18eac5ed9bf7def9d899e70e9280a10d994..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_fr_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_fr_challenge": { - "acc": 0.2080536912751678, - "acc_stderr": 0.023553603370264107, - "acc_norm": 0.2751677852348993, - "acc_norm_stderr": 0.025914289910427518 - } - }, - "versions": { - "arc_fr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_fr_challenge_llama-7B.json b/evals/arc-challenge/arc_fr_challenge_llama-7B.json deleted file mode 100644 index 289f9e2b1689351de784a6a0a22e47ebaa0bcc28..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_fr_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_fr_challenge": { - "acc": 0.3523489932885906, - "acc_stderr": 0.027719080218117063, - "acc_norm": 0.3422818791946309, - "acc_norm_stderr": 0.027531738303985358 - } - }, - "versions": { - "arc_fr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_gu_challenge_bloom-1b7.json b/evals/arc-challenge/arc_gu_challenge_bloom-1b7.json deleted file mode 100644 index a68c6f6a88aaab21388ac0f6f47a96fcad831091..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_gu_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_gu_challenge": { - "acc": 0.23693379790940766, - "acc_stderr": 0.02514268188080883, - "acc_norm": 0.2613240418118467, - "acc_norm_stderr": 0.025979671112800046 - } - }, - "versions": { - "arc_gu_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_gu_challenge_bloom-560.json b/evals/arc-challenge/arc_gu_challenge_bloom-560.json deleted file mode 100644 index 8e1e6a4854fc92fa9250450b250a4769a4c3586d..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_gu_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_gu_challenge": { - "acc": 0.21951219512195122, - "acc_stderr": 0.0244753759026465, - "acc_norm": 0.25435540069686413, - "acc_norm_stderr": 0.025751551710541783 - } - }, - "versions": { - "arc_gu_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_gu_challenge_bloom-7b1.json b/evals/arc-challenge/arc_gu_challenge_bloom-7b1.json deleted file mode 100644 index 920acb43e2275592dbf6351e0ee175bbb1a322c1..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_gu_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_gu_challenge": { - "acc": 0.23693379790940766, - "acc_stderr": 0.02514268188080883, - "acc_norm": 0.23693379790940766, - "acc_norm_stderr": 0.025142681880808825 - } - }, - "versions": { - "arc_gu_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_gu_challenge_gpt2-large.json b/evals/arc-challenge/arc_gu_challenge_gpt2-large.json deleted file mode 100644 index c441954523c6d4bea5cc1b2cba0305b6c41fee49..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_gu_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_gu_challenge": { - "acc": 0.22996515679442509, - "acc_stderr": 0.02488302588342452, - "acc_norm": 0.23693379790940766, - "acc_norm_stderr": 0.025142681880808832 - } - }, - "versions": { - "arc_gu_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_gu_challenge_gpt2-medium.json b/evals/arc-challenge/arc_gu_challenge_gpt2-medium.json deleted file mode 100644 index 7aaeca4ab77d4bf203d3bf29e50b2c3f50320f78..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_gu_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_gu_challenge": { - "acc": 0.2229965156794425, - "acc_stderr": 0.02461373413263406, - "acc_norm": 0.2508710801393728, - "acc_norm_stderr": 0.02563424701238326 - } - }, - "versions": { - "arc_gu_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_gu_challenge_gpt2.json b/evals/arc-challenge/arc_gu_challenge_gpt2.json deleted file mode 100644 index a988ac9706a7406299e0de78b92c41a2151d0204..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_gu_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_gu_challenge": { - "acc": 0.22996515679442509, - "acc_stderr": 0.024883025883424517, - "acc_norm": 0.24390243902439024, - "acc_norm_stderr": 0.025392997717581856 - } - }, - "versions": { - "arc_gu_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_gu_challenge_llama-7B.json b/evals/arc-challenge/arc_gu_challenge_llama-7B.json deleted file mode 100644 index 12e906c731a45f8bd9b92a525fa2d3edc9a6f62e..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_gu_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_gu_challenge": { - "acc": 0.20557491289198607, - "acc_stderr": 0.023896181928798988, - "acc_norm": 0.26480836236933797, - "acc_norm_stderr": 0.026090542561414385 - } - }, - "versions": { - "arc_gu_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hi_challenge_bloom-1b7.json b/evals/arc-challenge/arc_hi_challenge_bloom-1b7.json deleted file mode 100644 index 474da43c63438f6e87405fb3780c9b001241b895..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hi_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hi_challenge": { - "acc": 0.21140939597315436, - "acc_stderr": 0.02369243605357901, - "acc_norm": 0.23825503355704697, - "acc_norm_stderr": 0.024719951493159625 - } - }, - "versions": { - "arc_hi_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hi_challenge_bloom-560.json b/evals/arc-challenge/arc_hi_challenge_bloom-560.json deleted file mode 100644 index 1606ed0007915536346cb01b3395ab2cb67b09a9..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hi_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hi_challenge": { - "acc": 0.19798657718120805, - "acc_stderr": 0.023122269968056355, - "acc_norm": 0.2181208053691275, - "acc_norm_stderr": 0.023962942745646806 - } - }, - "versions": { - "arc_hi_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hi_challenge_bloom-7b1.json b/evals/arc-challenge/arc_hi_challenge_bloom-7b1.json deleted file mode 100644 index b5660d5853f1219cfdbd0d886a4fccd9e6a3ab2b..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hi_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hi_challenge": { - "acc": 0.25838926174496646, - "acc_stderr": 0.025400777524610105, - "acc_norm": 0.29194630872483224, - "acc_norm_stderr": 0.026381917944561784 - } - }, - "versions": { - "arc_hi_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hi_challenge_gpt2-large.json b/evals/arc-challenge/arc_hi_challenge_gpt2-large.json deleted file mode 100644 index e6870360e984b19d105ccc86592d36a7564ff98a..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hi_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hi_challenge": { - "acc": 0.22818791946308725, - "acc_stderr": 0.024351397257610513, - "acc_norm": 0.25838926174496646, - "acc_norm_stderr": 0.025400777524610105 - } - }, - "versions": { - "arc_hi_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hi_challenge_gpt2-medium.json b/evals/arc-challenge/arc_hi_challenge_gpt2-medium.json deleted file mode 100644 index f64cba429b30075841311a50303cbff1487551af..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hi_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hi_challenge": { - "acc": 0.24161073825503357, - "acc_stderr": 0.02483853510802848, - "acc_norm": 0.27181208053691275, - "acc_norm_stderr": 0.025815342279487567 - } - }, - "versions": { - "arc_hi_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hi_challenge_gpt2.json b/evals/arc-challenge/arc_hi_challenge_gpt2.json deleted file mode 100644 index 9ccb8fb7bd3bc4c523ed703b76c3d2526c010107..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hi_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hi_challenge": { - "acc": 0.2181208053691275, - "acc_stderr": 0.023962942745646785, - "acc_norm": 0.2785234899328859, - "acc_norm_stderr": 0.026011403578485925 - } - }, - "versions": { - "arc_hi_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hi_challenge_llama-7B.json b/evals/arc-challenge/arc_hi_challenge_llama-7B.json deleted file mode 100644 index 90d5c1ec99c8e977e4997800431e69a1dc078659..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hi_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hi_challenge": { - "acc": 0.20469798657718122, - "acc_stderr": 0.02341232810510543, - "acc_norm": 0.2751677852348993, - "acc_norm_stderr": 0.025914289910427518 - } - }, - "versions": { - "arc_hi_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hr_challenge_bloom-1b7.json b/evals/arc-challenge/arc_hr_challenge_bloom-1b7.json deleted file mode 100644 index c4ea79c0ffc6047bb74b51d401771a577f7b2a2e..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hr_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hr_challenge": { - "acc": 0.24579124579124578, - "acc_stderr": 0.025025521384235302, - "acc_norm": 0.25925925925925924, - "acc_norm_stderr": 0.025471492792791692 - } - }, - "versions": { - "arc_hr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hr_challenge_bloom-560.json b/evals/arc-challenge/arc_hr_challenge_bloom-560.json deleted file mode 100644 index d0388389e9fdfe66978f0bb663af6b9c14905b74..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hr_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hr_challenge": { - "acc": 0.19865319865319866, - "acc_stderr": 0.023190610381322117, - "acc_norm": 0.2558922558922559, - "acc_norm_stderr": 0.025363000375801963 - } - }, - "versions": { - "arc_hr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hr_challenge_bloom-7b1.json b/evals/arc-challenge/arc_hr_challenge_bloom-7b1.json deleted file mode 100644 index 27a6b5e7862ae33a52b4fcee86a333d1819e8514..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hr_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hr_challenge": { - "acc": 0.23905723905723905, - "acc_stderr": 0.02479026042346899, - "acc_norm": 0.2962962962962963, - "acc_norm_stderr": 0.026540687854980666 - } - }, - "versions": { - "arc_hr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hr_challenge_gpt2-large.json b/evals/arc-challenge/arc_hr_challenge_gpt2-large.json deleted file mode 100644 index daac6d38e4cc4974c0a8b524053297e0971694a9..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hr_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hr_challenge": { - "acc": 0.18855218855218855, - "acc_stderr": 0.0227352759557704, - "acc_norm": 0.2255892255892256, - "acc_norm_stderr": 0.02429399929295737 - } - }, - "versions": { - "arc_hr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hr_challenge_gpt2-medium.json b/evals/arc-challenge/arc_hr_challenge_gpt2-medium.json deleted file mode 100644 index b69e7a89e1d024529a1ccfa184f0ed211ab024e6..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hr_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hr_challenge": { - "acc": 0.18855218855218855, - "acc_stderr": 0.0227352759557704, - "acc_norm": 0.2255892255892256, - "acc_norm_stderr": 0.024293999292957367 - } - }, - "versions": { - "arc_hr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hr_challenge_gpt2.json b/evals/arc-challenge/arc_hr_challenge_gpt2.json deleted file mode 100644 index d27da666a194a216383a01fe3c520895dbaada29..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hr_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hr_challenge": { - "acc": 0.19528619528619529, - "acc_stderr": 0.02304149438665811, - "acc_norm": 0.24242424242424243, - "acc_norm_stderr": 0.02490893747050875 - } - }, - "versions": { - "arc_hr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hr_challenge_llama-7B.json b/evals/arc-challenge/arc_hr_challenge_llama-7B.json deleted file mode 100644 index cc0a77d97f36393c01b3325f7f341ed832c808cb..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hr_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hr_challenge": { - "acc": 0.2996632996632997, - "acc_stderr": 0.026627130450114996, - "acc_norm": 0.3468013468013468, - "acc_norm_stderr": 0.027664139917201607 - } - }, - "versions": { - "arc_hr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hu_challenge_bloom-1b7.json b/evals/arc-challenge/arc_hu_challenge_bloom-1b7.json deleted file mode 100644 index d6ee518fa194a5cab2b0fcc73ab71cfa9a4c7938..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hu_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hu_challenge": { - "acc": 0.20875420875420875, - "acc_stderr": 0.023622587756271476, - "acc_norm": 0.21212121212121213, - "acc_norm_stderr": 0.023761611918761676 - } - }, - "versions": { - "arc_hu_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hu_challenge_bloom-560.json b/evals/arc-challenge/arc_hu_challenge_bloom-560.json deleted file mode 100644 index 4326e9a449bfff5b4bffcb01ae73902068b16858..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hu_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hu_challenge": { - "acc": 0.20202020202020202, - "acc_stderr": 0.023337132573282595, - "acc_norm": 0.23905723905723905, - "acc_norm_stderr": 0.024790260423468987 - } - }, - "versions": { - "arc_hu_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hu_challenge_bloom-7b1.json b/evals/arc-challenge/arc_hu_challenge_bloom-7b1.json deleted file mode 100644 index 7638b2f77f7140b0c0af0df71d4b9e1fd457bfb3..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hu_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hu_challenge": { - "acc": 0.2222222222222222, - "acc_stderr": 0.02416437978893547, - "acc_norm": 0.265993265993266, - "acc_norm_stderr": 0.025682629556652854 - } - }, - "versions": { - "arc_hu_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hu_challenge_gpt2-large.json b/evals/arc-challenge/arc_hu_challenge_gpt2-large.json deleted file mode 100644 index 9a7113da6667b32d4460a28d91f71e3e716239d0..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hu_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hu_challenge": { - "acc": 0.21212121212121213, - "acc_stderr": 0.023761611918761655, - "acc_norm": 0.24242424242424243, - "acc_norm_stderr": 0.02490893747050876 - } - }, - "versions": { - "arc_hu_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hu_challenge_gpt2-medium.json b/evals/arc-challenge/arc_hu_challenge_gpt2-medium.json deleted file mode 100644 index 9f05d0f663b1d94cfc4087ba1aae889603546e4a..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hu_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hu_challenge": { - "acc": 0.2356902356902357, - "acc_stderr": 0.02466946003490763, - "acc_norm": 0.2828282828282828, - "acc_norm_stderr": 0.026177438014745417 - } - }, - "versions": { - "arc_hu_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hu_challenge_gpt2.json b/evals/arc-challenge/arc_hu_challenge_gpt2.json deleted file mode 100644 index 3cdc244f3a355351f2b2e8826aed014e23f29fab..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hu_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hu_challenge": { - "acc": 0.2053872053872054, - "acc_stderr": 0.023481109518599295, - "acc_norm": 0.25252525252525254, - "acc_norm_stderr": 0.025252525252525353 - } - }, - "versions": { - "arc_hu_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hu_challenge_llama-7B.json b/evals/arc-challenge/arc_hu_challenge_llama-7B.json deleted file mode 100644 index d0add74575f51f34aaed4497cfc6e42d0d8d9bc9..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hu_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hu_challenge": { - "acc": 0.24915824915824916, - "acc_stderr": 0.025140041284626418, - "acc_norm": 0.30976430976430974, - "acc_norm_stderr": 0.0268762417790141 - } - }, - "versions": { - "arc_hu_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hy_challenge_bloom-1b7.json b/evals/arc-challenge/arc_hy_challenge_bloom-1b7.json deleted file mode 100644 index c569232cfdeeffa2b9c398fa8102342e55669d6d..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hy_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hy_challenge": { - "acc": 0.2206896551724138, - "acc_stderr": 0.024394801425351647, - "acc_norm": 0.27241379310344827, - "acc_norm_stderr": 0.026188332965202905 - } - }, - "versions": { - "arc_hy_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hy_challenge_bloom-7b1.json b/evals/arc-challenge/arc_hy_challenge_bloom-7b1.json deleted file mode 100644 index 6c5bcfbaa2c0570aa97441fc418e71f242460803..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hy_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hy_challenge": { - "acc": 0.18620689655172415, - "acc_stderr": 0.022898443475326664, - "acc_norm": 0.2689655172413793, - "acc_norm_stderr": 0.02608364690576629 - } - }, - "versions": { - "arc_hy_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hy_challenge_gpt2-large.json b/evals/arc-challenge/arc_hy_challenge_gpt2-large.json deleted file mode 100644 index d3fa3d404e18049ccef76e50f8abe3deed88b1e6..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hy_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hy_challenge": { - "acc": 0.19310344827586207, - "acc_stderr": 0.02321961545031108, - "acc_norm": 0.23793103448275862, - "acc_norm_stderr": 0.025048040852790374 - } - }, - "versions": { - "arc_hy_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hy_challenge_gpt2-medium.json b/evals/arc-challenge/arc_hy_challenge_gpt2-medium.json deleted file mode 100644 index a8f1fd794a777a25dca5bd3d54b52082a503039d..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hy_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hy_challenge": { - "acc": 0.20689655172413793, - "acc_stderr": 0.02382827611454507, - "acc_norm": 0.25862068965517243, - "acc_norm_stderr": 0.025757454562272446 - } - }, - "versions": { - "arc_hy_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hy_challenge_gpt2.json b/evals/arc-challenge/arc_hy_challenge_gpt2.json deleted file mode 100644 index a6b0c05a8a5c5112ef3326264ffa348cbe02c2ff..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hy_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hy_challenge": { - "acc": 0.1793103448275862, - "acc_stderr": 0.022565410117928373, - "acc_norm": 0.27241379310344827, - "acc_norm_stderr": 0.026188332965202905 - } - }, - "versions": { - "arc_hy_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hy_challenge_llama-7B.json b/evals/arc-challenge/arc_hy_challenge_llama-7B.json deleted file mode 100644 index 76c60ed9c16ffa50256b3420a3d1c544d27d0f8a..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_hy_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hy_challenge": { - "acc": 0.2206896551724138, - "acc_stderr": 0.024394801425351637, - "acc_norm": 0.30344827586206896, - "acc_norm_stderr": 0.02704394858012006 - } - }, - "versions": { - "arc_hy_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_id_challenge_bloom-1b7.json b/evals/arc-challenge/arc_id_challenge_bloom-1b7.json deleted file mode 100644 index 8edb6191b5ef4693fcf7dfc5cfad9800d7044c56..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_id_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_id_challenge": { - "acc": 0.2986577181208054, - "acc_stderr": 0.026556672487880535, - "acc_norm": 0.2751677852348993, - "acc_norm_stderr": 0.025914289910427518 - } - }, - "versions": { - "arc_id_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_id_challenge_bloom-560.json b/evals/arc-challenge/arc_id_challenge_bloom-560.json deleted file mode 100644 index 1d88eb711d44c2d77c4554d4f4d6e553aa1209eb..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_id_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_id_challenge": { - "acc": 0.24496644295302014, - "acc_stderr": 0.024955035980898963, - "acc_norm": 0.28187919463087246, - "acc_norm_stderr": 0.026106703750007423 - } - }, - "versions": { - "arc_id_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_id_challenge_bloom-7b1.json b/evals/arc-challenge/arc_id_challenge_bloom-7b1.json deleted file mode 100644 index 9d6908c8177308068c88e133ad1287687c46dcce..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_id_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_id_challenge": { - "acc": 0.3187919463087248, - "acc_stderr": 0.027040538296634997, - "acc_norm": 0.3825503355704698, - "acc_norm_stderr": 0.028201151940879375 - } - }, - "versions": { - "arc_id_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_id_challenge_gpt2-large.json b/evals/arc-challenge/arc_id_challenge_gpt2-large.json deleted file mode 100644 index ab5432ed0c027006e5940d1dbd8e9231eccd5ab0..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_id_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_id_challenge": { - "acc": 0.23825503355704697, - "acc_stderr": 0.02471995149315962, - "acc_norm": 0.2684563758389262, - "acc_norm_stderr": 0.025714539514817496 - } - }, - "versions": { - "arc_id_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_id_challenge_gpt2-medium.json b/evals/arc-challenge/arc_id_challenge_gpt2-medium.json deleted file mode 100644 index 156b2294f71673c6950d132b56805c5e36900b92..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_id_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_id_challenge": { - "acc": 0.2080536912751678, - "acc_stderr": 0.023553603370264114, - "acc_norm": 0.2483221476510067, - "acc_norm_stderr": 0.025069483148037884 - } - }, - "versions": { - "arc_id_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_id_challenge_gpt2.json b/evals/arc-challenge/arc_id_challenge_gpt2.json deleted file mode 100644 index ef1ed97c321fe9cc50de905c218517b2d6bb812d..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_id_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_id_challenge": { - "acc": 0.23825503355704697, - "acc_stderr": 0.024719951493159628, - "acc_norm": 0.2785234899328859, - "acc_norm_stderr": 0.026011403578485907 - } - }, - "versions": { - "arc_id_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_id_challenge_llama-7B.json b/evals/arc-challenge/arc_id_challenge_llama-7B.json deleted file mode 100644 index 531f6f81397ca5506b0f36d1291417201eb9b72e..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_id_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_id_challenge": { - "acc": 0.23154362416107382, - "acc_stderr": 0.024476414420146617, - "acc_norm": 0.28523489932885904, - "acc_norm_stderr": 0.02620021021413825 - } - }, - "versions": { - "arc_id_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_it_challenge_bloom-1b7.json b/evals/arc-challenge/arc_it_challenge_bloom-1b7.json deleted file mode 100644 index c38c75e09195bcf94e26d180f17837747473c6f7..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_it_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_it_challenge": { - "acc": 0.2558922558922559, - "acc_stderr": 0.025363000375801963, - "acc_norm": 0.24579124579124578, - "acc_norm_stderr": 0.025025521384235284 - } - }, - "versions": { - "arc_it_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_it_challenge_bloom-560.json b/evals/arc-challenge/arc_it_challenge_bloom-560.json deleted file mode 100644 index a1001fcc2f2df8d064ae2cefca3cbcf0212ed670..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_it_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_it_challenge": { - "acc": 0.20202020202020202, - "acc_stderr": 0.023337132573282612, - "acc_norm": 0.23232323232323232, - "acc_norm_stderr": 0.02454650495612789 - } - }, - "versions": { - "arc_it_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_it_challenge_bloom-7b1.json b/evals/arc-challenge/arc_it_challenge_bloom-7b1.json deleted file mode 100644 index fe8c476fe99201a63e06353589f9b571026510a6..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_it_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_it_challenge": { - "acc": 0.24242424242424243, - "acc_stderr": 0.02490893747050875, - "acc_norm": 0.23232323232323232, - "acc_norm_stderr": 0.02454650495612789 - } - }, - "versions": { - "arc_it_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_it_challenge_gpt2-large.json b/evals/arc-challenge/arc_it_challenge_gpt2-large.json deleted file mode 100644 index 2508d33a6975391a9665c19ebb10213e84bd23da..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_it_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_it_challenge": { - "acc": 0.2255892255892256, - "acc_stderr": 0.02429399929295737, - "acc_norm": 0.25252525252525254, - "acc_norm_stderr": 0.025252525252525342 - } - }, - "versions": { - "arc_it_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_it_challenge_gpt2.json b/evals/arc-challenge/arc_it_challenge_gpt2.json deleted file mode 100644 index 611874b61c1374b902d583cf5cefbc4492ed6ac6..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_it_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_it_challenge": { - "acc": 0.22895622895622897, - "acc_stderr": 0.024421362642271068, - "acc_norm": 0.24579124579124578, - "acc_norm_stderr": 0.025025521384235284 - } - }, - "versions": { - "arc_it_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_it_challenge_llama-7B.json b/evals/arc-challenge/arc_it_challenge_llama-7B.json deleted file mode 100644 index 026bc2c2a59b0b1e397e34c3f50a439cc3237e6c..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_it_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_it_challenge": { - "acc": 0.3164983164983165, - "acc_stderr": 0.02703395838420781, - "acc_norm": 0.3367003367003367, - "acc_norm_stderr": 0.02746823841289221 - } - }, - "versions": { - "arc_it_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_kn_challenge_bloom-1b7.json b/evals/arc-challenge/arc_kn_challenge_bloom-1b7.json deleted file mode 100644 index d30129acdd6c23d97224155d05ff525778afc39a..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_kn_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_kn_challenge": { - "acc": 0.2097902097902098, - "acc_stderr": 0.024118005042923673, - "acc_norm": 0.25874125874125875, - "acc_norm_stderr": 0.025941514501247074 - } - }, - "versions": { - "arc_kn_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_kn_challenge_bloom-560.json b/evals/arc-challenge/arc_kn_challenge_bloom-560.json deleted file mode 100644 index 9061ffd18bb78ef2415b46937475b366aaba5e70..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_kn_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_kn_challenge": { - "acc": 0.2097902097902098, - "acc_stderr": 0.024118005042923676, - "acc_norm": 0.2727272727272727, - "acc_norm_stderr": 0.026380954549454924 - } - }, - "versions": { - "arc_kn_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_kn_challenge_bloom-7b1.json b/evals/arc-challenge/arc_kn_challenge_bloom-7b1.json deleted file mode 100644 index 083303db0d99abb50df9664e66431757fcbc34cf..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_kn_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_kn_challenge": { - "acc": 0.2062937062937063, - "acc_stderr": 0.023969030679396822, - "acc_norm": 0.27972027972027974, - "acc_norm_stderr": 0.02658827368712313 - } - }, - "versions": { - "arc_kn_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_kn_challenge_gpt2-large.json b/evals/arc-challenge/arc_kn_challenge_gpt2-large.json deleted file mode 100644 index cc1d0795f8679f5f353a8fe04a823ce8944d6180..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_kn_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_kn_challenge": { - "acc": 0.24125874125874125, - "acc_stderr": 0.02534346249658375, - "acc_norm": 0.2062937062937063, - "acc_norm_stderr": 0.02396903067939682 - } - }, - "versions": { - "arc_kn_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_kn_challenge_gpt2-medium.json b/evals/arc-challenge/arc_kn_challenge_gpt2-medium.json deleted file mode 100644 index 3272316d0c0fa316ff58bd4f0a3c248c27457501..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_kn_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_kn_challenge": { - "acc": 0.23076923076923078, - "acc_stderr": 0.02495714171242502, - "acc_norm": 0.23426573426573427, - "acc_norm_stderr": 0.025088286217169773 - } - }, - "versions": { - "arc_kn_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_kn_challenge_gpt2.json b/evals/arc-challenge/arc_kn_challenge_gpt2.json deleted file mode 100644 index 06e41e33136f376ee8441914155f63301d2b3150..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_kn_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_kn_challenge": { - "acc": 0.21678321678321677, - "acc_stderr": 0.02440795482238759, - "acc_norm": 0.1993006993006993, - "acc_norm_stderr": 0.023662831210753306 - } - }, - "versions": { - "arc_kn_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_kn_challenge_llama-7B.json b/evals/arc-challenge/arc_kn_challenge_llama-7B.json deleted file mode 100644 index 54ade592ef4b8faca4ac733019e8a288ffcd7080..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_kn_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_kn_challenge": { - "acc": 0.25524475524475526, - "acc_stderr": 0.025826334320570847, - "acc_norm": 0.2762237762237762, - "acc_norm_stderr": 0.026485626798716456 - } - }, - "versions": { - "arc_kn_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ml_challenge_bloom-1b7.json b/evals/arc-challenge/arc_ml_challenge_bloom-1b7.json deleted file mode 100644 index 237a4de001e4d03d3a5da1bd85ff383ee5ed3641..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ml_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ml_challenge": { - "acc": 0.20270270270270271, - "acc_stderr": 0.023406091994174035, - "acc_norm": 0.20945945945945946, - "acc_norm_stderr": 0.023691963473475734 - } - }, - "versions": { - "arc_ml_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ml_challenge_bloom-560.json b/evals/arc-challenge/arc_ml_challenge_bloom-560.json deleted file mode 100644 index b276b36482cf0a1c5ed243c8a17297e981587426..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ml_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ml_challenge": { - "acc": 0.19932432432432431, - "acc_stderr": 0.02325934388926828, - "acc_norm": 0.23310810810810811, - "acc_norm_stderr": 0.024616978985669728 - } - }, - "versions": { - "arc_ml_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ml_challenge_bloom-7b1.json b/evals/arc-challenge/arc_ml_challenge_bloom-7b1.json deleted file mode 100644 index 57e340993dc80aab56386e3c1ade388f4d786241..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ml_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ml_challenge": { - "acc": 0.22635135135135134, - "acc_stderr": 0.024364215012920545, - "acc_norm": 0.22297297297297297, - "acc_norm_stderr": 0.02423444993634421 - } - }, - "versions": { - "arc_ml_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ml_challenge_gpt2-large.json b/evals/arc-challenge/arc_ml_challenge_gpt2-large.json deleted file mode 100644 index a23148b0cf58ef04dc9ab3bb8d26aedadda9296f..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ml_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ml_challenge": { - "acc": 0.22972972972972974, - "acc_stderr": 0.024491712953916972, - "acc_norm": 0.22297297297297297, - "acc_norm_stderr": 0.024234449936344216 - } - }, - "versions": { - "arc_ml_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ml_challenge_gpt2-medium.json b/evals/arc-challenge/arc_ml_challenge_gpt2-medium.json deleted file mode 100644 index 9aa842f5ce9d59030c7aae3de538f9b3ea816580..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ml_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ml_challenge": { - "acc": 0.2533783783783784, - "acc_stderr": 0.0253235186291, - "acc_norm": 0.21283783783783783, - "acc_norm_stderr": 0.0238311783119674 - } - }, - "versions": { - "arc_ml_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ml_challenge_llama-7B.json b/evals/arc-challenge/arc_ml_challenge_llama-7B.json deleted file mode 100644 index 3f4555f5009cd795dea8981be98bec45e2ed9369..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ml_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ml_challenge": { - "acc": 0.21621621621621623, - "acc_stderr": 0.023967970439477224, - "acc_norm": 0.20270270270270271, - "acc_norm_stderr": 0.023406091994174035 - } - }, - "versions": { - "arc_ml_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_mr_challenge_bloom-1b7.json b/evals/arc-challenge/arc_mr_challenge_bloom-1b7.json deleted file mode 100644 index c8b3bb6a26b22a95c0a8de8ae3221f476963428f..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_mr_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_mr_challenge": { - "acc": 0.24067796610169492, - "acc_stderr": 0.02493202205172924, - "acc_norm": 0.2440677966101695, - "acc_norm_stderr": 0.02505088069031971 - } - }, - "versions": { - "arc_mr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_mr_challenge_bloom-560.json b/evals/arc-challenge/arc_mr_challenge_bloom-560.json deleted file mode 100644 index 213f904f45633d7bdef01eef045a28ec2636faf5..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_mr_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_mr_challenge": { - "acc": 0.2440677966101695, - "acc_stderr": 0.025050880690319716, - "acc_norm": 0.22372881355932203, - "acc_norm_stderr": 0.02430491058853199 - } - }, - "versions": { - "arc_mr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_mr_challenge_bloom-7b1.json b/evals/arc-challenge/arc_mr_challenge_bloom-7b1.json deleted file mode 100644 index 4a6cfb61ab6cccf8da1ad0ec46c1bde46e11be82..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_mr_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_mr_challenge": { - "acc": 0.23389830508474577, - "acc_stderr": 0.024687839412166384, - "acc_norm": 0.2440677966101695, - "acc_norm_stderr": 0.025050880690319702 - } - }, - "versions": { - "arc_mr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_mr_challenge_gpt2-large.json b/evals/arc-challenge/arc_mr_challenge_gpt2-large.json deleted file mode 100644 index 380f5aee1d555e85568122130af494663cb3123f..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_mr_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_mr_challenge": { - "acc": 0.2, - "acc_stderr": 0.023328473740792135, - "acc_norm": 0.2440677966101695, - "acc_norm_stderr": 0.025050880690319702 - } - }, - "versions": { - "arc_mr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_mr_challenge_gpt2-medium.json b/evals/arc-challenge/arc_mr_challenge_gpt2-medium.json deleted file mode 100644 index 7df5889da7e82e2529e4532947c4e0e8507ba94c..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_mr_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_mr_challenge": { - "acc": 0.2, - "acc_stderr": 0.023328473740792135, - "acc_norm": 0.22372881355932203, - "acc_norm_stderr": 0.024304910588531993 - } - }, - "versions": { - "arc_mr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_mr_challenge_gpt2.json b/evals/arc-challenge/arc_mr_challenge_gpt2.json deleted file mode 100644 index 8344c19a2efa7d7c252e94ea149ef5b421b34214..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_mr_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_mr_challenge": { - "acc": 0.18305084745762712, - "acc_stderr": 0.02255328043040195, - "acc_norm": 0.2033898305084746, - "acc_norm_stderr": 0.023475447251410726 - } - }, - "versions": { - "arc_mr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_mr_challenge_llama-7B.json b/evals/arc-challenge/arc_mr_challenge_llama-7B.json deleted file mode 100644 index f1cf03e6c1c130bd7352dd7963fe03ae5f4303fe..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_mr_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_mr_challenge": { - "acc": 0.2271186440677966, - "acc_stderr": 0.024434819973932945, - "acc_norm": 0.2711864406779661, - "acc_norm_stderr": 0.025927971596786177 - } - }, - "versions": { - "arc_mr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ne_challenge_bloom-1b7.json b/evals/arc-challenge/arc_ne_challenge_bloom-1b7.json deleted file mode 100644 index 9ef6fea604fc9172e63676717b7455a756bbbd4e..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ne_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ne_challenge": { - "acc": 0.2222222222222222, - "acc_stderr": 0.024164379788935486, - "acc_norm": 0.30303030303030304, - "acc_norm_stderr": 0.026711859553317677 - } - }, - "versions": { - "arc_ne_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ne_challenge_bloom-560.json b/evals/arc-challenge/arc_ne_challenge_bloom-560.json deleted file mode 100644 index 490a9ae38f7edf0f013f898d0c075db2184dc99b..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ne_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ne_challenge": { - "acc": 0.25925925925925924, - "acc_stderr": 0.02547149279279167, - "acc_norm": 0.28619528619528617, - "acc_norm_stderr": 0.02627090829835463 - } - }, - "versions": { - "arc_ne_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ne_challenge_bloom-7b1.json b/evals/arc-challenge/arc_ne_challenge_bloom-7b1.json deleted file mode 100644 index 0b1c6c30b759cb29ce78c358d0d709a7b53f16f3..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ne_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ne_challenge": { - "acc": 0.24242424242424243, - "acc_stderr": 0.024908937470508766, - "acc_norm": 0.2996632996632997, - "acc_norm_stderr": 0.02662713045011499 - } - }, - "versions": { - "arc_ne_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ne_challenge_gpt2-large.json b/evals/arc-challenge/arc_ne_challenge_gpt2-large.json deleted file mode 100644 index 82b4b764b3fb7ef15563ca6d2c27830e3aef8d51..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ne_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ne_challenge": { - "acc": 0.23905723905723905, - "acc_stderr": 0.024790260423468984, - "acc_norm": 0.23905723905723905, - "acc_norm_stderr": 0.02479026042346898 - } - }, - "versions": { - "arc_ne_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ne_challenge_gpt2-medium.json b/evals/arc-challenge/arc_ne_challenge_gpt2-medium.json deleted file mode 100644 index 18464b4f845260d9e4122a7c74c4fc758519296a..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ne_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ne_challenge": { - "acc": 0.23905723905723905, - "acc_stderr": 0.024790260423468984, - "acc_norm": 0.24579124579124578, - "acc_norm_stderr": 0.025025521384235295 - } - }, - "versions": { - "arc_ne_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ne_challenge_gpt2.json b/evals/arc-challenge/arc_ne_challenge_gpt2.json deleted file mode 100644 index 669e0661f7894b2bdc02512e274ab12a340e6f2c..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ne_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ne_challenge": { - "acc": 0.2356902356902357, - "acc_stderr": 0.024669460034907637, - "acc_norm": 0.2255892255892256, - "acc_norm_stderr": 0.02429399929295737 - } - }, - "versions": { - "arc_ne_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_nl_challenge_bloom-1b7.json b/evals/arc-challenge/arc_nl_challenge_bloom-1b7.json deleted file mode 100644 index de6df0fa84c07702ad9d3005757f4412e835e175..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_nl_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_nl_challenge": { - "acc": 0.20469798657718122, - "acc_stderr": 0.02341232810510543, - "acc_norm": 0.24161073825503357, - "acc_norm_stderr": 0.024838535108028484 - } - }, - "versions": { - "arc_nl_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_nl_challenge_bloom-560.json b/evals/arc-challenge/arc_nl_challenge_bloom-560.json deleted file mode 100644 index 4bd9dec46927eea8709a44925f7f7f5e4d35c055..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_nl_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_nl_challenge": { - "acc": 0.22483221476510068, - "acc_stderr": 0.024224169829650748, - "acc_norm": 0.2651006711409396, - "acc_norm_stderr": 0.025611859712206003 - } - }, - "versions": { - "arc_nl_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_nl_challenge_bloom-7b1.json b/evals/arc-challenge/arc_nl_challenge_bloom-7b1.json deleted file mode 100644 index 5360e3ed9ed9f43f4cbddc65166e1d83d89a29e6..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_nl_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_nl_challenge": { - "acc": 0.20134228187919462, - "acc_stderr": 0.0232685657676853, - "acc_norm": 0.2684563758389262, - "acc_norm_stderr": 0.025714539514817496 - } - }, - "versions": { - "arc_nl_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_nl_challenge_gpt2-large.json b/evals/arc-challenge/arc_nl_challenge_gpt2-large.json deleted file mode 100644 index 432863c5e4840c2d01bdac986765c61050413f9f..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_nl_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_nl_challenge": { - "acc": 0.2080536912751678, - "acc_stderr": 0.023553603370264114, - "acc_norm": 0.2516778523489933, - "acc_norm_stderr": 0.025181904610615855 - } - }, - "versions": { - "arc_nl_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_nl_challenge_gpt2-medium.json b/evals/arc-challenge/arc_nl_challenge_gpt2-medium.json deleted file mode 100644 index 65d7c05ced99e1bd53aa3110a033d9c0975025fa..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_nl_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_nl_challenge": { - "acc": 0.23154362416107382, - "acc_stderr": 0.024476414420146628, - "acc_norm": 0.2550335570469799, - "acc_norm_stderr": 0.025292327380712687 - } - }, - "versions": { - "arc_nl_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_nl_challenge_gpt2.json b/evals/arc-challenge/arc_nl_challenge_gpt2.json deleted file mode 100644 index bce39d9e1424be6bf01a0c15447e59c3348a08d6..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_nl_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_nl_challenge": { - "acc": 0.21476510067114093, - "acc_stderr": 0.023828868848284373, - "acc_norm": 0.24496644295302014, - "acc_norm_stderr": 0.024955035980898956 - } - }, - "versions": { - "arc_nl_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_nl_challenge_llama-7B.json b/evals/arc-challenge/arc_nl_challenge_llama-7B.json deleted file mode 100644 index a9b3e1e927abac3aba0720a5085b3a1b041af85b..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_nl_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_nl_challenge": { - "acc": 0.2953020134228188, - "acc_stderr": 0.026470155629081078, - "acc_norm": 0.32550335570469796, - "acc_norm_stderr": 0.027188760373954457 - } - }, - "versions": { - "arc_nl_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_pt_challenge_bloom-1b7.json b/evals/arc-challenge/arc_pt_challenge_bloom-1b7.json deleted file mode 100644 index 86206aa4c02654dee089146263800252a9280415..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_pt_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_pt_challenge": { - "acc": 0.22483221476510068, - "acc_stderr": 0.024224169829650755, - "acc_norm": 0.28187919463087246, - "acc_norm_stderr": 0.026106703750007426 - } - }, - "versions": { - "arc_pt_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_pt_challenge_bloom-560.json b/evals/arc-challenge/arc_pt_challenge_bloom-560.json deleted file mode 100644 index 11021802d7ffa732fc84739fd8ec1d531dc637b6..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_pt_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_pt_challenge": { - "acc": 0.22483221476510068, - "acc_stderr": 0.02422416982965075, - "acc_norm": 0.23154362416107382, - "acc_norm_stderr": 0.02447641442014662 - } - }, - "versions": { - "arc_pt_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_pt_challenge_bloom-7b1.json b/evals/arc-challenge/arc_pt_challenge_bloom-7b1.json deleted file mode 100644 index e9f27045095eca6ce035e90605bdff561f37a5a8..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_pt_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_pt_challenge": { - "acc": 0.348993288590604, - "acc_stderr": 0.02765814479375022, - "acc_norm": 0.3724832214765101, - "acc_norm_stderr": 0.02805354855477509 - } - }, - "versions": { - "arc_pt_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_pt_challenge_gpt2-large.json b/evals/arc-challenge/arc_pt_challenge_gpt2-large.json deleted file mode 100644 index fd1a4b8d1948d7ebf686b68f03b68fae0c5e41de..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_pt_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_pt_challenge": { - "acc": 0.18791946308724833, - "acc_stderr": 0.022667687029933926, - "acc_norm": 0.24161073825503357, - "acc_norm_stderr": 0.024838535108028477 - } - }, - "versions": { - "arc_pt_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_pt_challenge_gpt2-medium.json b/evals/arc-challenge/arc_pt_challenge_gpt2-medium.json deleted file mode 100644 index 0380aff06ff37610aa48dddf5d15f62376f1d08b..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_pt_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_pt_challenge": { - "acc": 0.18120805369127516, - "acc_stderr": 0.02235101779623449, - "acc_norm": 0.2348993288590604, - "acc_norm_stderr": 0.024599255015999244 - } - }, - "versions": { - "arc_pt_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_pt_challenge_gpt2.json b/evals/arc-challenge/arc_pt_challenge_gpt2.json deleted file mode 100644 index 6a1952ed53a80de06750b3d6155487089a0672bd..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_pt_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_pt_challenge": { - "acc": 0.19463087248322147, - "acc_stderr": 0.022973392306598166, - "acc_norm": 0.2483221476510067, - "acc_norm_stderr": 0.025069483148037884 - } - }, - "versions": { - "arc_pt_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_pt_challenge_llama-7B.json b/evals/arc-challenge/arc_pt_challenge_llama-7B.json deleted file mode 100644 index e49526aa9a3f1e1f7fda72f9bf9b3a58227a95ce..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_pt_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_pt_challenge": { - "acc": 0.32550335570469796, - "acc_stderr": 0.027188760373954457, - "acc_norm": 0.33557046979865773, - "acc_norm_stderr": 0.027399214125091453 - } - }, - "versions": { - "arc_pt_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ro_challenge_bloom-1b7.json b/evals/arc-challenge/arc_ro_challenge_bloom-1b7.json deleted file mode 100644 index bd189e9050be188d43e3bac19cd42c400c5df7c8..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ro_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ro_challenge": { - "acc": 0.24915824915824916, - "acc_stderr": 0.025140041284626418, - "acc_norm": 0.28619528619528617, - "acc_norm_stderr": 0.026270908298354635 - } - }, - "versions": { - "arc_ro_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ro_challenge_bloom-560.json b/evals/arc-challenge/arc_ro_challenge_bloom-560.json deleted file mode 100644 index a797f1ebfa7d92e0c78e624b99da52e77c92822c..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ro_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ro_challenge": { - "acc": 0.20875420875420875, - "acc_stderr": 0.023622587756271473, - "acc_norm": 0.26936026936026936, - "acc_norm_stderr": 0.025785321789052268 - } - }, - "versions": { - "arc_ro_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ro_challenge_bloom-7b1.json b/evals/arc-challenge/arc_ro_challenge_bloom-7b1.json deleted file mode 100644 index 7e63a3d72b4f1a770523a9859787818e4e1ed26e..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ro_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ro_challenge": { - "acc": 0.25252525252525254, - "acc_stderr": 0.025252525252525346, - "acc_norm": 0.30303030303030304, - "acc_norm_stderr": 0.02671185955331767 - } - }, - "versions": { - "arc_ro_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ro_challenge_gpt2-large.json b/evals/arc-challenge/arc_ro_challenge_gpt2-large.json deleted file mode 100644 index 68f4f45196bec82ad2ec165f33cae93bfbedbe44..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ro_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ro_challenge": { - "acc": 0.18855218855218855, - "acc_stderr": 0.022735275955770386, - "acc_norm": 0.2828282828282828, - "acc_norm_stderr": 0.026177438014745407 - } - }, - "versions": { - "arc_ro_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ro_challenge_gpt2-medium.json b/evals/arc-challenge/arc_ro_challenge_gpt2-medium.json deleted file mode 100644 index 5df0a11438afe98b491a6e5528d70eacb48652cf..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ro_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ro_challenge": { - "acc": 0.18855218855218855, - "acc_stderr": 0.022735275955770375, - "acc_norm": 0.2558922558922559, - "acc_norm_stderr": 0.025363000375801976 - } - }, - "versions": { - "arc_ro_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ro_challenge_gpt2.json b/evals/arc-challenge/arc_ro_challenge_gpt2.json deleted file mode 100644 index 37203889a39601337bd2d8ffcd85a3e4693013ad..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ro_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ro_challenge": { - "acc": 0.20875420875420875, - "acc_stderr": 0.02362258775627147, - "acc_norm": 0.2962962962962963, - "acc_norm_stderr": 0.026540687854980673 - } - }, - "versions": { - "arc_ro_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ro_challenge_llama-7B.json b/evals/arc-challenge/arc_ro_challenge_llama-7B.json deleted file mode 100644 index 37d943e737472a25d2c879425d478f6dd746e1f4..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ro_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ro_challenge": { - "acc": 0.2828282828282828, - "acc_stderr": 0.02617743801474542, - "acc_norm": 0.3164983164983165, - "acc_norm_stderr": 0.027033958384207805 - } - }, - "versions": { - "arc_ro_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ru_challenge_bloom-1b7.json b/evals/arc-challenge/arc_ru_challenge_bloom-1b7.json deleted file mode 100644 index fc9a3f783edc283ec79c7906da73bc8a27f80a9d..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ru_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ru_challenge": { - "acc": 0.25252525252525254, - "acc_stderr": 0.02525252525252537, - "acc_norm": 0.3569023569023569, - "acc_norm_stderr": 0.027846288057490554 - } - }, - "versions": { - "arc_ru_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ru_challenge_bloom-560.json b/evals/arc-challenge/arc_ru_challenge_bloom-560.json deleted file mode 100644 index 863c94dcc4459d25ef7faec70a11d6199434c8af..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ru_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ru_challenge": { - "acc": 0.24915824915824916, - "acc_stderr": 0.025140041284626418, - "acc_norm": 0.3333333333333333, - "acc_norm_stderr": 0.027399831217559588 - } - }, - "versions": { - "arc_ru_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ru_challenge_bloom-7b1.json b/evals/arc-challenge/arc_ru_challenge_bloom-7b1.json deleted file mode 100644 index 5b61e526e728d5523f1e61b4fe49307c1c872c4c..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ru_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ru_challenge": { - "acc": 0.25925925925925924, - "acc_stderr": 0.025471492792791674, - "acc_norm": 0.32996632996632996, - "acc_norm_stderr": 0.02732985145570343 - } - }, - "versions": { - "arc_ru_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ru_challenge_gpt2-large.json b/evals/arc-challenge/arc_ru_challenge_gpt2-large.json deleted file mode 100644 index fd367513e4157fb1556348f212a5c6e94922beee..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ru_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ru_challenge": { - "acc": 0.24579124579124578, - "acc_stderr": 0.02502552138423529, - "acc_norm": 0.29292929292929293, - "acc_norm_stderr": 0.026452514969665924 - } - }, - "versions": { - "arc_ru_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ru_challenge_gpt2-medium.json b/evals/arc-challenge/arc_ru_challenge_gpt2-medium.json deleted file mode 100644 index 8a7b6aee643ab931ddd7a2528c36075699604170..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ru_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ru_challenge": { - "acc": 0.21548821548821548, - "acc_stderr": 0.023898224834697, - "acc_norm": 0.2558922558922559, - "acc_norm_stderr": 0.025363000375801963 - } - }, - "versions": { - "arc_ru_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ru_challenge_gpt2.json b/evals/arc-challenge/arc_ru_challenge_gpt2.json deleted file mode 100644 index 6c01167509035c09b2ab40ba64c6f23d0d3b61c6..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ru_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ru_challenge": { - "acc": 0.19865319865319866, - "acc_stderr": 0.023190610381322137, - "acc_norm": 0.26936026936026936, - "acc_norm_stderr": 0.025785321789052268 - } - }, - "versions": { - "arc_ru_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ru_challenge_llama-7B.json b/evals/arc-challenge/arc_ru_challenge_llama-7B.json deleted file mode 100644 index c6af8bacc84e8232e587af0b1b62f0360595f5b8..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ru_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ru_challenge": { - "acc": 0.2895622895622896, - "acc_stderr": 0.026362594432681956, - "acc_norm": 0.3333333333333333, - "acc_norm_stderr": 0.027399831217559577 - } - }, - "versions": { - "arc_ru_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_sk_challenge_bloom-1b7.json b/evals/arc-challenge/arc_sk_challenge_bloom-1b7.json deleted file mode 100644 index 5c061cbf7e912082f72face7e42633294acb46b4..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_sk_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_sk_challenge": { - "acc": 0.2516778523489933, - "acc_stderr": 0.02518190461061586, - "acc_norm": 0.2516778523489933, - "acc_norm_stderr": 0.025181904610615865 - } - }, - "versions": { - "arc_sk_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_sk_challenge_bloom-560.json b/evals/arc-challenge/arc_sk_challenge_bloom-560.json deleted file mode 100644 index 77221ca57be5ff0cc96e73fc774d0670d7c7208c..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_sk_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_sk_challenge": { - "acc": 0.24161073825503357, - "acc_stderr": 0.02483853510802848, - "acc_norm": 0.22483221476510068, - "acc_norm_stderr": 0.02422416982965075 - } - }, - "versions": { - "arc_sk_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_sk_challenge_bloom-7b1.json b/evals/arc-challenge/arc_sk_challenge_bloom-7b1.json deleted file mode 100644 index 2d78271208e5af3f6496e645f8b79b3b7394aa34..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_sk_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_sk_challenge": { - "acc": 0.2348993288590604, - "acc_stderr": 0.024599255015999244, - "acc_norm": 0.25838926174496646, - "acc_norm_stderr": 0.025400777524610105 - } - }, - "versions": { - "arc_sk_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_sk_challenge_gpt2-large.json b/evals/arc-challenge/arc_sk_challenge_gpt2-large.json deleted file mode 100644 index 128f662c32c44780afb9fd950815540a151364d6..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_sk_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_sk_challenge": { - "acc": 0.24161073825503357, - "acc_stderr": 0.02483853510802848, - "acc_norm": 0.2516778523489933, - "acc_norm_stderr": 0.025181904610615858 - } - }, - "versions": { - "arc_sk_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_sk_challenge_gpt2-medium.json b/evals/arc-challenge/arc_sk_challenge_gpt2-medium.json deleted file mode 100644 index 75bc31afba2a470fbe33869562f865ae458240c8..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_sk_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_sk_challenge": { - "acc": 0.23825503355704697, - "acc_stderr": 0.02471995149315962, - "acc_norm": 0.24496644295302014, - "acc_norm_stderr": 0.02495503598089895 - } - }, - "versions": { - "arc_sk_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_sk_challenge_gpt2.json b/evals/arc-challenge/arc_sk_challenge_gpt2.json deleted file mode 100644 index 28459f8e1e1dc32e8d92343933fa438b717eb85b..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_sk_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_sk_challenge": { - "acc": 0.2348993288590604, - "acc_stderr": 0.024599255015999244, - "acc_norm": 0.23154362416107382, - "acc_norm_stderr": 0.02447641442014662 - } - }, - "versions": { - "arc_sk_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_sk_challenge_llama-7B.json b/evals/arc-challenge/arc_sk_challenge_llama-7B.json deleted file mode 100644 index 3701c2f5034fd64259683639da7b904f8bf0d1d1..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_sk_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_sk_challenge": { - "acc": 0.2348993288590604, - "acc_stderr": 0.024599255015999244, - "acc_norm": 0.2550335570469799, - "acc_norm_stderr": 0.025292327380712683 - } - }, - "versions": { - "arc_sk_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_sr_challenge_bloom-1b7.json b/evals/arc-challenge/arc_sr_challenge_bloom-1b7.json deleted file mode 100644 index dbdcdb6f40e4a2a2d630ac6967d84266a19ee386..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_sr_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_sr_challenge": { - "acc": 0.23986486486486486, - "acc_stderr": 0.024860949670846393, - "acc_norm": 0.2635135135135135, - "acc_norm_stderr": 0.025649141242391035 - } - }, - "versions": { - "arc_sr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_sr_challenge_bloom-560.json b/evals/arc-challenge/arc_sr_challenge_bloom-560.json deleted file mode 100644 index f4e4aafa24a952d05d4ff3efde104237233e2747..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_sr_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_sr_challenge": { - "acc": 0.22972972972972974, - "acc_stderr": 0.02449171295391697, - "acc_norm": 0.27702702702702703, - "acc_norm_stderr": 0.02605620088360472 - } - }, - "versions": { - "arc_sr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_sr_challenge_bloom-7b1.json b/evals/arc-challenge/arc_sr_challenge_bloom-7b1.json deleted file mode 100644 index e70cc59ff97ac76e9506b0a8c29249c91543af45..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_sr_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_sr_challenge": { - "acc": 0.26013513513513514, - "acc_stderr": 0.025542576393640232, - "acc_norm": 0.30067567567567566, - "acc_norm_stderr": 0.026697921821786215 - } - }, - "versions": { - "arc_sr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_sr_challenge_gpt2-large.json b/evals/arc-challenge/arc_sr_challenge_gpt2-large.json deleted file mode 100644 index 381e33947c532c85c78a23c4986d737ed19bc7e1..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_sr_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_sr_challenge": { - "acc": 0.1891891891891892, - "acc_stderr": 0.022803258753373676, - "acc_norm": 0.24324324324324326, - "acc_norm_stderr": 0.024979718407699757 - } - }, - "versions": { - "arc_sr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_sr_challenge_gpt2-medium.json b/evals/arc-challenge/arc_sr_challenge_gpt2-medium.json deleted file mode 100644 index d59206fddbda1dfd8cd1e6514ca6cba7f09dd45b..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_sr_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_sr_challenge": { - "acc": 0.20608108108108109, - "acc_stderr": 0.023550282959294247, - "acc_norm": 0.24662162162162163, - "acc_norm_stderr": 0.02509638351759426 - } - }, - "versions": { - "arc_sr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_sr_challenge_gpt2.json b/evals/arc-challenge/arc_sr_challenge_gpt2.json deleted file mode 100644 index ed4d03dcbbbdb78f9e36972c6c09ea65f958accf..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_sr_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_sr_challenge": { - "acc": 0.18243243243243243, - "acc_stderr": 0.0224854634796718, - "acc_norm": 0.22972972972972974, - "acc_norm_stderr": 0.024491712953916972 - } - }, - "versions": { - "arc_sr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_sr_challenge_llama-7B.json b/evals/arc-challenge/arc_sr_challenge_llama-7B.json deleted file mode 100644 index 9a1c5c3f8986ce3acbf704e6d2fbd4d82fbcc724..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_sr_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_sr_challenge": { - "acc": 0.2905405405405405, - "acc_stderr": 0.026433590266607382, - "acc_norm": 0.2972972972972973, - "acc_norm_stderr": 0.02661155695908287 - } - }, - "versions": { - "arc_sr_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_sv_challenge_bloom-1b7.json b/evals/arc-challenge/arc_sv_challenge_bloom-1b7.json deleted file mode 100644 index 962c6f1d023be86a6fa7adf0d018a08eda14f1b8..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_sv_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_sv_challenge": { - "acc": 0.20202020202020202, - "acc_stderr": 0.023337132573282605, - "acc_norm": 0.23232323232323232, - "acc_norm_stderr": 0.02454650495612789 - } - }, - "versions": { - "arc_sv_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_sv_challenge_bloom-560.json b/evals/arc-challenge/arc_sv_challenge_bloom-560.json deleted file mode 100644 index 9477cbe0f42a6cdde99f9a0af2293c4b1c23cf00..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_sv_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_sv_challenge": { - "acc": 0.21212121212121213, - "acc_stderr": 0.02376161191876168, - "acc_norm": 0.2053872053872054, - "acc_norm_stderr": 0.023481109518599313 - } - }, - "versions": { - "arc_sv_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_sv_challenge_bloom-7b1.json b/evals/arc-challenge/arc_sv_challenge_bloom-7b1.json deleted file mode 100644 index c89c1d01bfea674f9f7d9549f8abf2abe32192f8..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_sv_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_sv_challenge": { - "acc": 0.2255892255892256, - "acc_stderr": 0.024293999292957367, - "acc_norm": 0.265993265993266, - "acc_norm_stderr": 0.02568262955665285 - } - }, - "versions": { - "arc_sv_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_sv_challenge_gpt2-large.json b/evals/arc-challenge/arc_sv_challenge_gpt2-large.json deleted file mode 100644 index c090b83981933a41b620746123d08d4ba90f53a2..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_sv_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_sv_challenge": { - "acc": 0.22895622895622897, - "acc_stderr": 0.02442136264227106, - "acc_norm": 0.23232323232323232, - "acc_norm_stderr": 0.02454650495612789 - } - }, - "versions": { - "arc_sv_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_sv_challenge_gpt2-medium.json b/evals/arc-challenge/arc_sv_challenge_gpt2-medium.json deleted file mode 100644 index 31f537c4fb8157ec63b8cbcb4d2001cfd08e1533..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_sv_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_sv_challenge": { - "acc": 0.2255892255892256, - "acc_stderr": 0.024293999292957367, - "acc_norm": 0.24242424242424243, - "acc_norm_stderr": 0.02490893747050876 - } - }, - "versions": { - "arc_sv_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_sv_challenge_llama-7B.json b/evals/arc-challenge/arc_sv_challenge_llama-7B.json deleted file mode 100644 index c2c4e7550c402c4d3dbaf7d6ea56dbf864c439ce..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_sv_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_sv_challenge": { - "acc": 0.2962962962962963, - "acc_stderr": 0.026540687854980646, - "acc_norm": 0.30303030303030304, - "acc_norm_stderr": 0.02671185955331767 - } - }, - "versions": { - "arc_sv_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ta_challenge_bloom-1b7.json b/evals/arc-challenge/arc_ta_challenge_bloom-1b7.json deleted file mode 100644 index a937aa6dd9066efa74a5b88515612f7dc4ba6691..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ta_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ta_challenge": { - "acc": 0.21283783783783783, - "acc_stderr": 0.02383117831196738, - "acc_norm": 0.25675675675675674, - "acc_norm_stderr": 0.025434043955304575 - } - }, - "versions": { - "arc_ta_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ta_challenge_bloom-560.json b/evals/arc-challenge/arc_ta_challenge_bloom-560.json deleted file mode 100644 index 6b1c389d448803dd7a2c483cec6aa7ff1876c4a6..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ta_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ta_challenge": { - "acc": 0.19932432432432431, - "acc_stderr": 0.02325934388926828, - "acc_norm": 0.2533783783783784, - "acc_norm_stderr": 0.025323518629100025 - } - }, - "versions": { - "arc_ta_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ta_challenge_bloom-7b1.json b/evals/arc-challenge/arc_ta_challenge_bloom-7b1.json deleted file mode 100644 index a5da07219683283eaafbda47b1ed0957be400dda..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ta_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ta_challenge": { - "acc": 0.23310810810810811, - "acc_stderr": 0.024616978985669728, - "acc_norm": 0.24324324324324326, - "acc_norm_stderr": 0.02497971840769973 - } - }, - "versions": { - "arc_ta_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ta_challenge_gpt2-large.json b/evals/arc-challenge/arc_ta_challenge_gpt2-large.json deleted file mode 100644 index 918cb1c7f6be3a7693ecf8713714c664843cfc38..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ta_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ta_challenge": { - "acc": 0.21283783783783783, - "acc_stderr": 0.02383117831196738, - "acc_norm": 0.23310810810810811, - "acc_norm_stderr": 0.024616978985669724 - } - }, - "versions": { - "arc_ta_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ta_challenge_gpt2-medium.json b/evals/arc-challenge/arc_ta_challenge_gpt2-medium.json deleted file mode 100644 index 6af3ab31fdcf16311ec8594bad8ee052c05b16bc..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ta_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ta_challenge": { - "acc": 0.2195945945945946, - "acc_stderr": 0.02410238110604679, - "acc_norm": 0.2668918918918919, - "acc_norm_stderr": 0.025753762926257903 - } - }, - "versions": { - "arc_ta_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ta_challenge_gpt2.json b/evals/arc-challenge/arc_ta_challenge_gpt2.json deleted file mode 100644 index 5245a03aac201f65f42e53dcabf6d1f7c0717d52..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ta_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ta_challenge": { - "acc": 0.23986486486486486, - "acc_stderr": 0.024860949670846396, - "acc_norm": 0.26013513513513514, - "acc_norm_stderr": 0.025542576393640246 - } - }, - "versions": { - "arc_ta_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ta_challenge_llama-7B.json b/evals/arc-challenge/arc_ta_challenge_llama-7B.json deleted file mode 100644 index 241feef032d750202d858fbc9162e3549a178160..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_ta_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ta_challenge": { - "acc": 0.20270270270270271, - "acc_stderr": 0.02340609199417405, - "acc_norm": 0.22297297297297297, - "acc_norm_stderr": 0.02423444993634422 - } - }, - "versions": { - "arc_ta_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_te_challenge_bloom-1b7.json b/evals/arc-challenge/arc_te_challenge_bloom-1b7.json deleted file mode 100644 index ce9a2c9841dcb9e494770a8c9199b82c8ab4c9f7..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_te_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_te_challenge": { - "acc": 0.21897810218978103, - "acc_stderr": 0.02502941075517834, - "acc_norm": 0.2591240875912409, - "acc_norm_stderr": 0.026518277256436896 - } - }, - "versions": { - "arc_te_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_te_challenge_bloom-560.json b/evals/arc-challenge/arc_te_challenge_bloom-560.json deleted file mode 100644 index 0d326f4a1b5d45a12a085af0588dc48da1242b19..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_te_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_te_challenge": { - "acc": 0.22627737226277372, - "acc_stderr": 0.02532397574413385, - "acc_norm": 0.24087591240875914, - "acc_norm_stderr": 0.025880445559939208 - } - }, - "versions": { - "arc_te_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_te_challenge_bloom-7b1.json b/evals/arc-challenge/arc_te_challenge_bloom-7b1.json deleted file mode 100644 index 1c6d34bb9da6f86f1a4494caba49a2d1bab46bcf..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_te_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_te_challenge": { - "acc": 0.20072992700729927, - "acc_stderr": 0.024242171306158907, - "acc_norm": 0.25547445255474455, - "acc_norm_stderr": 0.026395641265678074 - } - }, - "versions": { - "arc_te_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_te_challenge_gpt2-large.json b/evals/arc-challenge/arc_te_challenge_gpt2-large.json deleted file mode 100644 index 226ed83458102ea0a3f4161159558d6ae8875357..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_te_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_te_challenge": { - "acc": 0.22627737226277372, - "acc_stderr": 0.02532397574413385, - "acc_norm": 0.24087591240875914, - "acc_norm_stderr": 0.025880445559939208 - } - }, - "versions": { - "arc_te_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_te_challenge_gpt2-medium.json b/evals/arc-challenge/arc_te_challenge_gpt2-medium.json deleted file mode 100644 index a5bd92092ab22f31db2d36d69626c32b485ab331..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_te_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_te_challenge": { - "acc": 0.2116788321167883, - "acc_stderr": 0.02472344500978517, - "acc_norm": 0.22992700729927007, - "acc_norm_stderr": 0.025467107178386465 - } - }, - "versions": { - "arc_te_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_te_challenge_gpt2.json b/evals/arc-challenge/arc_te_challenge_gpt2.json deleted file mode 100644 index c6b5f06c5f92b644a3c4ac037330810277460f0a..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_te_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_te_challenge": { - "acc": 0.22627737226277372, - "acc_stderr": 0.02532397574413385, - "acc_norm": 0.24087591240875914, - "acc_norm_stderr": 0.025880445559939215 - } - }, - "versions": { - "arc_te_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_te_challenge_llama-7B.json b/evals/arc-challenge/arc_te_challenge_llama-7B.json deleted file mode 100644 index a20fb71e7ce5932ff220ab3a23466714b469cd51..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_te_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_te_challenge": { - "acc": 0.24087591240875914, - "acc_stderr": 0.025880445559939215, - "acc_norm": 0.26277372262773724, - "acc_norm_stderr": 0.026638517193281797 - } - }, - "versions": { - "arc_te_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_uk_challenge_bloom-1b7.json b/evals/arc-challenge/arc_uk_challenge_bloom-1b7.json deleted file mode 100644 index 72eee1e288b03359fecf649039ec7e1a796086ee..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_uk_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_uk_challenge": { - "acc": 0.24579124579124578, - "acc_stderr": 0.025025521384235305, - "acc_norm": 0.28619528619528617, - "acc_norm_stderr": 0.026270908298354635 - } - }, - "versions": { - "arc_uk_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_uk_challenge_bloom-560.json b/evals/arc-challenge/arc_uk_challenge_bloom-560.json deleted file mode 100644 index ef5e9d5a99c327e81413b16eb715a91e70b6c5b3..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_uk_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_uk_challenge": { - "acc": 0.265993265993266, - "acc_stderr": 0.02568262955665285, - "acc_norm": 0.2895622895622896, - "acc_norm_stderr": 0.026362594432681956 - } - }, - "versions": { - "arc_uk_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_uk_challenge_bloom-7b1.json b/evals/arc-challenge/arc_uk_challenge_bloom-7b1.json deleted file mode 100644 index 3c2cc6b833fb7540bcca14af70e018d3eb236524..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_uk_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_uk_challenge": { - "acc": 0.2222222222222222, - "acc_stderr": 0.02416437978893547, - "acc_norm": 0.265993265993266, - "acc_norm_stderr": 0.02568262955665285 - } - }, - "versions": { - "arc_uk_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_uk_challenge_gpt2-large.json b/evals/arc-challenge/arc_uk_challenge_gpt2-large.json deleted file mode 100644 index c03f6ddf265c02f0fc83f91f5c16d2586666d682..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_uk_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_uk_challenge": { - "acc": 0.23232323232323232, - "acc_stderr": 0.02454650495612789, - "acc_norm": 0.27946127946127947, - "acc_norm_stderr": 0.026082164400369843 - } - }, - "versions": { - "arc_uk_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_uk_challenge_gpt2-medium.json b/evals/arc-challenge/arc_uk_challenge_gpt2-medium.json deleted file mode 100644 index 51083b7158f2de8700c8c253b7e5e98eba1626a9..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_uk_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_uk_challenge": { - "acc": 0.2222222222222222, - "acc_stderr": 0.02416437978893546, - "acc_norm": 0.265993265993266, - "acc_norm_stderr": 0.02568262955665285 - } - }, - "versions": { - "arc_uk_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_uk_challenge_gpt2.json b/evals/arc-challenge/arc_uk_challenge_gpt2.json deleted file mode 100644 index e32104934ab1fe23828d680bf766e04e93ea044a..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_uk_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_uk_challenge": { - "acc": 0.21212121212121213, - "acc_stderr": 0.023761611918761662, - "acc_norm": 0.24242424242424243, - "acc_norm_stderr": 0.02490893747050876 - } - }, - "versions": { - "arc_uk_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_uk_challenge_llama-7B.json b/evals/arc-challenge/arc_uk_challenge_llama-7B.json deleted file mode 100644 index a02491cf171678a4ddc940caa47d4c778b0e3cf5..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_uk_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_uk_challenge": { - "acc": 0.30976430976430974, - "acc_stderr": 0.026876241779014095, - "acc_norm": 0.3367003367003367, - "acc_norm_stderr": 0.027468238412892212 - } - }, - "versions": { - "arc_uk_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_vi_challenge_bloom-1b7.json b/evals/arc-challenge/arc_vi_challenge_bloom-1b7.json deleted file mode 100644 index 508c46f8cd77b71773ecc8623d362eae91a1dc3f..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_vi_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_vi_challenge": { - "acc": 0.24496644295302014, - "acc_stderr": 0.024955035980898942, - "acc_norm": 0.28187919463087246, - "acc_norm_stderr": 0.026106703750007423 - } - }, - "versions": { - "arc_vi_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_vi_challenge_bloom-560.json b/evals/arc-challenge/arc_vi_challenge_bloom-560.json deleted file mode 100644 index 70d9cffdbf7b3adea2bbded15e8a36d7f930b24b..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_vi_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_vi_challenge": { - "acc": 0.2483221476510067, - "acc_stderr": 0.025069483148037874, - "acc_norm": 0.25838926174496646, - "acc_norm_stderr": 0.025400777524610105 - } - }, - "versions": { - "arc_vi_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_vi_challenge_bloom-7b1.json b/evals/arc-challenge/arc_vi_challenge_bloom-7b1.json deleted file mode 100644 index f1588613ea4565257bfb7f46328c5e696a1434de..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_vi_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_vi_challenge": { - "acc": 0.3087248322147651, - "acc_stderr": 0.02680606307294056, - "acc_norm": 0.3288590604026846, - "acc_norm_stderr": 0.02726048303556786 - } - }, - "versions": { - "arc_vi_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_vi_challenge_gpt2-large.json b/evals/arc-challenge/arc_vi_challenge_gpt2-large.json deleted file mode 100644 index c071ea16496ed3627a0dc0840835a827894a8a61..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_vi_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_vi_challenge": { - "acc": 0.18120805369127516, - "acc_stderr": 0.02235101779623446, - "acc_norm": 0.23825503355704697, - "acc_norm_stderr": 0.024719951493159628 - } - }, - "versions": { - "arc_vi_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_vi_challenge_gpt2-medium.json b/evals/arc-challenge/arc_vi_challenge_gpt2-medium.json deleted file mode 100644 index 0cb1f34c59a21cb916520b7e956a1bd193ba1395..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_vi_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_vi_challenge": { - "acc": 0.2080536912751678, - "acc_stderr": 0.023553603370264103, - "acc_norm": 0.23825503355704697, - "acc_norm_stderr": 0.024719951493159628 - } - }, - "versions": { - "arc_vi_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_vi_challenge_gpt2.json b/evals/arc-challenge/arc_vi_challenge_gpt2.json deleted file mode 100644 index 6f912cfc57fb3d8efe3773d82b7a95532a6f69b0..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_vi_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_vi_challenge": { - "acc": 0.2080536912751678, - "acc_stderr": 0.0235536033702641, - "acc_norm": 0.2080536912751678, - "acc_norm_stderr": 0.023553603370264124 - } - }, - "versions": { - "arc_vi_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_vi_challenge_llama-7B.json b/evals/arc-challenge/arc_vi_challenge_llama-7B.json deleted file mode 100644 index 8427c0ad1958ea7ad114255f020f43c5d50d076c..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_vi_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_vi_challenge": { - "acc": 0.1912751677852349, - "acc_stderr": 0.022821882255340997, - "acc_norm": 0.2516778523489933, - "acc_norm_stderr": 0.025181904610615855 - } - }, - "versions": { - "arc_vi_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_zh_challenge_bloom-1b7.json b/evals/arc-challenge/arc_zh_challenge_bloom-1b7.json deleted file mode 100644 index 4626e7c607b4dd4f9c82472abe983c30203c245c..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_zh_challenge_bloom-1b7.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_zh_challenge": { - "acc": 0.25252525252525254, - "acc_stderr": 0.025252525252525356, - "acc_norm": 0.25925925925925924, - "acc_norm_stderr": 0.025471492792791674 - } - }, - "versions": { - "arc_zh_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-1b7", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_zh_challenge_bloom-560.json b/evals/arc-challenge/arc_zh_challenge_bloom-560.json deleted file mode 100644 index 127c0ce8f0b322902ecae312152c6905394bf82e..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_zh_challenge_bloom-560.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_zh_challenge": { - "acc": 0.24242424242424243, - "acc_stderr": 0.024908937470508753, - "acc_norm": 0.26936026936026936, - "acc_norm_stderr": 0.025785321789052268 - } - }, - "versions": { - "arc_zh_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_zh_challenge_bloom-7b1.json b/evals/arc-challenge/arc_zh_challenge_bloom-7b1.json deleted file mode 100644 index b488311a8cccbd9e611c8abe983c979453acd882..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_zh_challenge_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_zh_challenge": { - "acc": 0.3400673400673401, - "acc_stderr": 0.027535084762190663, - "acc_norm": 0.367003367003367, - "acc_norm_stderr": 0.028014951100692458 - } - }, - "versions": { - "arc_zh_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_zh_challenge_gpt2-large.json b/evals/arc-challenge/arc_zh_challenge_gpt2-large.json deleted file mode 100644 index b20ff9d4fb351205e7abdc821a99a7a9c62aa9c6..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_zh_challenge_gpt2-large.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_zh_challenge": { - "acc": 0.21548821548821548, - "acc_stderr": 0.023898224834697, - "acc_norm": 0.24915824915824916, - "acc_norm_stderr": 0.025140041284626418 - } - }, - "versions": { - "arc_zh_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_zh_challenge_gpt2-medium.json b/evals/arc-challenge/arc_zh_challenge_gpt2-medium.json deleted file mode 100644 index fe9d9b64694a7c0355b5de8e14577532c3e16db0..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_zh_challenge_gpt2-medium.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_zh_challenge": { - "acc": 0.21548821548821548, - "acc_stderr": 0.023898224834697005, - "acc_norm": 0.23232323232323232, - "acc_norm_stderr": 0.02454650495612789 - } - }, - "versions": { - "arc_zh_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_zh_challenge_gpt2.json b/evals/arc-challenge/arc_zh_challenge_gpt2.json deleted file mode 100644 index d8da342e3dfff17d37f9f34a3f90753cb4850243..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_zh_challenge_gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_zh_challenge": { - "acc": 0.20875420875420875, - "acc_stderr": 0.023622587756271476, - "acc_norm": 0.22895622895622897, - "acc_norm_stderr": 0.02442136264227106 - } - }, - "versions": { - "arc_zh_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc-challenge/arc_zh_challenge_llama-7B.json b/evals/arc-challenge/arc_zh_challenge_llama-7B.json deleted file mode 100644 index 51e82fa68d852ff2bafe284c29d895d2422b66e9..0000000000000000000000000000000000000000 --- a/evals/arc-challenge/arc_zh_challenge_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_zh_challenge": { - "acc": 0.2558922558922559, - "acc_stderr": 0.02536300037580196, - "acc_norm": 0.27946127946127947, - "acc_norm_stderr": 0.026082164400369843 - } - }, - "versions": { - "arc_zh_challenge": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_sr_mc_bloom-7b1.json b/evals/arc/arc_ar-bloom-7b1.json similarity index 54% rename from evals/truthfulqa-mc/truthfulqa_sr_mc_bloom-7b1.json rename to evals/arc/arc_ar-bloom-7b1.json index 7a9be337308c1b4de36187d0139341115ab5acc1..66c115459f73a74be6bd4b1b3933509010a82342 100644 --- a/evals/truthfulqa-mc/truthfulqa_sr_mc_bloom-7b1.json +++ b/evals/arc/arc_ar-bloom-7b1.json @@ -1,19 +1,19 @@ { "results": { - "truthfulqa_sr_mc": { - "mc1": 0.2875318066157761, - "mc1_stderr": 0.016154400981864346, - "mc2": 0.4611856949025646, - "mc2_stderr": 0.01648960635223338 + "arc_ar": { + "acc": 0.2634730538922156, + "acc_stderr": 0.012889646336321774, + "acc_norm": 0.31394354148845166, + "acc_norm_stderr": 0.013579515768185788 } }, "versions": { - "truthfulqa_sr_mc": 1 + "arc_ar": 0 }, "config": { "model": "hf-auto", "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": "1", + "batch_size": 1, "device": "cuda", "no_cache": false, "limit": null, diff --git a/evals/truthfulqa-mc/truthfulqa_ne_mc_llama-7B.json b/evals/arc/arc_ar-llama-7B.json similarity index 56% rename from evals/truthfulqa-mc/truthfulqa_ne_mc_llama-7B.json rename to evals/arc/arc_ar-llama-7B.json index 547c3b78ee0caef9b096972901f0b3d40c939029..31293a19637055f69dbf3fb11cadfd2fde391402 100644 --- a/evals/truthfulqa-mc/truthfulqa_ne_mc_llama-7B.json +++ b/evals/arc/arc_ar-llama-7B.json @@ -1,19 +1,19 @@ { "results": { - "truthfulqa_ne_mc": { - "mc1": 0.2906091370558376, - "mc1_stderr": 0.016184901529011933, - "mc2": 0.466774725144191, - "mc2_stderr": 0.01677791483100084 + "arc_ar": { + "acc": 0.19760479041916168, + "acc_stderr": 0.011651221980953499, + "acc_norm": 0.24636441402908468, + "acc_norm_stderr": 0.012608059960468694 } }, "versions": { - "truthfulqa_ne_mc": 1 + "arc_ar": 0 }, "config": { "model": "hf-auto", "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", + "batch_size": 1, "device": "cuda", "no_cache": false, "limit": null, diff --git a/evals/truthfulqa-mc/truthfulqa_sk_mc_bloom-7b1.json b/evals/arc/arc_bn-bloom-7b1.json similarity index 54% rename from evals/truthfulqa-mc/truthfulqa_sk_mc_bloom-7b1.json rename to evals/arc/arc_bn-bloom-7b1.json index 9bb50aa50589d9959d2accbb09d2d099246f74e5..b7b877a4a649f59197b24de7b3ec917785979683 100644 --- a/evals/truthfulqa-mc/truthfulqa_sk_mc_bloom-7b1.json +++ b/evals/arc/arc_bn-bloom-7b1.json @@ -1,19 +1,19 @@ { "results": { - "truthfulqa_sk_mc": { - "mc1": 0.23846153846153847, - "mc1_stderr": 0.015268148070057835, - "mc2": 0.4379856829317774, - "mc2_stderr": 0.016560323561497736 + "arc_bn": { + "acc": 0.22412318220701455, + "acc_stderr": 0.012201644195165715, + "acc_norm": 0.2617621899059025, + "acc_norm_stderr": 0.012862641889254466 } }, "versions": { - "truthfulqa_sk_mc": 1 + "arc_bn": 0 }, "config": { "model": "hf-auto", "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": "1", + "batch_size": 1, "device": "cuda", "no_cache": false, "limit": null, diff --git a/evals/truthfulqa-mc/truthfulqa_ar_mc_llama-7B.json b/evals/arc/arc_bn-llama-7B.json similarity index 56% rename from evals/truthfulqa-mc/truthfulqa_ar_mc_llama-7B.json rename to evals/arc/arc_bn-llama-7B.json index 5f817545d204b5083023e5456ee8029ce2191005..1dafcad0f0dbcae9d42395e2697e1ddc5c1ba0c2 100644 --- a/evals/truthfulqa-mc/truthfulqa_ar_mc_llama-7B.json +++ b/evals/arc/arc_bn-llama-7B.json @@ -1,19 +1,19 @@ { "results": { - "truthfulqa_ar_mc": { - "mc1": 0.2777777777777778, - "mc1_stderr": 0.016109958670672858, - "mc2": 0.4504998624708924, - "mc2_stderr": 0.01620052408197046 + "arc_bn": { + "acc": 0.1899059024807528, + "acc_stderr": 0.011476660752315397, + "acc_norm": 0.2583404619332763, + "acc_norm_stderr": 0.012807875214816267 } }, "versions": { - "truthfulqa_ar_mc": 1 + "arc_bn": 0 }, "config": { "model": "hf-auto", "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", + "batch_size": 1, "device": "cuda", "no_cache": false, "limit": null, diff --git a/evals/arc/arc_ca-bloom-7b1.json b/evals/arc/arc_ca-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..f0a15e06750a49e5570198c619957cce3e35cf0c --- /dev/null +++ b/evals/arc/arc_ca-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_ca": { + "acc": 0.31989708404802747, + "acc_stderr": 0.01366562491926326, + "acc_norm": 0.34734133790737565, + "acc_norm_stderr": 0.013949489903701517 + } + }, + "versions": { + "arc_ca": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_ca_mc_llama-7B.json b/evals/arc/arc_ca-llama-7B.json similarity index 56% rename from evals/truthfulqa-mc/truthfulqa_ca_mc_llama-7B.json rename to evals/arc/arc_ca-llama-7B.json index dd6e11c0a02074e790f1099cbbeb59e13a69f2e1..f0e3b53912555842b913d4cc78b61de1b70a2380 100644 --- a/evals/truthfulqa-mc/truthfulqa_ca_mc_llama-7B.json +++ b/evals/arc/arc_ca-llama-7B.json @@ -1,19 +1,19 @@ { "results": { - "truthfulqa_ca_mc": { - "mc1": 0.2336328626444159, - "mc1_stderr": 0.015170350095728855, - "mc2": 0.388488309525287, - "mc2_stderr": 0.015026705835089502 + "arc_ca": { + "acc": 0.3276157804459691, + "acc_stderr": 0.01375080741597368, + "acc_norm": 0.3507718696397942, + "acc_norm_stderr": 0.013981316936172217 } }, "versions": { - "truthfulqa_ca_mc": 1 + "arc_ca": 0 }, "config": { "model": "hf-auto", "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", + "batch_size": 1, "device": "cuda", "no_cache": false, "limit": null, diff --git a/evals/arc/arc_da-bloom-7b1.json b/evals/arc/arc_da-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..1f4e588f7cac0716c4285f186e6d2aa122ee795d --- /dev/null +++ b/evals/arc/arc_da-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_da": { + "acc": 0.20137103684661525, + "acc_stderr": 0.011744154502532795, + "acc_norm": 0.24592973436161097, + "acc_norm_stderr": 0.012611366681285752 + } + }, + "versions": { + "arc_da": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_da-llama-7B.json b/evals/arc/arc_da-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..814a2fb017691ccd12afbf034c490e10a646843e --- /dev/null +++ b/evals/arc/arc_da-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_da": { + "acc": 0.286203941730934, + "acc_stderr": 0.013236574332463879, + "acc_norm": 0.3273350471293916, + "acc_norm_stderr": 0.013741887176251822 + } + }, + "versions": { + "arc_da": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_de-bloom-7b1.json b/evals/arc/arc_de-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..205cbe1e5a60177701994fa2eca97338da50bd02 --- /dev/null +++ b/evals/arc/arc_de-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_de": { + "acc": 0.22241231822070145, + "acc_stderr": 0.012168377742629776, + "acc_norm": 0.262617621899059, + "acc_norm_stderr": 0.01287617552045283 + } + }, + "versions": { + "arc_de": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_de-llama-7B.json b/evals/arc/arc_de-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..f13cfc00bfd0ac6e8b6e48a5c0bc3b99c3140b69 --- /dev/null +++ b/evals/arc/arc_de-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_de": { + "acc": 0.2951240376390077, + "acc_stderr": 0.013345572865502645, + "acc_norm": 0.35072711719418304, + "acc_norm_stderr": 0.013962940383743043 + } + }, + "versions": { + "arc_de": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_es-bloom-7b1.json b/evals/arc/arc_es-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..39a5c5211ff20ef49014baa232a8ea2a9d8884be --- /dev/null +++ b/evals/arc/arc_es-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_es": { + "acc": 0.3316239316239316, + "acc_stderr": 0.013769752111910177, + "acc_norm": 0.3811965811965812, + "acc_norm_stderr": 0.01420507709573084 + } + }, + "versions": { + "arc_es": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_es-llama-7B.json b/evals/arc/arc_es-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..11544ff8942a30c3fb128aa473ea30d88443b0e6 --- /dev/null +++ b/evals/arc/arc_es-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_es": { + "acc": 0.3606837606837607, + "acc_stderr": 0.014044746572948867, + "acc_norm": 0.3683760683760684, + "acc_norm_stderr": 0.014108074259155369 + } + }, + "versions": { + "arc_es": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_eu-bloom-7b1.json b/evals/arc/arc_eu-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..156fd60ab449125d255226262654e5337e4cb697 --- /dev/null +++ b/evals/arc/arc_eu-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_eu": { + "acc": 0.22056239015817222, + "acc_stderr": 0.01229634886589257, + "acc_norm": 0.2521968365553603, + "acc_norm_stderr": 0.012879032347922939 + } + }, + "versions": { + "arc_eu": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_eu-llama-7B.json b/evals/arc/arc_eu-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..10a039f055cb172c7978f840a54bec6cc724948c --- /dev/null +++ b/evals/arc/arc_eu-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_eu": { + "acc": 0.20738137082601055, + "acc_stderr": 0.012023662461166562, + "acc_norm": 0.2451669595782074, + "acc_norm_stderr": 0.012757811738008544 + } + }, + "versions": { + "arc_eu": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc-challenge/arc_sv_challenge_gpt2.json b/evals/arc/arc_fr-bloom-7b1.json similarity index 50% rename from evals/arc-challenge/arc_sv_challenge_gpt2.json rename to evals/arc/arc_fr-bloom-7b1.json index 718a97a6d9df935c9f0818257fda43ef3bfc7996..78cbf1e3cfc337f169be33735f919ab397b8d085 100644 --- a/evals/arc-challenge/arc_sv_challenge_gpt2.json +++ b/evals/arc/arc_fr-bloom-7b1.json @@ -1,19 +1,19 @@ { "results": { - "arc_sv_challenge": { - "acc": 0.2255892255892256, - "acc_stderr": 0.024293999292957367, - "acc_norm": 0.2356902356902357, - "acc_norm_stderr": 0.024669460034907637 + "arc_fr": { + "acc": 0.32677502138579984, + "acc_stderr": 0.01372407602199982, + "acc_norm": 0.3669803250641574, + "acc_norm_stderr": 0.014102904772197396 } }, "versions": { - "arc_sv_challenge": 0 + "arc_fr": 0 }, "config": { "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": "1", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, "device": "cuda", "no_cache": false, "limit": null, diff --git a/evals/arc/arc_fr-llama-7B.json b/evals/arc/arc_fr-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..c79866a45e043e6b6e5e139f5ac63dfb8b522f27 --- /dev/null +++ b/evals/arc/arc_fr-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_fr": { + "acc": 0.3473053892215569, + "acc_stderr": 0.013931226499492353, + "acc_norm": 0.3729683490162532, + "acc_norm_stderr": 0.014150093168782438 + } + }, + "versions": { + "arc_fr": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_gu-bloom-7b1.json b/evals/arc/arc_gu-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..c78878020cb8341b5adb388627ffa309dde3ad3a --- /dev/null +++ b/evals/arc/arc_gu-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_gu": { + "acc": 0.2206896551724138, + "acc_stderr": 0.012181604374453973, + "acc_norm": 0.2336206896551724, + "acc_norm_stderr": 0.012428989430945793 + } + }, + "versions": { + "arc_gu": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_gu-llama-7B.json b/evals/arc/arc_gu-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..afadd880b353d2482c13ab85d24811ac5ea5fd57 --- /dev/null +++ b/evals/arc/arc_gu-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_gu": { + "acc": 0.2120689655172414, + "acc_stderr": 0.012007177871292825, + "acc_norm": 0.23189655172413792, + "acc_norm_stderr": 0.012396962423413033 + } + }, + "versions": { + "arc_gu": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_hi-bloom-7b1.json b/evals/arc/arc_hi-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..70136df6c1f9731ab888c323fa0128c0beb43524 --- /dev/null +++ b/evals/arc/arc_hi-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_hi": { + "acc": 0.2363013698630137, + "acc_stderr": 0.012435369590403731, + "acc_norm": 0.2919520547945205, + "acc_norm_stderr": 0.013309191484613488 + } + }, + "versions": { + "arc_hi": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_hi-llama-7B.json b/evals/arc/arc_hi-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..ddcd58ade570221ad656710d0944a241789b1d8b --- /dev/null +++ b/evals/arc/arc_hi-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_hi": { + "acc": 0.21232876712328766, + "acc_stderr": 0.011971304657273123, + "acc_norm": 0.25, + "acc_norm_stderr": 0.012675503164084846 + } + }, + "versions": { + "arc_hi": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_hr-bloom-7b1.json b/evals/arc/arc_hr-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..80efc06ef94471b0b04935089a967e72d9e2095e --- /dev/null +++ b/evals/arc/arc_hr-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_hr": { + "acc": 0.19332763045337895, + "acc_stderr": 0.011555111310342437, + "acc_norm": 0.2369546621043627, + "acc_norm_stderr": 0.012441890624187792 + } + }, + "versions": { + "arc_hr": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_hr-llama-7B.json b/evals/arc/arc_hr-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..9c50fa3252a0133486190ed9d5cbc497e1a17fe9 --- /dev/null +++ b/evals/arc/arc_hr-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_hr": { + "acc": 0.2754491017964072, + "acc_stderr": 0.01307174925264165, + "acc_norm": 0.330196749358426, + "acc_norm_stderr": 0.013760638974726852 + } + }, + "versions": { + "arc_hr": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_hu-bloom-7b1.json b/evals/arc/arc_hu-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..3c7e8773a07af63cf8522b314bbd0611c37c7b98 --- /dev/null +++ b/evals/arc/arc_hu-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_hu": { + "acc": 0.1969178082191781, + "acc_stderr": 0.011640913614197496, + "acc_norm": 0.2585616438356164, + "acc_norm_stderr": 0.0128169339627777 + } + }, + "versions": { + "arc_hu": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_hu-llama-7B.json b/evals/arc/arc_hu-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..ac3191180768a88cd6c937d51bf005adb11c7ccf --- /dev/null +++ b/evals/arc/arc_hu-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_hu": { + "acc": 0.2517123287671233, + "acc_stderr": 0.012704310825494622, + "acc_norm": 0.2979452054794521, + "acc_norm_stderr": 0.013388079339102703 + } + }, + "versions": { + "arc_hu": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_hy-bloom-7b1.json b/evals/arc/arc_hy-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..d138545e18f6bb49f13d11bd9cd3b515db23815b --- /dev/null +++ b/evals/arc/arc_hy-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_hy": { + "acc": 0.21181818181818182, + "acc_stderr": 0.01232525683396216, + "acc_norm": 0.26181818181818184, + "acc_norm_stderr": 0.013261197012809796 + } + }, + "versions": { + "arc_hy": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_hy-llama-7B.json b/evals/arc/arc_hy-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..35e46c981f8bc3bf9374fdf6ad4b483f4c65762b --- /dev/null +++ b/evals/arc/arc_hy-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_hy": { + "acc": 0.19454545454545455, + "acc_stderr": 0.011940766785664334, + "acc_norm": 0.2718181818181818, + "acc_norm_stderr": 0.013420241182110736 + } + }, + "versions": { + "arc_hy": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_id-bloom-7b1.json b/evals/arc/arc_id-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..a2cc8cf230eda88935959ff54b9ded1986940b84 --- /dev/null +++ b/evals/arc/arc_id-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_id": { + "acc": 0.3128205128205128, + "acc_stderr": 0.013560492090917607, + "acc_norm": 0.3598290598290598, + "acc_norm_stderr": 0.014037469945597791 + } + }, + "versions": { + "arc_id": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_id-llama-7B.json b/evals/arc/arc_id-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..59fcc7ff10a29c0f82833ce5df7a260a8d4bbd42 --- /dev/null +++ b/evals/arc/arc_id-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_id": { + "acc": 0.19316239316239317, + "acc_stderr": 0.011546413314069014, + "acc_norm": 0.26666666666666666, + "acc_norm_stderr": 0.012933850109759573 + } + }, + "versions": { + "arc_id": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_it-bloom-7b1.json b/evals/arc/arc_it-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..7eda117416da15b68b1713aa6ef9ff77e69fd826 --- /dev/null +++ b/evals/arc/arc_it-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_it": { + "acc": 0.24037639007698888, + "acc_stderr": 0.01250327289928353, + "acc_norm": 0.28999144568006846, + "acc_norm_stderr": 0.01327709194338097 + } + }, + "versions": { + "arc_it": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_it-llama-7B.json b/evals/arc/arc_it-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..76b8875276c1b0078d3d087c16397df3b3ea9200 --- /dev/null +++ b/evals/arc/arc_it-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_it": { + "acc": 0.31736526946107785, + "acc_stderr": 0.013619227292898307, + "acc_norm": 0.3575705731394354, + "acc_norm_stderr": 0.014024008839912006 + } + }, + "versions": { + "arc_it": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_kn-bloom-7b1.json b/evals/arc/arc_kn-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..e92b7d0d555bc117110f34dbbc68d327f5092f5f --- /dev/null +++ b/evals/arc/arc_kn-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_kn": { + "acc": 0.2221254355400697, + "acc_stderr": 0.012273607270054452, + "acc_norm": 0.24738675958188153, + "acc_norm_stderr": 0.012740675198098838 + } + }, + "versions": { + "arc_kn": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_kn-llama-7B.json b/evals/arc/arc_kn-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..39ae5661b6403f677d4427689194c417f1f2f8b5 --- /dev/null +++ b/evals/arc/arc_kn-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_kn": { + "acc": 0.20470383275261325, + "acc_stderr": 0.011913674295957856, + "acc_norm": 0.24738675958188153, + "acc_norm_stderr": 0.012740675198098834 + } + }, + "versions": { + "arc_kn": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_ml-bloom-7b1.json b/evals/arc/arc_ml-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..f7c83104b2f7701b8a7af344179886c58a0e89a0 --- /dev/null +++ b/evals/arc/arc_ml-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_ml": { + "acc": 0.2075306479859895, + "acc_stderr": 0.01200575665793095, + "acc_norm": 0.2635726795096322, + "acc_norm_stderr": 0.013042844591075362 + } + }, + "versions": { + "arc_ml": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_ml-llama-7B.json b/evals/arc/arc_ml-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..fc465c13860754471e99430d5e6c5e1df5046b2e --- /dev/null +++ b/evals/arc/arc_ml-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_ml": { + "acc": 0.21628721541155868, + "acc_stderr": 0.012188522634632977, + "acc_norm": 0.27845884413309985, + "acc_norm_stderr": 0.013269918016014967 + } + }, + "versions": { + "arc_ml": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc-challenge/arc_it_challenge_gpt2-medium.json b/evals/arc/arc_mr-bloom-7b1.json similarity index 51% rename from evals/arc-challenge/arc_it_challenge_gpt2-medium.json rename to evals/arc/arc_mr-bloom-7b1.json index 2663af9d466539843f48e70d58dd9a236db69c79..cb854d6690652622f9f24d8c241c70b1cab749f9 100644 --- a/evals/arc-challenge/arc_it_challenge_gpt2-medium.json +++ b/evals/arc/arc_mr-bloom-7b1.json @@ -1,19 +1,19 @@ { "results": { - "arc_it_challenge": { - "acc": 0.2255892255892256, - "acc_stderr": 0.02429399929295737, + "arc_mr": { + "acc": 0.23376623376623376, + "acc_stderr": 0.012458582396003653, "acc_norm": 0.2727272727272727, - "acc_norm_stderr": 0.025886127156886297 + "acc_norm_stderr": 0.013110221561502926 } }, "versions": { - "arc_it_challenge": 0 + "arc_mr": 0 }, "config": { "model": "hf-auto", - "model_args": "pretrained=gpt2-medium", - "batch_size": "1", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, "device": "cuda", "no_cache": false, "limit": null, diff --git a/evals/arc/arc_mr-llama-7B.json b/evals/arc/arc_mr-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..0755f8ce24bf655025ef6eb6414570573beb9858 --- /dev/null +++ b/evals/arc/arc_mr-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_mr": { + "acc": 0.2051948051948052, + "acc_stderr": 0.011888050053276677, + "acc_norm": 0.2545454545454545, + "acc_norm_stderr": 0.012823020964319998 + } + }, + "versions": { + "arc_mr": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_ne-bloom-7b1.json b/evals/arc/arc_ne-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..8642b825a874e720a4bb8c0f92ff6fc304357c9f --- /dev/null +++ b/evals/arc/arc_ne-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_ne": { + "acc": 0.21300256629597947, + "acc_stderr": 0.01198002307808546, + "acc_norm": 0.223267750213858, + "acc_norm_stderr": 0.012185048029719049 + } + }, + "versions": { + "arc_ne": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_ne-llama-7B.json b/evals/arc/arc_ne-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..e20341882d82d53d339ccb9e726250d842765069 --- /dev/null +++ b/evals/arc/arc_ne-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_ne": { + "acc": 0.2172797262617622, + "acc_stderr": 0.012066782166932105, + "acc_norm": 0.24294268605645852, + "acc_norm_stderr": 0.012548588352773893 + } + }, + "versions": { + "arc_ne": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_nl-bloom-7b1.json b/evals/arc/arc_nl-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..70e6704be7426e916dc20cd9645eb2e99bb6b03a --- /dev/null +++ b/evals/arc/arc_nl-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_nl": { + "acc": 0.1881950384944397, + "acc_stderr": 0.011436905010368727, + "acc_norm": 0.2309666381522669, + "acc_norm_stderr": 0.012331780770152612 + } + }, + "versions": { + "arc_nl": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ne_challenge_llama-7B.json b/evals/arc/arc_nl-llama-7B.json similarity index 61% rename from evals/arc-challenge/arc_ne_challenge_llama-7B.json rename to evals/arc/arc_nl-llama-7B.json index a22c844ed32434eb2d404f76e104c502e7218625..6258adbcef55b321a04101c65d1c369f53c6cc09 100644 --- a/evals/arc-challenge/arc_ne_challenge_llama-7B.json +++ b/evals/arc/arc_nl-llama-7B.json @@ -1,19 +1,19 @@ { "results": { - "arc_ne_challenge": { - "acc": 0.2255892255892256, - "acc_stderr": 0.024293999292957367, - "acc_norm": 0.265993265993266, - "acc_norm_stderr": 0.025682629556652858 + "arc_nl": { + "acc": 0.32677502138579984, + "acc_stderr": 0.013724076021999824, + "acc_norm": 0.3361847733105218, + "acc_norm_stderr": 0.013822646555385164 } }, "versions": { - "arc_ne_challenge": 0 + "arc_nl": 0 }, "config": { "model": "hf-auto", "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", + "batch_size": 1, "device": "cuda", "no_cache": false, "limit": null, diff --git a/evals/arc/arc_pt-bloom-7b1.json b/evals/arc/arc_pt-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..880d8570463408853523eec06407b3c8ed9e5b11 --- /dev/null +++ b/evals/arc/arc_pt-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_pt": { + "acc": 0.3401709401709402, + "acc_stderr": 0.013856612397310694, + "acc_norm": 0.4, + "acc_norm_stderr": 0.014328422047021531 + } + }, + "versions": { + "arc_pt": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_pt-llama-7B.json b/evals/arc/arc_pt-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..0a856face8fef0cab72d3cda7305f6949d011ce3 --- /dev/null +++ b/evals/arc/arc_pt-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_pt": { + "acc": 0.3367521367521368, + "acc_stderr": 0.01382247630777062, + "acc_norm": 0.37777777777777777, + "acc_norm_stderr": 0.014180244103534094 + } + }, + "versions": { + "arc_pt": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_ro-bloom-7b1.json b/evals/arc/arc_ro-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..083766c1f50d79393939908a8f8837dcc7cb697d --- /dev/null +++ b/evals/arc/arc_ro-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_ro": { + "acc": 0.2099400171379606, + "acc_stderr": 0.011926921791273557, + "acc_norm": 0.26906598114824337, + "acc_norm_stderr": 0.012987310039914976 + } + }, + "versions": { + "arc_ro": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_ro-llama-7B.json b/evals/arc/arc_ro-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..eab2e4a70b967696417355b0d11bd69cabf3ddc5 --- /dev/null +++ b/evals/arc/arc_ro-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_ro": { + "acc": 0.30077120822622105, + "acc_stderr": 0.013430077114209907, + "acc_norm": 0.32390745501285345, + "acc_norm_stderr": 0.013704533924425027 + } + }, + "versions": { + "arc_ro": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_ru-bloom-7b1.json b/evals/arc/arc_ru-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..1ff9ed6089fca2642658a8e6f9f74471739e87e6 --- /dev/null +++ b/evals/arc/arc_ru-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_ru": { + "acc": 0.21043627031650983, + "acc_stderr": 0.01192703439080346, + "acc_norm": 0.2754491017964072, + "acc_norm_stderr": 0.01307174925264165 + } + }, + "versions": { + "arc_ru": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_ru-llama-7B.json b/evals/arc/arc_ru-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..f62854eef188594fdc60a93341410fac7a49fa14 --- /dev/null +++ b/evals/arc/arc_ru-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_ru": { + "acc": 0.2934131736526946, + "acc_stderr": 0.013322973103306575, + "acc_norm": 0.32078699743370404, + "acc_norm_stderr": 0.013658089444975752 + } + }, + "versions": { + "arc_ru": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_sk-bloom-7b1.json b/evals/arc/arc_sk-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..4404e57e2290a69cce8029b89f0939593bbe7d8e --- /dev/null +++ b/evals/arc/arc_sk-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_sk": { + "acc": 0.20359281437125748, + "acc_stderr": 0.011782227020010716, + "acc_norm": 0.24893071000855432, + "acc_norm_stderr": 0.012651960282598879 + } + }, + "versions": { + "arc_sk": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_sk-llama-7B.json b/evals/arc/arc_sk-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..b018df9a5453495bb3ff51f8908c88c064d888a4 --- /dev/null +++ b/evals/arc/arc_sk-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_sk": { + "acc": 0.23609923011120615, + "acc_stderr": 0.012426371635795894, + "acc_norm": 0.28999144568006846, + "acc_norm_stderr": 0.013277091943380979 + } + }, + "versions": { + "arc_sk": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_sr-bloom-7b1.json b/evals/arc/arc_sr-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..ca68a7fae3c2920f66e9f6948396528ea7efe421 --- /dev/null +++ b/evals/arc/arc_sr-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_sr": { + "acc": 0.2172797262617622, + "acc_stderr": 0.012066782166932079, + "acc_norm": 0.25149700598802394, + "acc_norm_stderr": 0.01269526466186626 + } + }, + "versions": { + "arc_sr": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_sr-llama-7B.json b/evals/arc/arc_sr-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..dbe0e415ecd651a7afbe25423df0f79ddbf30b59 --- /dev/null +++ b/evals/arc/arc_sr-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_sr": { + "acc": 0.25748502994011974, + "acc_stderr": 0.012794024494042348, + "acc_norm": 0.30795551753635586, + "acc_norm_stderr": 0.013507954174822524 + } + }, + "versions": { + "arc_sr": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_sv-bloom-7b1.json b/evals/arc/arc_sv-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..e602b4d12926dbb93b567be032a836cb50b2ff51 --- /dev/null +++ b/evals/arc/arc_sv-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_sv": { + "acc": 0.20515021459227467, + "acc_stderr": 0.011835920197074948, + "acc_norm": 0.2515021459227468, + "acc_norm_stderr": 0.012717145410329311 + } + }, + "versions": { + "arc_sv": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_sv-llama-7B.json b/evals/arc/arc_sv-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..3cacd9bbf330b2d6be85b2903f5d124c0045cc94 --- /dev/null +++ b/evals/arc/arc_sv-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_sv": { + "acc": 0.303862660944206, + "acc_stderr": 0.013480613043590443, + "acc_norm": 0.34935622317596565, + "acc_norm_stderr": 0.013974278424227307 + } + }, + "versions": { + "arc_sv": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_ta-bloom-7b1.json b/evals/arc/arc_ta-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..68a6f4875888d86505752626ba4a52fd12cc3c84 --- /dev/null +++ b/evals/arc/arc_ta-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_ta": { + "acc": 0.22942206654991243, + "acc_stderr": 0.01244752638770244, + "acc_norm": 0.24168126094570927, + "acc_norm_stderr": 0.012673733216040754 + } + }, + "versions": { + "arc_ta": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_ta-llama-7B.json b/evals/arc/arc_ta-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..d7c697739212d1bec5e84f1a4e6f0017d500ecc7 --- /dev/null +++ b/evals/arc/arc_ta-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_ta": { + "acc": 0.2075306479859895, + "acc_stderr": 0.012005756657930957, + "acc_norm": 0.27495621716287216, + "acc_norm_stderr": 0.013218161880960047 + } + }, + "versions": { + "arc_ta": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_te-bloom-7b1.json b/evals/arc/arc_te-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..1be31afe5307f0b3c626e305437b1932d4457b68 --- /dev/null +++ b/evals/arc/arc_te-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_te": { + "acc": 0.20175438596491227, + "acc_stderr": 0.01189098690363561, + "acc_norm": 0.24298245614035088, + "acc_norm_stderr": 0.01270803987901337 + } + }, + "versions": { + "arc_te": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_te-llama-7B.json b/evals/arc/arc_te-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..f84a1b907c92965f5829cbd68e89759d2d1ef9d7 --- /dev/null +++ b/evals/arc/arc_te-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_te": { + "acc": 0.2026315789473684, + "acc_stderr": 0.011910259341316062, + "acc_norm": 0.2517543859649123, + "acc_norm_stderr": 0.012860230436368953 + } + }, + "versions": { + "arc_te": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_uk-bloom-7b1.json b/evals/arc/arc_uk-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..05233ff08727d5cac7dd74429dbc024eb5fd5f4f --- /dev/null +++ b/evals/arc/arc_uk-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_uk": { + "acc": 0.1958939264328486, + "acc_stderr": 0.011613035012800898, + "acc_norm": 0.2275449101796407, + "acc_norm_stderr": 0.012267293637033645 + } + }, + "versions": { + "arc_uk": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_uk-llama-7B.json b/evals/arc/arc_uk-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..717afd73b3550c42e809f9bdb7fac834e805b5ee --- /dev/null +++ b/evals/arc/arc_uk-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_uk": { + "acc": 0.28999144568006846, + "acc_stderr": 0.013277091943380968, + "acc_norm": 0.32934131736526945, + "acc_norm_stderr": 0.013751575689336035 + } + }, + "versions": { + "arc_uk": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_vi-bloom-7b1.json b/evals/arc/arc_vi-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..4bc8e4783cc71214d4ba57feef30a0bfee5774c2 --- /dev/null +++ b/evals/arc/arc_vi-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_vi": { + "acc": 0.28974358974358977, + "acc_stderr": 0.013268054405378885, + "acc_norm": 0.3367521367521368, + "acc_norm_stderr": 0.01382247630777062 + } + }, + "versions": { + "arc_vi": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_vi-llama-7B.json b/evals/arc/arc_vi-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..7c14775b05df6587593cb1cbb921ee6ac86a8370 --- /dev/null +++ b/evals/arc/arc_vi-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_vi": { + "acc": 0.20256410256410257, + "acc_stderr": 0.011754979539893694, + "acc_norm": 0.23675213675213674, + "acc_norm_stderr": 0.01243290160581911 + } + }, + "versions": { + "arc_vi": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_zh-bloom-7b1.json b/evals/arc/arc_zh-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..c4deb085367a11032bec8e265cc4cb91fe75a0f5 --- /dev/null +++ b/evals/arc/arc_zh-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_zh": { + "acc": 0.3076923076923077, + "acc_stderr": 0.013498970320941413, + "acc_norm": 0.37264957264957266, + "acc_norm_stderr": 0.014141587247061969 + } + }, + "versions": { + "arc_zh": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_zh-llama-7B.json b/evals/arc/arc_zh-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..9cca2a2335f34f3b9eb36c125304f260fc3f8cd9 --- /dev/null +++ b/evals/arc/arc_zh-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_zh": { + "acc": 0.2564102564102564, + "acc_stderr": 0.012771065618749024, + "acc_norm": 0.2982905982905983, + "acc_norm_stderr": 0.013381080232166387 + } + }, + "versions": { + "arc_zh": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_ar-bloom-7b1.json b/evals/mmlu/mmlu_ar-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..b6e593af4922000fb94fdaab7a48477f593319ba --- /dev/null +++ b/evals/mmlu/mmlu_ar-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_ar": { + "acc": 0.26531559405940597, + "acc_stderr": 0.0038831388933726414, + "acc_norm": 0.2754486386138614, + "acc_norm_stderr": 0.003929217133330591 + } + }, + "versions": { + "mmlu_ar": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_bn-bloom-7b1.json b/evals/mmlu/mmlu_bn-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..89c8ade0841c9df16a86355a7b703e726726acfa --- /dev/null +++ b/evals/mmlu/mmlu_bn-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_bn": { + "acc": 0.2671137646192852, + "acc_stderr": 0.004001512896559074, + "acc_norm": 0.28150813772797906, + "acc_norm_stderr": 0.004067374934957544 + } + }, + "versions": { + "mmlu_bn": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc-challenge/arc_bn_challenge_gpt2-large.json b/evals/mmlu/mmlu_ca-bloom-7b1.json similarity index 52% rename from evals/arc-challenge/arc_bn_challenge_gpt2-large.json rename to evals/mmlu/mmlu_ca-bloom-7b1.json index 6b36e33e7bf7866400a4c7d058836627255b75a8..b760f91f32565b551455d9bf715837b34540ec24 100644 --- a/evals/arc-challenge/arc_bn_challenge_gpt2-large.json +++ b/evals/mmlu/mmlu_ca-bloom-7b1.json @@ -1,19 +1,19 @@ { "results": { - "arc_bn_challenge": { - "acc": 0.2195945945945946, - "acc_stderr": 0.024102381106046785, - "acc_norm": 0.2668918918918919, - "acc_norm_stderr": 0.025753762926257924 + "mmlu_ca": { + "acc": 0.2785041045910611, + "acc_stderr": 0.003908294722890792, + "acc_norm": 0.28785345089692915, + "acc_norm_stderr": 0.003947525835346328 } }, "versions": { - "arc_bn_challenge": 0 + "mmlu_ca": 0 }, "config": { "model": "hf-auto", - "model_args": "pretrained=gpt2-large", - "batch_size": "1", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, "device": "cuda", "no_cache": false, "limit": null, diff --git a/evals/mmlu/mmlu_da-bloom-7b1.json b/evals/mmlu/mmlu_da-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..5b81f4f5ab7529c0d7efd0c3b2c040d9e4643cc2 --- /dev/null +++ b/evals/mmlu/mmlu_da-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_da": { + "acc": 0.2557170982886567, + "acc_stderr": 0.0037964676375075402, + "acc_norm": 0.2705588368923217, + "acc_norm_stderr": 0.003865954982495375 + } + }, + "versions": { + "mmlu_da": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_de-bloom-7b1.json b/evals/mmlu/mmlu_de-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..40c8412a571fbf0d4f63f6290e66bfbbab5fa943 --- /dev/null +++ b/evals/mmlu/mmlu_de-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_de": { + "acc": 0.2670085985819882, + "acc_stderr": 0.0038422837632401587, + "acc_norm": 0.2812641424045859, + "acc_norm_stderr": 0.003904983582450586 + } + }, + "versions": { + "mmlu_de": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_de-llama-7B.json b/evals/mmlu/mmlu_de-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..48403f057f5a6bffdb9e4cb2644c286f80b5ccf0 --- /dev/null +++ b/evals/mmlu/mmlu_de-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_de": { + "acc": 0.3045708251621662, + "acc_stderr": 0.003997127255569371, + "acc_norm": 0.2988384371700106, + "acc_norm_stderr": 0.003975618018830569 + } + }, + "versions": { + "mmlu_de": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_es-bloom-7b1.json b/evals/mmlu/mmlu_es-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..1ca552b581fe950c76b7e801b8922438a03f50b6 --- /dev/null +++ b/evals/mmlu/mmlu_es-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_es": { + "acc": 0.2846857657117144, + "acc_stderr": 0.00390811532232558, + "acc_norm": 0.28926053697315135, + "acc_norm_stderr": 0.003926773662056655 + } + }, + "versions": { + "mmlu_es": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_es-llama-7B.json b/evals/mmlu/mmlu_es-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..6c5c8136a88729662690739c773310e7e60685c7 --- /dev/null +++ b/evals/mmlu/mmlu_es-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_es": { + "acc": 0.30808459577021147, + "acc_stderr": 0.00399850416060033, + "acc_norm": 0.30268486575671216, + "acc_norm_stderr": 0.0039787436578546075 + } + }, + "versions": { + "mmlu_es": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_eu-bloom-7b1.json b/evals/mmlu/mmlu_eu-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..bd26e106ebaee3484061fd6d78bd4e9d52579fcd --- /dev/null +++ b/evals/mmlu/mmlu_eu-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_eu": { + "acc": 0.2576611914684972, + "acc_stderr": 0.003953719493412054, + "acc_norm": 0.2735147503473073, + "acc_norm_stderr": 0.0040298051028790725 + } + }, + "versions": { + "mmlu_eu": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_fr-bloom-7b1.json b/evals/mmlu/mmlu_fr-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..518cf70d5d420bdf6c38c7dc1d83ad8289360cb0 --- /dev/null +++ b/evals/mmlu/mmlu_fr-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_fr": { + "acc": 0.2887479948055916, + "acc_stderr": 0.0039609687595635185, + "acc_norm": 0.29860209304102053, + "acc_norm_stderr": 0.003999989334139082 + } + }, + "versions": { + "mmlu_fr": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_gu-bloom-7b1.json b/evals/mmlu/mmlu_gu-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..08db474bfffcd53c11f37cca5a5523de19ab27b2 --- /dev/null +++ b/evals/mmlu/mmlu_gu-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_gu": { + "acc": 0.24933390631714655, + "acc_stderr": 0.004010971174274014, + "acc_norm": 0.26566394499355395, + "acc_norm_stderr": 0.004094955673385403 + } + }, + "versions": { + "mmlu_gu": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_hi-bloom-7b1.json b/evals/mmlu/mmlu_hi-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..8402e114c7f1914a4c05f4a1f91ecb4aad9df2d8 --- /dev/null +++ b/evals/mmlu/mmlu_hi-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_hi": { + "acc": 0.2666237838707084, + "acc_stderr": 0.00396526756671177, + "acc_norm": 0.2751467395674198, + "acc_norm_stderr": 0.004004671316183439 + } + }, + "versions": { + "mmlu_hi": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_hr-bloom-7b1.json b/evals/mmlu/mmlu_hr-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..11c2e3822a0ada199f63dd7adb04e6c604d3151e --- /dev/null +++ b/evals/mmlu/mmlu_hr-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_hr": { + "acc": 0.25448737450562825, + "acc_stderr": 0.0037988075329188904, + "acc_norm": 0.26954669911773654, + "acc_norm_stderr": 0.0038699014491549413 + } + }, + "versions": { + "mmlu_hr": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_hu-bloom-7b1.json b/evals/mmlu/mmlu_hu-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..b5cd6a42f13e7a2790a24766a0455177825ac001 --- /dev/null +++ b/evals/mmlu/mmlu_hu-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_hu": { + "acc": 0.25, + "acc_stderr": 0.0037944175097970817, + "acc_norm": 0.269041769041769, + "acc_norm_stderr": 0.0038859804834747223 + } + }, + "versions": { + "mmlu_hu": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_hy-bloom-7b1.json b/evals/mmlu/mmlu_hy-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..5b33b978463855a30343b21fc48c4d5eeefe9ed4 --- /dev/null +++ b/evals/mmlu/mmlu_hy-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_hy": { + "acc": 0.24754384354053807, + "acc_stderr": 0.004135735206626923, + "acc_norm": 0.2570930125791938, + "acc_norm_stderr": 0.004187920399106458 + } + }, + "versions": { + "mmlu_hy": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_id-bloom-7b1.json b/evals/mmlu/mmlu_id-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..eab2b6f207224be214da56e0b7642b6e08ab6522 --- /dev/null +++ b/evals/mmlu/mmlu_id-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_id": { + "acc": 0.26631554843141747, + "acc_stderr": 0.0038620444798720234, + "acc_norm": 0.28058926799480954, + "acc_norm_stderr": 0.003925439934317792 + } + }, + "versions": { + "mmlu_id": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_it-bloom-7b1.json b/evals/mmlu/mmlu_it-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..f1fd4d72695bef88e7d84fea1cef3fe7a204b1d4 --- /dev/null +++ b/evals/mmlu/mmlu_it-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_it": { + "acc": 0.26161516960036263, + "acc_stderr": 0.0038202735800333108, + "acc_norm": 0.2760444209413009, + "acc_norm_stderr": 0.0038856803174993136 + } + }, + "versions": { + "mmlu_it": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_it-llama-7B.json b/evals/mmlu/mmlu_it-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..4911cc10b24667a5ceebaa64adfc01511364c093 --- /dev/null +++ b/evals/mmlu/mmlu_it-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_it": { + "acc": 0.29848152904736724, + "acc_stderr": 0.003977405833855968, + "acc_norm": 0.29901034977713986, + "acc_norm_stderr": 0.003979426926074157 + } + }, + "versions": { + "mmlu_it": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_kn-bloom-7b1.json b/evals/mmlu/mmlu_kn-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..cdc6e7a6340ce902630293fdf1c6020b92559efd --- /dev/null +++ b/evals/mmlu/mmlu_kn-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_kn": { + "acc": 0.24622316459051152, + "acc_stderr": 0.0040494962676919264, + "acc_norm": 0.26716141001855287, + "acc_norm_stderr": 0.004159165326445932 + } + }, + "versions": { + "mmlu_kn": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_ml-bloom-7b1.json b/evals/mmlu/mmlu_ml-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..0dfd9c349dd00e3ccd1fece3fcf4c414525835bb --- /dev/null +++ b/evals/mmlu/mmlu_ml-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_ml": { + "acc": 0.24646354733405876, + "acc_stderr": 0.0041039285720239, + "acc_norm": 0.26414581066376497, + "acc_norm_stderr": 0.0041984507173371734 + } + }, + "versions": { + "mmlu_ml": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_mr-bloom-7b1.json b/evals/mmlu/mmlu_mr-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..de6dc10fd113d66213dca64afc3849f020f6285e --- /dev/null +++ b/evals/mmlu/mmlu_mr-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_mr": { + "acc": 0.2495736213757817, + "acc_stderr": 0.003900219801135433, + "acc_norm": 0.26289287744660117, + "acc_norm_stderr": 0.003967257688070526 + } + }, + "versions": { + "mmlu_mr": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_mr-llama-7B.json b/evals/mmlu/mmlu_mr-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..a68274469ffcdac51ed2534e328a082e752259d5 --- /dev/null +++ b/evals/mmlu/mmlu_mr-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_mr": { + "acc": 0.24941119142369853, + "acc_stderr": 0.0038993723464080766, + "acc_norm": 0.2784861528465849, + "acc_norm_stderr": 0.004039799718714403 + } + }, + "versions": { + "mmlu_mr": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_ne-bloom-7b1.json b/evals/mmlu/mmlu_ne-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..63db04e7a0d9e7387ac032f7c649cd67f1996ea4 --- /dev/null +++ b/evals/mmlu/mmlu_ne-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_ne": { + "acc": 0.2568858909499719, + "acc_stderr": 0.003915419717331052, + "acc_norm": 0.2658797077009556, + "acc_norm_stderr": 0.0039591928340292366 + } + }, + "versions": { + "mmlu_ne": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_ne-llama-7B.json b/evals/mmlu/mmlu_ne-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..5f6048f4b5b7f57e7bc90c0226fb4fb987b1f1b5 --- /dev/null +++ b/evals/mmlu/mmlu_ne-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_ne": { + "acc": 0.245483016140689, + "acc_stderr": 0.0038567872193795804, + "acc_norm": 0.2774431863807918, + "acc_norm_stderr": 0.004012393111736023 + } + }, + "versions": { + "mmlu_ne": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_nl-bloom-7b1.json b/evals/mmlu/mmlu_nl-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..11f031c8a30795485e92c546f4b04d94df9c7e32 --- /dev/null +++ b/evals/mmlu/mmlu_nl-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_nl": { + "acc": 0.25931547393185095, + "acc_stderr": 0.0038180275621108187, + "acc_norm": 0.2749487743796008, + "acc_norm_stderr": 0.003889720954246996 + } + }, + "versions": { + "mmlu_nl": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_nl-llama-7B.json b/evals/mmlu/mmlu_nl-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..db5993204885ee62dd922204656d17bcc53a0869 --- /dev/null +++ b/evals/mmlu/mmlu_nl-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_nl": { + "acc": 0.3053046975791151, + "acc_stderr": 0.004012103530956046, + "acc_norm": 0.2983987250512256, + "acc_norm_stderr": 0.003986133809323066 + } + }, + "versions": { + "mmlu_nl": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_pt-bloom-7b1.json b/evals/mmlu/mmlu_pt-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..3887b3366a9810116b594c74c02905628ee78fcf --- /dev/null +++ b/evals/mmlu/mmlu_pt-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_pt": { + "acc": 0.2809216451516061, + "acc_stderr": 0.0038938542873620118, + "acc_norm": 0.287676373461423, + "acc_norm_stderr": 0.0039218389764563225 + } + }, + "versions": { + "mmlu_pt": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_pt-llama-7B.json b/evals/mmlu/mmlu_pt-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..d5ff15ab450754ca303e55e1503611a1b7fd3d44 --- /dev/null +++ b/evals/mmlu/mmlu_pt-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_pt": { + "acc": 0.3016361453017112, + "acc_stderr": 0.003976322071656026, + "acc_norm": 0.3007355148604023, + "acc_norm_stderr": 0.003972940683152965 + } + }, + "versions": { + "mmlu_pt": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_ro-bloom-7b1.json b/evals/mmlu/mmlu_ro-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..b9ced8c74d8ae4d628e7fe9168ff402ce98cd279 --- /dev/null +++ b/evals/mmlu/mmlu_ro-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_ro": { + "acc": 0.2555891238670695, + "acc_stderr": 0.003790966515146354, + "acc_norm": 0.2737160120845921, + "acc_norm_stderr": 0.0038750360364507622 + } + }, + "versions": { + "mmlu_ro": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_ro-llama-7B.json b/evals/mmlu/mmlu_ro-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..7474e610db1236709be35a3a648960d8b40a838e --- /dev/null +++ b/evals/mmlu/mmlu_ro-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_ro": { + "acc": 0.29342900302114805, + "acc_stderr": 0.003957326026204448, + "acc_norm": 0.2965256797583082, + "acc_norm_stderr": 0.003969425800928827 + } + }, + "versions": { + "mmlu_ro": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_ru-bloom-7b1.json b/evals/mmlu/mmlu_ru-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..597b21a215ebd9c9d442c41b7c7577008553e896 --- /dev/null +++ b/evals/mmlu/mmlu_ru-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_ru": { + "acc": 0.2525563158299377, + "acc_stderr": 0.0038097500220131194, + "acc_norm": 0.2695471669101253, + "acc_norm_stderr": 0.0038908241231695112 + } + }, + "versions": { + "mmlu_ru": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_ru-llama-7B.json b/evals/mmlu/mmlu_ru-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..1cc8eed486b867ef15f762b1387fd29a6cf4416b --- /dev/null +++ b/evals/mmlu/mmlu_ru-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_ru": { + "acc": 0.29445683093718766, + "acc_stderr": 0.0039966925205054795, + "acc_norm": 0.3016068270931037, + "acc_norm_stderr": 0.004024377402999243 + } + }, + "versions": { + "mmlu_ru": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_sk-bloom-7b1.json b/evals/mmlu/mmlu_sk-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..c5c41d03419b8a4038c58ab0e4166ce0e96c28d9 --- /dev/null +++ b/evals/mmlu/mmlu_sk-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_sk": { + "acc": 0.24927269943347113, + "acc_stderr": 0.003785212350164864, + "acc_norm": 0.26672791303016385, + "acc_norm_stderr": 0.003869711564658995 + } + }, + "versions": { + "mmlu_sk": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_sk-llama-7B.json b/evals/mmlu/mmlu_sk-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..309a344b59b192e0dbc8e50b499a16b67538c1ef --- /dev/null +++ b/evals/mmlu/mmlu_sk-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_sk": { + "acc": 0.28127392436074106, + "acc_stderr": 0.003934216199449274, + "acc_norm": 0.2944418925126321, + "acc_norm_stderr": 0.003988209639409228 + } + }, + "versions": { + "mmlu_sk": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_sr-bloom-7b1.json b/evals/mmlu/mmlu_sr-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..88c6699b6f71aadafabd08193c19c50d25887e85 --- /dev/null +++ b/evals/mmlu/mmlu_sr-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_sr": { + "acc": 0.25650952706293173, + "acc_stderr": 0.0038050782551146203, + "acc_norm": 0.27245122599256055, + "acc_norm_stderr": 0.003879266167871199 + } + }, + "versions": { + "mmlu_sr": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_sr-llama-7B.json b/evals/mmlu/mmlu_sr-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..fbe389b6b884d3a9692413dc84031d5ea2363b31 --- /dev/null +++ b/evals/mmlu/mmlu_sr-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_sr": { + "acc": 0.2902907462233356, + "acc_stderr": 0.003954858675409034, + "acc_norm": 0.2920367418203902, + "acc_norm_stderr": 0.003961851981605455 + } + }, + "versions": { + "mmlu_sr": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_sv-bloom-7b1.json b/evals/mmlu/mmlu_sv-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..90ee3cd4e9733639263cdcf04b82e171f8485253 --- /dev/null +++ b/evals/mmlu/mmlu_sv-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_sv": { + "acc": 0.26122788446998335, + "acc_stderr": 0.003820033520031446, + "acc_norm": 0.27491305005292604, + "acc_norm_stderr": 0.0038823517609477554 + } + }, + "versions": { + "mmlu_sv": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_sv-llama-7B.json b/evals/mmlu/mmlu_sv-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..d962d7acbb38d8ae28b5d3c396c6389a2ae6bf49 --- /dev/null +++ b/evals/mmlu/mmlu_sv-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_sv": { + "acc": 0.30024194767881446, + "acc_stderr": 0.003985765983480769, + "acc_norm": 0.29321034326326934, + "acc_norm_stderr": 0.003958556933478504 + } + }, + "versions": { + "mmlu_sv": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_ta-bloom-7b1.json b/evals/mmlu/mmlu_ta-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..227c87597c1eb663c59c29f3eb1d52a08a3d189d --- /dev/null +++ b/evals/mmlu/mmlu_ta-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_ta": { + "acc": 0.2531252694197776, + "acc_stderr": 0.00403738422854994, + "acc_norm": 0.2664884903871023, + "acc_norm_stderr": 0.004105359016847502 + } + }, + "versions": { + "mmlu_ta": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_ta-llama-7B.json b/evals/mmlu/mmlu_ta-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..c47ddc1d3941b02c8ef307b03e1af7c3f33d41f8 --- /dev/null +++ b/evals/mmlu/mmlu_ta-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_ta": { + "acc": 0.24743512371756185, + "acc_stderr": 0.004006923901271705, + "acc_norm": 0.27752392447624796, + "acc_norm_stderr": 0.004157865121797154 + } + }, + "versions": { + "mmlu_ta": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_te-bloom-7b1.json b/evals/mmlu/mmlu_te-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..6dda2185b223b03895db5556e33db9db1733d107 --- /dev/null +++ b/evals/mmlu/mmlu_te-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_te": { + "acc": 0.2502857142857143, + "acc_stderr": 0.004061713740284853, + "acc_norm": 0.2618901098901099, + "acc_norm_stderr": 0.00412252643604891 + } + }, + "versions": { + "mmlu_te": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_te-llama-7B.json b/evals/mmlu/mmlu_te-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..d495ac0b0d562ef0467a6d5a79b03bb80ccfc6a4 --- /dev/null +++ b/evals/mmlu/mmlu_te-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_te": { + "acc": 0.24562637362637363, + "acc_stderr": 0.00403621353648515, + "acc_norm": 0.26874725274725275, + "acc_norm_stderr": 0.004156704581054155 + } + }, + "versions": { + "mmlu_te": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_uk-bloom-7b1.json b/evals/mmlu/mmlu_uk-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..7ad6aa7c934875a8ffa40228178610c089842e74 --- /dev/null +++ b/evals/mmlu/mmlu_uk-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_uk": { + "acc": 0.24719188163296923, + "acc_stderr": 0.0037969053429642604, + "acc_norm": 0.2663258191959098, + "acc_norm_stderr": 0.003890709230487387 + } + }, + "versions": { + "mmlu_uk": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_uk-llama-7B.json b/evals/mmlu/mmlu_uk-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..2ac08620ea865817dc03d2021d1c2a89e95bd091 --- /dev/null +++ b/evals/mmlu/mmlu_uk-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_uk": { + "acc": 0.2894104888062592, + "acc_stderr": 0.003991508434906801, + "acc_norm": 0.2939809435277713, + "acc_norm_stderr": 0.004009944142684111 + } + }, + "versions": { + "mmlu_uk": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_vi-bloom-7b1.json b/evals/mmlu/mmlu_vi-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..3b29824403bc095477d8a6a0acdb87f1e76c4dfb --- /dev/null +++ b/evals/mmlu/mmlu_vi-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_vi": { + "acc": 0.26726381871076405, + "acc_stderr": 0.003872181345366132, + "acc_norm": 0.281427040269484, + "acc_norm_stderr": 0.003934867675165376 + } + }, + "versions": { + "mmlu_vi": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_vi-llama-7B.json b/evals/mmlu/mmlu_vi-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..194b2dd47470bee66f0c97bb28f1a825707dccea --- /dev/null +++ b/evals/mmlu/mmlu_vi-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_vi": { + "acc": 0.26052671872607563, + "acc_stderr": 0.0038406007591986315, + "acc_norm": 0.28579084366865715, + "acc_norm_stderr": 0.003953198731610307 + } + }, + "versions": { + "mmlu_vi": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_zh-bloom-7b1.json b/evals/mmlu/mmlu_zh-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..e98a766b006fc2ceed3e7d766f77be6fdaf5abe6 --- /dev/null +++ b/evals/mmlu/mmlu_zh-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_zh": { + "acc": 0.27884542347132546, + "acc_stderr": 0.003908427008060506, + "acc_norm": 0.29137865552601594, + "acc_norm_stderr": 0.003960427300065885 + } + }, + "versions": { + "mmlu_zh": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/mmlu/mmlu_zh-llama-7B.json b/evals/mmlu/mmlu_zh-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..963997e00a6c8204be6df0d19adfe241fd53d094 --- /dev/null +++ b/evals/mmlu/mmlu_zh-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "mmlu_zh": { + "acc": 0.2769464489175845, + "acc_stderr": 0.003900220811105949, + "acc_norm": 0.2883402962400304, + "acc_norm_stderr": 0.003948161607934338 + } + }, + "versions": { + "mmlu_zh": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_bn_mc_bloom-7b1.json b/evals/truthfulqa-mc/truthfulqa_bn_mc_bloom-7b1.json deleted file mode 100644 index 9370c174001acd0fca0cddf24e9076e303b9a18d..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_bn_mc_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_bn_mc": { - "mc1": 0.26548672566371684, - "mc1_stderr": 0.015711139487640472, - "mc2": 0.4852587344144857, - "mc2_stderr": 0.01612406516233488 - } - }, - "versions": { - "truthfulqa_bn_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_bn_mc_llama-7B.json b/evals/truthfulqa-mc/truthfulqa_bn_mc_llama-7B.json deleted file mode 100644 index 16e9590be5e353f400674681f4f4e162bad08d5f..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_bn_mc_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_bn_mc": { - "mc1": 0.27939317319848295, - "mc1_stderr": 0.015964066769100945, - "mc2": 0.513392699496713, - "mc2_stderr": 0.016700880970144227 - } - }, - "versions": { - "truthfulqa_bn_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_ca_mc_bloom-7b1.json b/evals/truthfulqa-mc/truthfulqa_ca_mc_bloom-7b1.json deleted file mode 100644 index 11285119043f95ac0d376ad5c3e9afaeb0e2d7e9..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_ca_mc_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_ca_mc": { - "mc1": 0.24261874197689345, - "mc1_stderr": 0.01536843525152329, - "mc2": 0.39989771937446994, - "mc2_stderr": 0.015246797370718152 - } - }, - "versions": { - "truthfulqa_ca_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_es_mc_bloom-7b1.json b/evals/truthfulqa-mc/truthfulqa_es_mc_bloom-7b1.json deleted file mode 100644 index c983b9fd981831059a19411e2f854761bb466743..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_es_mc_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_es_mc": { - "mc1": 0.2468354430379747, - "mc1_stderr": 0.01535006418032032, - "mc2": 0.40446379335454147, - "mc2_stderr": 0.01462209461275691 - } - }, - "versions": { - "truthfulqa_es_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_es_mc_llama-7B.json b/evals/truthfulqa-mc/truthfulqa_es_mc_llama-7B.json deleted file mode 100644 index ded6c86f6861c4d0dc091db262fe1d2a25208804..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_es_mc_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_es_mc": { - "mc1": 0.22658227848101264, - "mc1_stderr": 0.014903268563982738, - "mc2": 0.37120532090630015, - "mc2_stderr": 0.014441690126415349 - } - }, - "versions": { - "truthfulqa_es_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_eu_mc_bloom-7b1.json b/evals/truthfulqa-mc/truthfulqa_eu_mc_bloom-7b1.json deleted file mode 100644 index 52f4939ac5fa964406f4eecce983e80178660657..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_eu_mc_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_eu_mc": { - "mc1": 0.26214833759590794, - "mc1_stderr": 0.015737384911607682, - "mc2": 0.4464332201206485, - "mc2_stderr": 0.01621754992783137 - } - }, - "versions": { - "truthfulqa_eu_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_eu_mc_llama-7B.json b/evals/truthfulqa-mc/truthfulqa_eu_mc_llama-7B.json deleted file mode 100644 index 2591b2575e316599868892fc6541e53cca27f1eb..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_eu_mc_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_eu_mc": { - "mc1": 0.22762148337595908, - "mc1_stderr": 0.01500362498587022, - "mc2": 0.4077400427662786, - "mc2_stderr": 0.01655029094183041 - } - }, - "versions": { - "truthfulqa_eu_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_fr_mc_bloom-7b1.json b/evals/truthfulqa-mc/truthfulqa_fr_mc_bloom-7b1.json deleted file mode 100644 index 74d3041ce242f33429dfa1dec98c70a446ad3459..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_fr_mc_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_fr_mc": { - "mc1": 0.2598225602027883, - "mc1_stderr": 0.015622237721822354, - "mc2": 0.40857191925599595, - "mc2_stderr": 0.01474266494761903 - } - }, - "versions": { - "truthfulqa_fr_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_fr_mc_llama-7B.json b/evals/truthfulqa-mc/truthfulqa_fr_mc_llama-7B.json deleted file mode 100644 index 800ad2a78b80c2eb4974ba18bc90689969705247..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_fr_mc_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_fr_mc": { - "mc1": 0.23827629911280102, - "mc1_stderr": 0.015176654543722067, - "mc2": 0.39924075017495203, - "mc2_stderr": 0.014258162205908845 - } - }, - "versions": { - "truthfulqa_fr_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_gu_mc_bloom-7b1.json b/evals/truthfulqa-mc/truthfulqa_gu_mc_bloom-7b1.json deleted file mode 100644 index 64f963ad419e8b93cc4134accc25685a3b6c7973..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_gu_mc_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_gu_mc": { - "mc1": 0.2572944297082228, - "mc1_stderr": 0.015930376662111265, - "mc2": 0.4550226506739247, - "mc2_stderr": 0.016990336661822224 - } - }, - "versions": { - "truthfulqa_gu_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_gu_mc_llama-7B.json b/evals/truthfulqa-mc/truthfulqa_gu_mc_llama-7B.json deleted file mode 100644 index c069c02eb514218d456bb1424dd8cfe77f48a1ab..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_gu_mc_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_gu_mc": { - "mc1": 0.2572944297082228, - "mc1_stderr": 0.015930376662111265, - "mc2": 0.42704504017782213, - "mc2_stderr": 0.017012444121235887 - } - }, - "versions": { - "truthfulqa_gu_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_hi_mc_bloom-7b1.json b/evals/truthfulqa-mc/truthfulqa_hi_mc_bloom-7b1.json deleted file mode 100644 index 8962a71a352d9b104821eb68a25a8785186a6f80..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_hi_mc_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_hi_mc": { - "mc1": 0.26153846153846155, - "mc1_stderr": 0.0157457370262172, - "mc2": 0.4459427734456273, - "mc2_stderr": 0.015816895972907637 - } - }, - "versions": { - "truthfulqa_hi_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_hi_mc_llama-7B.json b/evals/truthfulqa-mc/truthfulqa_hi_mc_llama-7B.json deleted file mode 100644 index 2f7c57699fb99f36e65e991419808a451e65b58d..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_hi_mc_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_hi_mc": { - "mc1": 0.28076923076923077, - "mc1_stderr": 0.016100529409585174, - "mc2": 0.47439648196687334, - "mc2_stderr": 0.016645149126511907 - } - }, - "versions": { - "truthfulqa_hi_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_hr_mc_bloom-7b1.json b/evals/truthfulqa-mc/truthfulqa_hr_mc_bloom-7b1.json deleted file mode 100644 index 314546568b9f50af4248c3961474c8f4e4d3b021..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_hr_mc_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_hr_mc": { - "mc1": 0.2805194805194805, - "mc1_stderr": 0.01620047927370478, - "mc2": 0.4799867976765054, - "mc2_stderr": 0.016630823388575047 - } - }, - "versions": { - "truthfulqa_hr_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_hr_mc_llama-7B.json b/evals/truthfulqa-mc/truthfulqa_hr_mc_llama-7B.json deleted file mode 100644 index a89b4ca336f2e469df36faf9e3b8bae78e238226..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_hr_mc_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_hr_mc": { - "mc1": 0.24285714285714285, - "mc1_stderr": 0.015463264535393416, - "mc2": 0.4178069276061212, - "mc2_stderr": 0.015457117904740929 - } - }, - "versions": { - "truthfulqa_hr_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_hu_mc_bloom-7b1.json b/evals/truthfulqa-mc/truthfulqa_hu_mc_bloom-7b1.json deleted file mode 100644 index f0063c59598d9ace87e37889c678b775e7685f4e..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_hu_mc_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_hu_mc": { - "mc1": 0.2664941785252264, - "mc1_stderr": 0.01591244793052595, - "mc2": 0.5012245769743321, - "mc2_stderr": 0.017012659134722635 - } - }, - "versions": { - "truthfulqa_hu_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_hu_mc_llama-7B.json b/evals/truthfulqa-mc/truthfulqa_hu_mc_llama-7B.json deleted file mode 100644 index 8186b5b669612791a673c1748562011f0fa91aec..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_hu_mc_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_hu_mc": { - "mc1": 0.24579560155239327, - "mc1_stderr": 0.01549611867708382, - "mc2": 0.432092949382587, - "mc2_stderr": 0.015533288486024798 - } - }, - "versions": { - "truthfulqa_hu_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_hy_mc_bloom-7b1.json b/evals/truthfulqa-mc/truthfulqa_hy_mc_bloom-7b1.json deleted file mode 100644 index ddde03654d791b6a3794476cfb89b83c5ef45e53..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_hy_mc_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_hy_mc": { - "mc1": 0.2629032258064516, - "mc1_stderr": 0.017693546356249937, - "mc2": 0.4681902443615651, - "mc2_stderr": 0.019292338415181538 - } - }, - "versions": { - "truthfulqa_hy_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_hy_mc_llama-7B.json b/evals/truthfulqa-mc/truthfulqa_hy_mc_llama-7B.json deleted file mode 100644 index f5ca203decb570b2e7314edadae5c00a4adfc62c..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_hy_mc_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_hy_mc": { - "mc1": 0.2564516129032258, - "mc1_stderr": 0.017551409976203195, - "mc2": 0.46436602760838236, - "mc2_stderr": 0.018999233967880117 - } - }, - "versions": { - "truthfulqa_hy_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_id_mc_bloom-7b1.json b/evals/truthfulqa-mc/truthfulqa_id_mc_bloom-7b1.json deleted file mode 100644 index e5c70280232e984fefca1f1a8cfe4a29409de1c8..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_id_mc_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_id_mc": { - "mc1": 0.25288831835686776, - "mc1_stderr": 0.015583584105316878, - "mc2": 0.4035395580966099, - "mc2_stderr": 0.015018121460072335 - } - }, - "versions": { - "truthfulqa_id_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_id_mc_llama-7B.json b/evals/truthfulqa-mc/truthfulqa_id_mc_llama-7B.json deleted file mode 100644 index 6dfd743ea67805acd941cfb18f2d6362f9880f82..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_id_mc_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_id_mc": { - "mc1": 0.25673940949935814, - "mc1_stderr": 0.015661271683095182, - "mc2": 0.39766031480749814, - "mc2_stderr": 0.015508891980724996 - } - }, - "versions": { - "truthfulqa_id_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_it_mc_bloom-7b1.json b/evals/truthfulqa-mc/truthfulqa_it_mc_bloom-7b1.json deleted file mode 100644 index e83a75ef58f484e4f28d9b48fd6106931bcd7a26..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_it_mc_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_it_mc": { - "mc1": 0.2697201017811705, - "mc1_stderr": 0.015840413061442026, - "mc2": 0.4389841648203799, - "mc2_stderr": 0.015926853851979495 - } - }, - "versions": { - "truthfulqa_it_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_it_mc_llama-7B.json b/evals/truthfulqa-mc/truthfulqa_it_mc_llama-7B.json deleted file mode 100644 index b9f0f156188649c4a7542e6d6f2ba9b37c457655..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_it_mc_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_it_mc": { - "mc1": 0.24427480916030533, - "mc1_stderr": 0.015335094706043257, - "mc2": 0.39785622787135533, - "mc2_stderr": 0.014810294602470058 - } - }, - "versions": { - "truthfulqa_it_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_kn_mc_bloom-7b1.json b/evals/truthfulqa-mc/truthfulqa_kn_mc_bloom-7b1.json deleted file mode 100644 index 4fe9dd96dcdc93bdacfb696ab94a78b3f7f7a246..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_kn_mc_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_kn_mc": { - "mc1": 0.28792134831460675, - "mc1_stderr": 0.0169811116006733, - "mc2": 0.4971377207989088, - "mc2_stderr": 0.0171981853340177 - } - }, - "versions": { - "truthfulqa_kn_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_kn_mc_llama-7B.json b/evals/truthfulqa-mc/truthfulqa_kn_mc_llama-7B.json deleted file mode 100644 index 993e1c25914137bc34c8316cf67a25cc17ab83c4..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_kn_mc_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_kn_mc": { - "mc1": 0.27808988764044945, - "mc1_stderr": 0.01680348492221316, - "mc2": 0.46974001502290064, - "mc2_stderr": 0.017840960060966953 - } - }, - "versions": { - "truthfulqa_kn_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_ml_mc_bloom-7b1.json b/evals/truthfulqa-mc/truthfulqa_ml_mc_bloom-7b1.json deleted file mode 100644 index 24914faf5345d35faa0a1b782d6c784b3edd07d6..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_ml_mc_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_ml_mc": { - "mc1": 0.25831202046035806, - "mc1_stderr": 0.01566236755478916, - "mc2": 0.4909574719052267, - "mc2_stderr": 0.016823307128975565 - } - }, - "versions": { - "truthfulqa_ml_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_ml_mc_llama-7B.json b/evals/truthfulqa-mc/truthfulqa_ml_mc_llama-7B.json deleted file mode 100644 index 0a3806514d04876fc28bdf3370af03eacd615826..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_ml_mc_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_ml_mc": { - "mc1": 0.2749360613810742, - "mc1_stderr": 0.015976383961112832, - "mc2": 0.5095091855665959, - "mc2_stderr": 0.016954647599861927 - } - }, - "versions": { - "truthfulqa_ml_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_mr_mc_bloom-7b1.json b/evals/truthfulqa-mc/truthfulqa_mr_mc_bloom-7b1.json deleted file mode 100644 index cf87faf0ec1093a1588fff90b3e24e1f69710c9d..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_mr_mc_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_mr_mc": { - "mc1": 0.2753807106598985, - "mc1_stderr": 0.015923346195889237, - "mc2": 0.47635177057868366, - "mc2_stderr": 0.016517346765693778 - } - }, - "versions": { - "truthfulqa_mr_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_mr_mc_llama-7B.json b/evals/truthfulqa-mc/truthfulqa_mr_mc_llama-7B.json deleted file mode 100644 index 0ddcccf963e412f25a53de4c98f439abf7a25388..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_mr_mc_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_mr_mc": { - "mc1": 0.28553299492385786, - "mc1_stderr": 0.01610022231189975, - "mc2": 0.4895379243686521, - "mc2_stderr": 0.016741018968357894 - } - }, - "versions": { - "truthfulqa_mr_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_ne_mc_bloom-7b1.json b/evals/truthfulqa-mc/truthfulqa_ne_mc_bloom-7b1.json deleted file mode 100644 index 90e378f71e4638bf2da69d765a15b05858d6e2b9..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_ne_mc_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_ne_mc": { - "mc1": 0.2880710659898477, - "mc1_stderr": 0.016142870973426694, - "mc2": 0.467435004054711, - "mc2_stderr": 0.016544742019032287 - } - }, - "versions": { - "truthfulqa_ne_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_nl_mc_bloom-7b1.json b/evals/truthfulqa-mc/truthfulqa_nl_mc_bloom-7b1.json deleted file mode 100644 index 3ce8ddbd63d98848d347aa0302b9cbaccb48cbd3..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_nl_mc_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_nl_mc": { - "mc1": 0.25477707006369427, - "mc1_stderr": 0.015561993973145626, - "mc2": 0.4267767591847509, - "mc2_stderr": 0.016186878668566853 - } - }, - "versions": { - "truthfulqa_nl_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_pt_mc_bloom-7b1.json b/evals/truthfulqa-mc/truthfulqa_pt_mc_bloom-7b1.json deleted file mode 100644 index b684b021a3805f5bd343cabdea341eecc0435e00..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_pt_mc_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_pt_mc": { - "mc1": 0.23857868020304568, - "mc1_stderr": 0.015192910034567015, - "mc2": 0.38894722340741383, - "mc2_stderr": 0.014531269277587647 - } - }, - "versions": { - "truthfulqa_pt_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_ro_mc_bloom-7b1.json b/evals/truthfulqa-mc/truthfulqa_ro_mc_bloom-7b1.json deleted file mode 100644 index af2110ac326a3065b94fa267cded253cc069b3e0..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_ro_mc_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_ro_mc": { - "mc1": 0.2608695652173913, - "mc1_stderr": 0.015712552179082358, - "mc2": 0.46132785760214634, - "mc2_stderr": 0.016284566824666485 - } - }, - "versions": { - "truthfulqa_ro_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_ro_mc_llama-7B.json b/evals/truthfulqa-mc/truthfulqa_ro_mc_llama-7B.json deleted file mode 100644 index fe7ed655b7f61f10c4accbd13f5b9fc293536300..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_ro_mc_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_ro_mc": { - "mc1": 0.22762148337595908, - "mc1_stderr": 0.015003624985870205, - "mc2": 0.37160168017693795, - "mc2_stderr": 0.015014785650167688 - } - }, - "versions": { - "truthfulqa_ro_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_ru_mc_bloom-7b1.json b/evals/truthfulqa-mc/truthfulqa_ru_mc_bloom-7b1.json deleted file mode 100644 index d15e5341b01a6e2876ffb863286387d4dcc69456..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_ru_mc_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_ru_mc": { - "mc1": 0.30632911392405066, - "mc1_stderr": 0.016410898874958186, - "mc2": 0.49751656068823824, - "mc2_stderr": 0.016150279946055047 - } - }, - "versions": { - "truthfulqa_ru_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_ru_mc_llama-7B.json b/evals/truthfulqa-mc/truthfulqa_ru_mc_llama-7B.json deleted file mode 100644 index 2036782896e35aee07acce858c408720bcb3b9b9..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_ru_mc_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_ru_mc": { - "mc1": 0.24556962025316456, - "mc1_stderr": 0.015323515145952671, - "mc2": 0.40851860840920967, - "mc2_stderr": 0.015225752517489843 - } - }, - "versions": { - "truthfulqa_ru_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_sk_mc_llama-7B.json b/evals/truthfulqa-mc/truthfulqa_sk_mc_llama-7B.json deleted file mode 100644 index 13785fc105b2964d3bcf70bb68daf0ddc0ccdbfd..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_sk_mc_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_sk_mc": { - "mc1": 0.22692307692307692, - "mc1_stderr": 0.01500658794494848, - "mc2": 0.40846796746265707, - "mc2_stderr": 0.015828756550364212 - } - }, - "versions": { - "truthfulqa_sk_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_sr_mc_llama-7B.json b/evals/truthfulqa-mc/truthfulqa_sr_mc_llama-7B.json deleted file mode 100644 index 3a70158ad0bf874c11233369e2b8b2fbd08bb508..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_sr_mc_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_sr_mc": { - "mc1": 0.2684478371501272, - "mc1_stderr": 0.015816769133859612, - "mc2": 0.42343608663478216, - "mc2_stderr": 0.015372831241353751 - } - }, - "versions": { - "truthfulqa_sr_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_sv_mc_llama-7B.json b/evals/truthfulqa-mc/truthfulqa_sv_mc_llama-7B.json deleted file mode 100644 index 1665d4f2e88a870557fd94395d1d54f58919d85c..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_sv_mc_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_sv_mc": { - "mc1": 0.2596899224806202, - "mc1_stderr": 0.015770469834891904, - "mc2": 0.40528913702963154, - "mc2_stderr": 0.015006798915735541 - } - }, - "versions": { - "truthfulqa_sv_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_ta_mc_bloom-7b1.json b/evals/truthfulqa-mc/truthfulqa_ta_mc_bloom-7b1.json deleted file mode 100644 index 4d2164cc879ad161ad6563ad86aee4884b45ea32..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_ta_mc_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_ta_mc": { - "mc1": 0.26015228426395937, - "mc1_stderr": 0.015638591095633272, - "mc2": 0.4828328722219756, - "mc2_stderr": 0.01641270817636116 - } - }, - "versions": { - "truthfulqa_ta_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_ta_mc_llama-7B.json b/evals/truthfulqa-mc/truthfulqa_ta_mc_llama-7B.json deleted file mode 100644 index fee0b1146f0fd8e72ac72b5e05a85a9d0c18afcb..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_ta_mc_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_ta_mc": { - "mc1": 0.27411167512690354, - "mc1_stderr": 0.015900519226497174, - "mc2": 0.5027478455482438, - "mc2_stderr": 0.016693455124890125 - } - }, - "versions": { - "truthfulqa_ta_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_te_mc_bloom-7b1.json b/evals/truthfulqa-mc/truthfulqa_te_mc_bloom-7b1.json deleted file mode 100644 index cb186a4cf39dc7c369f4adcb4c21742a3bb8d875..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_te_mc_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_te_mc": { - "mc1": 0.2646276595744681, - "mc1_stderr": 0.016097235388949582, - "mc2": 0.4761751419934964, - "mc2_stderr": 0.01699481972514669 - } - }, - "versions": { - "truthfulqa_te_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_te_mc_llama-7B.json b/evals/truthfulqa-mc/truthfulqa_te_mc_llama-7B.json deleted file mode 100644 index 6a27e1784964b5486d2d2aeb7d5418ef3fbc892d..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_te_mc_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_te_mc": { - "mc1": 0.2898936170212766, - "mc1_stderr": 0.016556215331027437, - "mc2": 0.4950446673992078, - "mc2_stderr": 0.017314129921675917 - } - }, - "versions": { - "truthfulqa_te_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_uk_mc_bloom-7b1.json b/evals/truthfulqa-mc/truthfulqa_uk_mc_bloom-7b1.json deleted file mode 100644 index 2a55f54ab6ab50194bfa1058aacbecc18b36d6e7..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_uk_mc_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_uk_mc": { - "mc1": 0.3082901554404145, - "mc1_stderr": 0.016630856554976103, - "mc2": 0.5156453949784039, - "mc2_stderr": 0.01673540498425732 - } - }, - "versions": { - "truthfulqa_uk_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_uk_mc_llama-7B.json b/evals/truthfulqa-mc/truthfulqa_uk_mc_llama-7B.json deleted file mode 100644 index 87ffa1a02265b9ec13f193b53fba9b06f985e7a2..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_uk_mc_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_uk_mc": { - "mc1": 0.23575129533678757, - "mc1_stderr": 0.015286822062573322, - "mc2": 0.41551850845167937, - "mc2_stderr": 0.01559551532730194 - } - }, - "versions": { - "truthfulqa_uk_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_vi_mc_bloom-7b1.json b/evals/truthfulqa-mc/truthfulqa_vi_mc_bloom-7b1.json deleted file mode 100644 index 641e07a270f97ae74adc933fcaaf2f17f0cc2720..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_vi_mc_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_vi_mc": { - "mc1": 0.2969543147208122, - "mc1_stderr": 0.01628730493420265, - "mc2": 0.44687544361363724, - "mc2_stderr": 0.015032707389451902 - } - }, - "versions": { - "truthfulqa_vi_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_vi_mc_llama-7B.json b/evals/truthfulqa-mc/truthfulqa_vi_mc_llama-7B.json deleted file mode 100644 index 281dd4ecf9b86e311de0e817f9acf01943305b44..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_vi_mc_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_vi_mc": { - "mc1": 0.2436548223350254, - "mc1_stderr": 0.015302421509379252, - "mc2": 0.42906776165158894, - "mc2_stderr": 0.016213220197264143 - } - }, - "versions": { - "truthfulqa_vi_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_zh_mc_bloom-7b1.json b/evals/truthfulqa-mc/truthfulqa_zh_mc_bloom-7b1.json deleted file mode 100644 index ccc762b26a77cd8c55bbb320f1c81e3b51e30910..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_zh_mc_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_zh_mc": { - "mc1": 0.22727272727272727, - "mc1_stderr": 0.014900421035751319, - "mc2": 0.3872774224063368, - "mc2_stderr": 0.01489618179042084 - } - }, - "versions": { - "truthfulqa_zh_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_zh_mc_llama-7B.json b/evals/truthfulqa-mc/truthfulqa_zh_mc_llama-7B.json deleted file mode 100644 index 5e49b170e61cb016ddf2105ccf2469c2fd884a24..0000000000000000000000000000000000000000 --- a/evals/truthfulqa-mc/truthfulqa_zh_mc_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_zh_mc": { - "mc1": 0.26515151515151514, - "mc1_stderr": 0.015694869766795665, - "mc2": 0.43429601246293487, - "mc2_stderr": 0.015796890327346987 - } - }, - "versions": { - "truthfulqa_zh_mc": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_ar-bloom-7b1.json b/evals/truthfulqa/truthfulqa_ar-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..4ecb61811afa7d48353c2bef8d82befffceceb07 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_ar-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_ar": { + "mc1": 0.26002587322121606, + "mc1_stderr": 0.015787301353849415, + "mc2": 0.4256353881905651, + "mc2_stderr": 0.015737567507798107 + } + }, + "versions": { + "truthfulqa_ar": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc-challenge/arc_ml_challenge_gpt2.json b/evals/truthfulqa/truthfulqa_ar-gpt2.json similarity index 53% rename from evals/arc-challenge/arc_ml_challenge_gpt2.json rename to evals/truthfulqa/truthfulqa_ar-gpt2.json index 0c8fc7d983c690076289a5040bce6204cb0b9146..f83b2bef80b7c2c4a74c05764b7e0d0996d4b489 100644 --- a/evals/arc-challenge/arc_ml_challenge_gpt2.json +++ b/evals/truthfulqa/truthfulqa_ar-gpt2.json @@ -1,19 +1,19 @@ { "results": { - "arc_ml_challenge": { - "acc": 0.25, - "acc_stderr": 0.025210974204480537, - "acc_norm": 0.21283783783783783, - "acc_norm_stderr": 0.023831178311967415 + "truthfulqa_ar": { + "mc1": 0.23932729624838292, + "mc1_stderr": 0.015356292760819215, + "mc2": 0.44027391572034885, + "mc2_stderr": 0.01696958534622728 } }, "versions": { - "arc_ml_challenge": 0 + "truthfulqa_ar": 1 }, "config": { "model": "hf-auto", "model_args": "pretrained=gpt2", - "batch_size": "1", + "batch_size": 1, "device": "cuda", "no_cache": false, "limit": null, diff --git a/evals/truthfulqa/truthfulqa_ar-llama-7B.json b/evals/truthfulqa/truthfulqa_ar-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..8eaf03b60bf7c8428a848aa8ce0dceeb1b8649da --- /dev/null +++ b/evals/truthfulqa/truthfulqa_ar-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_ar": { + "mc1": 0.278137128072445, + "mc1_stderr": 0.016126799456170973, + "mc2": 0.4510826498021589, + "mc2_stderr": 0.01621099626555797 + } + }, + "versions": { + "truthfulqa_ar": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_da_mc_bloom-7b1.json b/evals/truthfulqa/truthfulqa_bn-bloom-7b1.json similarity index 56% rename from evals/truthfulqa-mc/truthfulqa_da_mc_bloom-7b1.json rename to evals/truthfulqa/truthfulqa_bn-bloom-7b1.json index e55ee209ca0f7da10707018a73476230d0beb314..3f0f5acb8958dae16338d6f3538d1c45fd1d5be8 100644 --- a/evals/truthfulqa-mc/truthfulqa_da_mc_bloom-7b1.json +++ b/evals/truthfulqa/truthfulqa_bn-bloom-7b1.json @@ -1,19 +1,19 @@ { "results": { - "truthfulqa_da_mc": { + "truthfulqa_bn": { "mc1": 0.26248399487836105, "mc1_stderr": 0.015753963575796108, - "mc2": 0.4375025988127948, - "mc2_stderr": 0.01662443223981383 + "mc2": 0.48383834952509674, + "mc2_stderr": 0.01620495508989729 } }, "versions": { - "truthfulqa_da_mc": 1 + "truthfulqa_bn": 1 }, "config": { "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, "device": "cuda", "no_cache": false, "limit": null, diff --git a/evals/truthfulqa/truthfulqa_bn-llama-7B.json b/evals/truthfulqa/truthfulqa_bn-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..3c9c3b9489ea6ca298a17d5e7f442b2a42217543 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_bn-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_bn": { + "mc1": 0.2765685019206146, + "mc1_stderr": 0.016015952210618845, + "mc2": 0.5123820777474262, + "mc2_stderr": 0.01680032112327857 + } + }, + "versions": { + "truthfulqa_bn": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_ca-bloom-7b1.json b/evals/truthfulqa/truthfulqa_ca-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..ef3e258e39add637921d92a92ce41f916a905cce --- /dev/null +++ b/evals/truthfulqa/truthfulqa_ca-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_ca": { + "mc1": 0.24324324324324326, + "mc1_stderr": 0.015401665455019378, + "mc2": 0.4007618819736215, + "mc2_stderr": 0.015273518926419462 + } + }, + "versions": { + "truthfulqa_ca": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_ca-llama-7B.json b/evals/truthfulqa/truthfulqa_ca-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..279d4a6dd8300c3fdf93c1251995060f831d8f3d --- /dev/null +++ b/evals/truthfulqa/truthfulqa_ca-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_ca": { + "mc1": 0.23423423423423423, + "mc1_stderr": 0.015203455154765249, + "mc2": 0.3889981216363435, + "mc2_stderr": 0.015057090749567676 + } + }, + "versions": { + "truthfulqa_ca": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_da-bloom-7b1.json b/evals/truthfulqa/truthfulqa_da-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..74bcde7ba97432b4b569a73b77198ee611a380d0 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_da-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_da": { + "mc1": 0.26248399487836105, + "mc1_stderr": 0.01575396357579612, + "mc2": 0.4375025988127945, + "mc2_stderr": 0.01662443223981383 + } + }, + "versions": { + "truthfulqa_da": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_da_mc_llama-7B.json b/evals/truthfulqa/truthfulqa_da-llama-7B.json similarity index 70% rename from evals/truthfulqa-mc/truthfulqa_da_mc_llama-7B.json rename to evals/truthfulqa/truthfulqa_da-llama-7B.json index 1b7cb2557be3886ead061adba89f89d50eefb9dd..08c1d956bd1de9206944f2438d9f56022794d2d5 100644 --- a/evals/truthfulqa-mc/truthfulqa_da_mc_llama-7B.json +++ b/evals/truthfulqa/truthfulqa_da-llama-7B.json @@ -1,19 +1,19 @@ { "results": { - "truthfulqa_da_mc": { + "truthfulqa_da": { "mc1": 0.2573623559539053, "mc1_stderr": 0.01565358047400349, - "mc2": 0.4161317873775416, - "mc2_stderr": 0.015138516880476799 + "mc2": 0.4161317873775415, + "mc2_stderr": 0.015138516880476807 } }, "versions": { - "truthfulqa_da_mc": 1 + "truthfulqa_da": 1 }, "config": { "model": "hf-auto", "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", + "batch_size": 1, "device": "cuda", "no_cache": false, "limit": null, diff --git a/evals/truthfulqa-mc/truthfulqa_de_mc_bloom-7b1.json b/evals/truthfulqa/truthfulqa_de-bloom-7b1.json similarity index 56% rename from evals/truthfulqa-mc/truthfulqa_de_mc_bloom-7b1.json rename to evals/truthfulqa/truthfulqa_de-bloom-7b1.json index f9009861966dc1cff1e1868b91e2bb41bfccd0f4..068e8c49c1d499f40d02aeb1b4037569845e3f39 100644 --- a/evals/truthfulqa-mc/truthfulqa_de_mc_bloom-7b1.json +++ b/evals/truthfulqa/truthfulqa_de-bloom-7b1.json @@ -1,19 +1,19 @@ { "results": { - "truthfulqa_de_mc": { + "truthfulqa_de": { "mc1": 0.24746192893401014, - "mc1_stderr": 0.015382646812261827, - "mc2": 0.4351673407370902, + "mc1_stderr": 0.015382646812261825, + "mc2": 0.43516734073709074, "mc2_stderr": 0.015914493454090475 } }, "versions": { - "truthfulqa_de_mc": 1 + "truthfulqa_de": 1 }, "config": { "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, "device": "cuda", "no_cache": false, "limit": null, diff --git a/evals/truthfulqa-mc/truthfulqa_de_mc_llama-7B.json b/evals/truthfulqa/truthfulqa_de-llama-7B.json similarity index 62% rename from evals/truthfulqa-mc/truthfulqa_de_mc_llama-7B.json rename to evals/truthfulqa/truthfulqa_de-llama-7B.json index 37147ee36d47e8dd84509b2c477c0c4563f0a7c9..870d9cc5a8bc73c2ca376de43d027b704b474970 100644 --- a/evals/truthfulqa-mc/truthfulqa_de_mc_llama-7B.json +++ b/evals/truthfulqa/truthfulqa_de-llama-7B.json @@ -1,19 +1,19 @@ { "results": { - "truthfulqa_de_mc": { + "truthfulqa_de": { "mc1": 0.233502538071066, - "mc1_stderr": 0.015080432502225447, - "mc2": 0.383224305558326, - "mc2_stderr": 0.014662714095686993 + "mc1_stderr": 0.015080432502225448, + "mc2": 0.38322430555832593, + "mc2_stderr": 0.014662714095687 } }, "versions": { - "truthfulqa_de_mc": 1 + "truthfulqa_de": 1 }, "config": { "model": "hf-auto", "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", + "batch_size": 1, "device": "cuda", "no_cache": false, "limit": null, diff --git a/evals/truthfulqa/truthfulqa_es-bloom-7b1.json b/evals/truthfulqa/truthfulqa_es-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..ff2caf3355fd7554ac124714fa094f7631c4b942 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_es-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_es": { + "mc1": 0.24714828897338403, + "mc1_stderr": 0.015366339219335662, + "mc2": 0.4037104105160595, + "mc2_stderr": 0.014621192787404666 + } + }, + "versions": { + "truthfulqa_es": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_es-llama-7B.json b/evals/truthfulqa/truthfulqa_es-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..57d59d5a6d7fcd5e98b4558ed333d506ab551069 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_es-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_es": { + "mc1": 0.22686945500633712, + "mc1_stderr": 0.014919398735157142, + "mc2": 0.3704736235055417, + "mc2_stderr": 0.014441434139778718 + } + }, + "versions": { + "truthfulqa_es": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_eu-bloom-7b1.json b/evals/truthfulqa/truthfulqa_eu-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..0af0c1ab614e35a49f6251d7b28e594279fd4640 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_eu-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_eu": { + "mc1": 0.26098191214470284, + "mc1_stderr": 0.015795849655411115, + "mc2": 0.4458532690626118, + "mc2_stderr": 0.016282676760451684 + } + }, + "versions": { + "truthfulqa_eu": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_eu-llama-7B.json b/evals/truthfulqa/truthfulqa_eu-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..173bbf1cdee4e48adcce1026ba92eea153711152 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_eu-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_eu": { + "mc1": 0.22739018087855298, + "mc1_stderr": 0.015075655972442521, + "mc2": 0.4067861653338961, + "mc2_stderr": 0.016617765169363637 + } + }, + "versions": { + "truthfulqa_eu": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_fr-bloom-7b1.json b/evals/truthfulqa/truthfulqa_fr-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..59d411be1a435aa79d393d5234b98b20153fa489 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_fr-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_fr": { + "mc1": 0.2604828462515883, + "mc1_stderr": 0.015654976408037494, + "mc2": 0.40875422704780084, + "mc2_stderr": 0.014771598297171899 + } + }, + "versions": { + "truthfulqa_fr": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_fr-llama-7B.json b/evals/truthfulqa/truthfulqa_fr-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..f2cf1301239dab8cdd09c7e41a803f442a37aaff --- /dev/null +++ b/evals/truthfulqa/truthfulqa_fr-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_fr": { + "mc1": 0.2388818297331639, + "mc1_stderr": 0.015209198584184304, + "mc2": 0.3992160965584639, + "mc2_stderr": 0.014275541507345014 + } + }, + "versions": { + "truthfulqa_fr": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_gu-bloom-7b1.json b/evals/truthfulqa/truthfulqa_gu-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..2e428d6ce6e3db9502a089fe9c54da6bd4d4e2fa --- /dev/null +++ b/evals/truthfulqa/truthfulqa_gu-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_gu": { + "mc1": 0.2585499316005472, + "mc1_stderr": 0.016205100857272815, + "mc2": 0.4553767987804663, + "mc2_stderr": 0.01727282663518889 + } + }, + "versions": { + "truthfulqa_gu": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_gu-llama-7B.json b/evals/truthfulqa/truthfulqa_gu-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..a439f0578967f86f0d5cd4f63d5c8655fa596680 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_gu-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_gu": { + "mc1": 0.2612859097127223, + "mc1_stderr": 0.016260532228493024, + "mc2": 0.42794967344995166, + "mc2_stderr": 0.017270715140237876 + } + }, + "versions": { + "truthfulqa_gu": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_hi-bloom-7b1.json b/evals/truthfulqa/truthfulqa_hi-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..8576765f053944525c9eb8954a99cd9ce76a4d1c --- /dev/null +++ b/evals/truthfulqa/truthfulqa_hi-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_hi": { + "mc1": 0.2613195342820181, + "mc1_stderr": 0.01581268409688839, + "mc2": 0.44399239540333224, + "mc2_stderr": 0.015881067623592954 + } + }, + "versions": { + "truthfulqa_hi": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_hi-llama-7B.json b/evals/truthfulqa/truthfulqa_hi-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..e21366d36ceaf8601da21d648ee943852d911560 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_hi-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_hi": { + "mc1": 0.2794307891332471, + "mc1_stderr": 0.016149769533382482, + "mc2": 0.47236250377441935, + "mc2_stderr": 0.016709755014514986 + } + }, + "versions": { + "truthfulqa_hi": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_hr-bloom-7b1.json b/evals/truthfulqa/truthfulqa_hr-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..672cbb9e39a1a7e019ee45709b90eec7588d5235 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_hr-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_hr": { + "mc1": 0.2808842652795839, + "mc1_stderr": 0.016217447153754203, + "mc2": 0.4793142433106635, + "mc2_stderr": 0.01663884163172186 + } + }, + "versions": { + "truthfulqa_hr": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_hr-llama-7B.json b/evals/truthfulqa/truthfulqa_hr-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..3d1d11b77357870c8e0a53dcbafb4e8980c01f9f --- /dev/null +++ b/evals/truthfulqa/truthfulqa_hr-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_hr": { + "mc1": 0.24187256176853056, + "mc1_stderr": 0.015451967985505181, + "mc2": 0.41709863857620866, + "mc2_stderr": 0.01546097371205123 + } + }, + "versions": { + "truthfulqa_hr": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_hu-bloom-7b1.json b/evals/truthfulqa/truthfulqa_hu-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..54432301293d130afd643eb21b0db15d9f209b67 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_hu-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_hu": { + "mc1": 0.26718547341115434, + "mc1_stderr": 0.015946232556288537, + "mc2": 0.49994152241197887, + "mc2_stderr": 0.01703257765685213 + } + }, + "versions": { + "truthfulqa_hu": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_hu-llama-7B.json b/evals/truthfulqa/truthfulqa_hu-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..ccaefb69215b32c9208f055af2f3a1cf9c8760bc --- /dev/null +++ b/evals/truthfulqa/truthfulqa_hu-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_hu": { + "mc1": 0.24643320363164722, + "mc1_stderr": 0.015529773657188122, + "mc2": 0.4311628343540659, + "mc2_stderr": 0.01555491548978951 + } + }, + "versions": { + "truthfulqa_hu": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_hy-bloom-7b1.json b/evals/truthfulqa/truthfulqa_hy-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..debcc1a8876d402702e3c9c496eb89bc3ad0f709 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_hy-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_hy": { + "mc1": 0.2585895117540687, + "mc1_stderr": 0.018636539619637415, + "mc2": 0.44943643103428205, + "mc2_stderr": 0.02033094239607556 + } + }, + "versions": { + "truthfulqa_hy": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_hy-llama-7B.json b/evals/truthfulqa/truthfulqa_hy-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..433e953ddf49c551d21da840cc57c95f665a192a --- /dev/null +++ b/evals/truthfulqa/truthfulqa_hy-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_hy": { + "mc1": 0.2585895117540687, + "mc1_stderr": 0.018636539619637415, + "mc2": 0.4550713950263578, + "mc2_stderr": 0.020036965332656535 + } + }, + "versions": { + "truthfulqa_hy": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_id-bloom-7b1.json b/evals/truthfulqa/truthfulqa_id-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..d6ab9911631d5cf4f7387d705739f249f1da7de2 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_id-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_id": { + "mc1": 0.2532133676092545, + "mc1_stderr": 0.01560023256901984, + "mc2": 0.4031249320049949, + "mc2_stderr": 0.015031705347347539 + } + }, + "versions": { + "truthfulqa_id": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_id-llama-7B.json b/evals/truthfulqa/truthfulqa_id-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..0967fc5439ed4e2c5217256c546b2f76aa443e6b --- /dev/null +++ b/evals/truthfulqa/truthfulqa_id-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_id": { + "mc1": 0.2570694087403599, + "mc1_stderr": 0.015677933234808462, + "mc2": 0.3981714076698207, + "mc2_stderr": 0.015520404506158571 + } + }, + "versions": { + "truthfulqa_id": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_it-bloom-7b1.json b/evals/truthfulqa/truthfulqa_it-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..9599a6d59070c187811a37aa2dcaec596f4e300c --- /dev/null +++ b/evals/truthfulqa/truthfulqa_it-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_it": { + "mc1": 0.2707535121328225, + "mc1_stderr": 0.015889888362560486, + "mc2": 0.4374801864181257, + "mc2_stderr": 0.015955762711633903 + } + }, + "versions": { + "truthfulqa_it": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_it-llama-7B.json b/evals/truthfulqa/truthfulqa_it-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..221af91b2b82bf70d904265c27c0279db93872af --- /dev/null +++ b/evals/truthfulqa/truthfulqa_it-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_it": { + "mc1": 0.24521072796934865, + "mc1_stderr": 0.015384352284543929, + "mc2": 0.39642666716879443, + "mc2_stderr": 0.01483705265700183 + } + }, + "versions": { + "truthfulqa_it": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_kn-bloom-7b1.json b/evals/truthfulqa/truthfulqa_kn-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..b116af421e76c9c9f0d685f0a1156de33d48fa41 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_kn-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_kn": { + "mc1": 0.28466076696165193, + "mc1_stderr": 0.017343050775840425, + "mc2": 0.49109028617714945, + "mc2_stderr": 0.017608862092749467 + } + }, + "versions": { + "truthfulqa_kn": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_kn-llama-7B.json b/evals/truthfulqa/truthfulqa_kn-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..f05f0339406ac5574d7a1dc62bddacb292f097eb --- /dev/null +++ b/evals/truthfulqa/truthfulqa_kn-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_kn": { + "mc1": 0.275811209439528, + "mc1_stderr": 0.017176612615872052, + "mc2": 0.4635130117214921, + "mc2_stderr": 0.01825683954680752 + } + }, + "versions": { + "truthfulqa_kn": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_ml-bloom-7b1.json b/evals/truthfulqa/truthfulqa_ml-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..d2ada8ce66115bbf7e7e2ac501b996bc7b9ab3a1 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_ml-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_ml": { + "mc1": 0.260806916426513, + "mc1_stderr": 0.01667907195342198, + "mc2": 0.47996911862138697, + "mc2_stderr": 0.017778690252427683 + } + }, + "versions": { + "truthfulqa_ml": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_ml-llama-7B.json b/evals/truthfulqa/truthfulqa_ml-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..4dd3caeb8a76c583e812d275589a2c18156d6935 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_ml-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_ml": { + "mc1": 0.2824207492795389, + "mc1_stderr": 0.01710080754090615, + "mc2": 0.5024391989231584, + "mc2_stderr": 0.017936047828800445 + } + }, + "versions": { + "truthfulqa_ml": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_mr-bloom-7b1.json b/evals/truthfulqa/truthfulqa_mr-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..181033bdf126dc47bfc09557ea24531f4fead727 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_mr-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_mr": { + "mc1": 0.2761780104712042, + "mc1_stderr": 0.016186321628712155, + "mc2": 0.4765064151203332, + "mc2_stderr": 0.016772466571288412 + } + }, + "versions": { + "truthfulqa_mr": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_mr-llama-7B.json b/evals/truthfulqa/truthfulqa_mr-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..a1fcd59738ae0b14a296aba32a13e2bda55370e3 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_mr-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_mr": { + "mc1": 0.2905759162303665, + "mc1_stderr": 0.016436922328865435, + "mc2": 0.49306373435254724, + "mc2_stderr": 0.016980148211258952 + } + }, + "versions": { + "truthfulqa_mr": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_ne-bloom-7b1.json b/evals/truthfulqa/truthfulqa_ne-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..89defff7cdf83326b83aee4c35f6b7ab666393c0 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_ne-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_ne": { + "mc1": 0.28811369509043927, + "mc1_stderr": 0.0162891162717815, + "mc2": 0.46164155205805624, + "mc2_stderr": 0.016689007834004295 + } + }, + "versions": { + "truthfulqa_ne": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_ne-llama-7B.json b/evals/truthfulqa/truthfulqa_ne-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..b18b50165478e2f5e3938b2978e51ae65ffb09b0 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_ne-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_ne": { + "mc1": 0.29198966408268734, + "mc1_stderr": 0.016353615824015625, + "mc2": 0.4636310825029969, + "mc2_stderr": 0.016928691048242774 + } + }, + "versions": { + "truthfulqa_ne": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_nl-bloom-7b1.json b/evals/truthfulqa/truthfulqa_nl-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..d1cfa8f1fcea4cc13a3119d8a4cf2b83a9a5a879 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_nl-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_nl": { + "mc1": 0.25477707006369427, + "mc1_stderr": 0.01556199397314563, + "mc2": 0.42677675918475044, + "mc2_stderr": 0.016186878668566846 + } + }, + "versions": { + "truthfulqa_nl": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_nl_mc_llama-7B.json b/evals/truthfulqa/truthfulqa_nl-llama-7B.json similarity index 62% rename from evals/truthfulqa-mc/truthfulqa_nl_mc_llama-7B.json rename to evals/truthfulqa/truthfulqa_nl-llama-7B.json index 22e9d2c488076c5884e9224d8636a092fac4fe96..9646b968c2b96cfe4136c6e86627780bff5218ce 100644 --- a/evals/truthfulqa-mc/truthfulqa_nl_mc_llama-7B.json +++ b/evals/truthfulqa/truthfulqa_nl-llama-7B.json @@ -1,19 +1,19 @@ { "results": { - "truthfulqa_nl_mc": { + "truthfulqa_nl": { "mc1": 0.24331210191082803, - "mc1_stderr": 0.015324355488601159, - "mc2": 0.40023342153314706, - "mc2_stderr": 0.014679036703865578 + "mc1_stderr": 0.015324355488601135, + "mc2": 0.40023342153314656, + "mc2_stderr": 0.014679036703865582 } }, "versions": { - "truthfulqa_nl_mc": 1 + "truthfulqa_nl": 1 }, "config": { "model": "hf-auto", "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", + "batch_size": 1, "device": "cuda", "no_cache": false, "limit": null, diff --git a/evals/truthfulqa/truthfulqa_pt-bloom-7b1.json b/evals/truthfulqa/truthfulqa_pt-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..d9c6cefe30e562acfb981870f9e593f27f720a3d --- /dev/null +++ b/evals/truthfulqa/truthfulqa_pt-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_pt": { + "mc1": 0.23857868020304568, + "mc1_stderr": 0.015192910034567013, + "mc2": 0.38894722340741417, + "mc2_stderr": 0.014531269277587645 + } + }, + "versions": { + "truthfulqa_pt": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_pt_mc_llama-7B.json b/evals/truthfulqa/truthfulqa_pt-llama-7B.json similarity index 69% rename from evals/truthfulqa-mc/truthfulqa_pt_mc_llama-7B.json rename to evals/truthfulqa/truthfulqa_pt-llama-7B.json index 7084df35e971145794041e3080344faabab95729..1ae678becb49d878dc30174f2c390f2c1b5a1f49 100644 --- a/evals/truthfulqa-mc/truthfulqa_pt_mc_llama-7B.json +++ b/evals/truthfulqa/truthfulqa_pt-llama-7B.json @@ -1,19 +1,19 @@ { "results": { - "truthfulqa_pt_mc": { + "truthfulqa_pt": { "mc1": 0.22842639593908629, - "mc1_stderr": 0.014964922033138024, + "mc1_stderr": 0.014964922033138022, "mc2": 0.3823261607330551, - "mc2_stderr": 0.01463319398314419 + "mc2_stderr": 0.014633193983144183 } }, "versions": { - "truthfulqa_pt_mc": 1 + "truthfulqa_pt": 1 }, "config": { "model": "hf-auto", "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", + "batch_size": 1, "device": "cuda", "no_cache": false, "limit": null, diff --git a/evals/truthfulqa/truthfulqa_ro-bloom-7b1.json b/evals/truthfulqa/truthfulqa_ro-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..e9d6490be6beab45fd85e68d9df1e301bf2dff28 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_ro-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_ro": { + "mc1": 0.26187419768934533, + "mc1_stderr": 0.015762378425124946, + "mc2": 0.4605371384706094, + "mc2_stderr": 0.016307442681458683 + } + }, + "versions": { + "truthfulqa_ro": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_ro-llama-7B.json b/evals/truthfulqa/truthfulqa_ro-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..26abd62509f8f15981ca8051421f879ea16ddc2f --- /dev/null +++ b/evals/truthfulqa/truthfulqa_ro-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_ro": { + "mc1": 0.22849807445442877, + "mc1_stderr": 0.015052893222788351, + "mc2": 0.37047262828252514, + "mc2_stderr": 0.015022205435273333 + } + }, + "versions": { + "truthfulqa_ro": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_ru-bloom-7b1.json b/evals/truthfulqa/truthfulqa_ru-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..3347a51ef0c14c0658f692111f9112b52f876a5c --- /dev/null +++ b/evals/truthfulqa/truthfulqa_ru-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_ru": { + "mc1": 0.30710659898477155, + "mc1_stderr": 0.016443354533552747, + "mc2": 0.49874761323987404, + "mc2_stderr": 0.016167778359600482 + } + }, + "versions": { + "truthfulqa_ru": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_ru-llama-7B.json b/evals/truthfulqa/truthfulqa_ru-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..54b06b11d61f59c9f47d987a96a9290c09921a27 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_ru-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_ru": { + "mc1": 0.24619289340101522, + "mc1_stderr": 0.015356084872692898, + "mc2": 0.40938277991151933, + "mc2_stderr": 0.015252017769860154 + } + }, + "versions": { + "truthfulqa_ru": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_sk-bloom-7b1.json b/evals/truthfulqa/truthfulqa_sk-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..1132cb125d8848afa4abc9ecef17405375f5ccc0 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_sk-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_sk": { + "mc1": 0.2390745501285347, + "mc1_stderr": 0.015301260856408254, + "mc2": 0.43782616190313467, + "mc2_stderr": 0.01657761354751216 + } + }, + "versions": { + "truthfulqa_sk": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_sk-llama-7B.json b/evals/truthfulqa/truthfulqa_sk-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..71e866145020816a8524a8bd50cddf94af5042ea --- /dev/null +++ b/evals/truthfulqa/truthfulqa_sk-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_sk": { + "mc1": 0.2275064267352185, + "mc1_stderr": 0.015039512631474048, + "mc2": 0.40729144857566124, + "mc2_stderr": 0.015845697731465 + } + }, + "versions": { + "truthfulqa_sk": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_sr-bloom-7b1.json b/evals/truthfulqa/truthfulqa_sr-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..75efa51eca0c0d99414987b87632f9c19f581a21 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_sr-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_sr": { + "mc1": 0.2878980891719745, + "mc1_stderr": 0.016170834614246097, + "mc2": 0.4604993074094113, + "mc2_stderr": 0.01649631560714403 + } + }, + "versions": { + "truthfulqa_sr": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_sr-llama-7B.json b/evals/truthfulqa/truthfulqa_sr-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..a65b681172e15a187d19448a27058ec125e2b1f1 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_sr-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_sr": { + "mc1": 0.26878980891719745, + "mc1_stderr": 0.01583322873155152, + "mc2": 0.422701657829082, + "mc2_stderr": 0.015374851085961157 + } + }, + "versions": { + "truthfulqa_sr": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa-mc/truthfulqa_sv_mc_bloom-7b1.json b/evals/truthfulqa/truthfulqa_sv-bloom-7b1.json similarity index 64% rename from evals/truthfulqa-mc/truthfulqa_sv_mc_bloom-7b1.json rename to evals/truthfulqa/truthfulqa_sv-bloom-7b1.json index 9885cf6375b817aa059b00ca8a5df86a2f6bbce4..85698716bf120fd641d6dfcb551bdb145d17bc87 100644 --- a/evals/truthfulqa-mc/truthfulqa_sv_mc_bloom-7b1.json +++ b/evals/truthfulqa/truthfulqa_sv-bloom-7b1.json @@ -1,19 +1,19 @@ { "results": { - "truthfulqa_sv_mc": { + "truthfulqa_sv": { "mc1": 0.2622739018087855, "mc1_stderr": 0.015821052272364522, - "mc2": 0.4457248931967088, + "mc2": 0.44572489319670916, "mc2_stderr": 0.016517364176123605 } }, "versions": { - "truthfulqa_sv_mc": 1 + "truthfulqa_sv": 1 }, "config": { "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, "device": "cuda", "no_cache": false, "limit": null, diff --git a/evals/truthfulqa-mc/truthfulqa_ar_mc_bloom-7b1.json b/evals/truthfulqa/truthfulqa_sv-llama-7B.json similarity index 68% rename from evals/truthfulqa-mc/truthfulqa_ar_mc_bloom-7b1.json rename to evals/truthfulqa/truthfulqa_sv-llama-7B.json index d572bf57654e75d51e028e16a79aa73942dadca1..f2f88649e17469e2a7fdc44f296619fe407feac6 100644 --- a/evals/truthfulqa-mc/truthfulqa_ar_mc_bloom-7b1.json +++ b/evals/truthfulqa/truthfulqa_sv-llama-7B.json @@ -1,19 +1,19 @@ { "results": { - "truthfulqa_ar_mc": { + "truthfulqa_sv": { "mc1": 0.2596899224806202, "mc1_stderr": 0.01577046983489191, - "mc2": 0.4250856388236661, - "mc2_stderr": 0.01572683307613003 + "mc2": 0.4052891370296314, + "mc2_stderr": 0.01500679891573553 } }, "versions": { - "truthfulqa_ar_mc": 1 + "truthfulqa_sv": 1 }, "config": { "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/bloom-7b1", - "batch_size": "1", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, "device": "cuda", "no_cache": false, "limit": null, diff --git a/evals/truthfulqa/truthfulqa_ta-bloom-7b1.json b/evals/truthfulqa/truthfulqa_ta-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..956d773e26ebf10fc23669bb18d5b9df924be462 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_ta-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_ta": { + "mc1": 0.2651413189771198, + "mc1_stderr": 0.016204613164182584, + "mc2": 0.48348066773619114, + "mc2_stderr": 0.016887213348384833 + } + }, + "versions": { + "truthfulqa_ta": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_ta-llama-7B.json b/evals/truthfulqa/truthfulqa_ta-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..3edaa546d22cbb705a02af8433a7b3ecb4f29213 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_ta-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_ta": { + "mc1": 0.28263795423956933, + "mc1_stderr": 0.016530366611189357, + "mc2": 0.5032626048969708, + "mc2_stderr": 0.01719880976895468 + } + }, + "versions": { + "truthfulqa_ta": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_te-bloom-7b1.json b/evals/truthfulqa/truthfulqa_te-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..d139c759617d41dd724dc54443b08c7eba5c2a83 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_te-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_te": { + "mc1": 0.2652482269503546, + "mc1_stderr": 0.016638349265004355, + "mc2": 0.4612285746093752, + "mc2_stderr": 0.017504699336599025 + } + }, + "versions": { + "truthfulqa_te": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_te-llama-7B.json b/evals/truthfulqa/truthfulqa_te-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..b7371487cfa5f3b205258d0c63aa1d722e304a75 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_te-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_te": { + "mc1": 0.2851063829787234, + "mc1_stderr": 0.01701523103469595, + "mc2": 0.4821795923320059, + "mc2_stderr": 0.01784811574301116 + } + }, + "versions": { + "truthfulqa_te": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_uk-bloom-7b1.json b/evals/truthfulqa/truthfulqa_uk-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..da866d1706ae757888bf53041acc427c30e98a06 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_uk-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_uk": { + "mc1": 0.3090909090909091, + "mc1_stderr": 0.01666442755255745, + "mc2": 0.5143873310692731, + "mc2_stderr": 0.016755211041268873 + } + }, + "versions": { + "truthfulqa_uk": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_uk-llama-7B.json b/evals/truthfulqa/truthfulqa_uk-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..3a420b35b0478fcc320798e8287f213c061df4fe --- /dev/null +++ b/evals/truthfulqa/truthfulqa_uk-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_uk": { + "mc1": 0.23636363636363636, + "mc1_stderr": 0.015320412612327241, + "mc2": 0.4141829984231552, + "mc2_stderr": 0.01560702677887637 + } + }, + "versions": { + "truthfulqa_uk": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc-challenge/arc_hy_challenge_bloom-560.json b/evals/truthfulqa/truthfulqa_vi-bloom-7b1.json similarity index 50% rename from evals/arc-challenge/arc_hy_challenge_bloom-560.json rename to evals/truthfulqa/truthfulqa_vi-bloom-7b1.json index 38b99f7004830ebf484274ad893c53cff9de33a4..f21113c3d005bd269763438b047147bb50ac5125 100644 --- a/evals/arc-challenge/arc_hy_challenge_bloom-560.json +++ b/evals/truthfulqa/truthfulqa_vi-bloom-7b1.json @@ -1,19 +1,19 @@ { "results": { - "arc_hy_challenge": { - "acc": 0.19655172413793104, - "acc_stderr": 0.023375906908472157, - "acc_norm": 0.2482758620689655, - "acc_norm_stderr": 0.02541251077219611 + "truthfulqa_vi": { + "mc1": 0.2968152866242038, + "mc1_stderr": 0.016316229722585934, + "mc2": 0.44721474578334436, + "mc2_stderr": 0.015073430494043749 } }, "versions": { - "arc_hy_challenge": 0 + "truthfulqa_vi": 1 }, "config": { "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-560m", - "batch_size": "1", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, "device": "cuda", "no_cache": false, "limit": null, diff --git a/evals/truthfulqa/truthfulqa_vi-llama-7B.json b/evals/truthfulqa/truthfulqa_vi-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..bc5992da0821ee82c8ce26e99fb73e6e2f872651 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_vi-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_vi": { + "mc1": 0.2445859872611465, + "mc1_stderr": 0.015351480770855935, + "mc2": 0.42975481561967727, + "mc2_stderr": 0.01625176801732652 + } + }, + "versions": { + "truthfulqa_vi": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_zh-bloom-7b1.json b/evals/truthfulqa/truthfulqa_zh-bloom-7b1.json new file mode 100644 index 0000000000000000000000000000000000000000..7496dee8d8893c925eac3f5a5de1723f69d1ad77 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_zh-bloom-7b1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_zh": { + "mc1": 0.22842639593908629, + "mc1_stderr": 0.014964922033138017, + "mc2": 0.38822244050439564, + "mc2_stderr": 0.014953544130092178 + } + }, + "versions": { + "truthfulqa_zh": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=bigscience/bloom-7b1", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_zh-llama-7B.json b/evals/truthfulqa/truthfulqa_zh-llama-7B.json new file mode 100644 index 0000000000000000000000000000000000000000..eeab4eff270462460733b050ac068062679cc507 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_zh-llama-7B.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_zh": { + "mc1": 0.26649746192893403, + "mc1_stderr": 0.015760136800242356, + "mc2": 0.43598966702035913, + "mc2_stderr": 0.015850355717645676 + } + }, + "versions": { + "truthfulqa_zh": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", + "batch_size": 1, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file